Repository: attic-labs/noms Branch: master Commit: e5fa29d95e8b Files: 339 Total size: 2.4 MB Directory structure: gitextract_tbm5d7az/ ├── .dockerignore ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── Dockerfile ├── HACKING.md ├── LICENSE ├── README.md ├── cmd/ │ └── util/ │ └── kingpin_command.go ├── codecov.yml ├── doc/ │ ├── cli-tour.md │ ├── decent/ │ │ ├── about.md │ │ ├── architectures.md │ │ ├── demo-ipfs-chat.md │ │ ├── demo-p2p-chat.md │ │ └── quickstart.md │ ├── faq.md │ ├── go-tour.md │ ├── intro.md │ └── spelling.md ├── go/ │ ├── chunks/ │ │ ├── chunk.go │ │ ├── chunk_serializer.go │ │ ├── chunk_serializer_test.go │ │ ├── chunk_store.go │ │ ├── chunk_store_common_test.go │ │ ├── chunk_test.go │ │ ├── memory_store.go │ │ ├── memory_store_test.go │ │ ├── put_cache.go │ │ ├── remote_requests.go │ │ ├── remote_requests_test.go │ │ └── test_utils.go │ ├── config/ │ │ ├── config.go │ │ ├── config_test.go │ │ ├── resolver.go │ │ └── resolver_test.go │ ├── constants/ │ │ ├── http.go │ │ └── version.go │ ├── d/ │ │ ├── check_error.go │ │ ├── try.go │ │ └── try_test.go │ ├── datas/ │ │ ├── commit.go │ │ ├── commit_options.go │ │ ├── commit_test.go │ │ ├── database.go │ │ ├── database_common.go │ │ ├── database_server.go │ │ ├── database_test.go │ │ ├── dataset.go │ │ ├── dataset_test.go │ │ ├── http_chunk_store.go │ │ ├── http_chunk_store_test.go │ │ ├── pull.go │ │ ├── pull_test.go │ │ ├── pulling.md │ │ ├── remote_database_handlers.go │ │ ├── remote_database_handlers_test.go │ │ ├── serialize_hashes.go │ │ └── serialize_hashes_test.go │ ├── diff/ │ │ ├── apply_patch.go │ │ ├── apply_patch_test.go │ │ ├── diff.go │ │ ├── diff_test.go │ │ ├── patch.go │ │ ├── patch_test.go │ │ ├── print_diff.go │ │ └── summary.go │ ├── hash/ │ │ ├── base32.go │ │ ├── base32_test.go │ │ ├── hash.go │ │ ├── hash_slice.go │ │ ├── hash_slice_test.go │ │ └── hash_test.go │ ├── marshal/ │ │ ├── decode.go │ │ ├── decode_test.go │ │ ├── encode.go │ │ ├── encode_test.go │ │ ├── 
encode_type.go │ │ └── encode_type_test.go │ ├── merge/ │ │ ├── candidate.go │ │ ├── three_way.go │ │ ├── three_way_keyval_test.go │ │ ├── three_way_list.go │ │ ├── three_way_list_test.go │ │ ├── three_way_ordered_sequence.go │ │ ├── three_way_set_test.go │ │ └── three_way_test.go │ ├── metrics/ │ │ ├── histogram.go │ │ └── histogram_test.go │ ├── nbs/ │ │ ├── NBS-on-AWS.md │ │ ├── README.md │ │ ├── aws_chunk_source.go │ │ ├── aws_chunk_source_test.go │ │ ├── aws_table_persister.go │ │ ├── aws_table_persister_test.go │ │ ├── block_store_test.go │ │ ├── cache.go │ │ ├── conjoiner.go │ │ ├── conjoiner_test.go │ │ ├── dynamo_fake_test.go │ │ ├── dynamo_manifest.go │ │ ├── dynamo_manifest_test.go │ │ ├── dynamo_table_reader.go │ │ ├── dynamo_table_reader_test.go │ │ ├── factory.go │ │ ├── factory_test.go │ │ ├── fd_cache.go │ │ ├── fd_cache_test.go │ │ ├── file_manifest.go │ │ ├── file_manifest_test.go │ │ ├── file_table_persister.go │ │ ├── file_table_persister_test.go │ │ ├── fs_table_cache.go │ │ ├── fs_table_cache_test.go │ │ ├── manifest.go │ │ ├── manifest_cache.go │ │ ├── manifest_cache_test.go │ │ ├── mem_table.go │ │ ├── mem_table_test.go │ │ ├── mmap_table_reader.go │ │ ├── mmap_table_reader_test.go │ │ ├── persisting_chunk_source.go │ │ ├── persisting_chunk_source_test.go │ │ ├── root_tracker_test.go │ │ ├── s3_fake_test.go │ │ ├── s3_table_reader.go │ │ ├── s3_table_reader_test.go │ │ ├── stats.go │ │ ├── stats_test.go │ │ ├── store.go │ │ ├── table.go │ │ ├── table_persister.go │ │ ├── table_persister_test.go │ │ ├── table_reader.go │ │ ├── table_set.go │ │ ├── table_set_test.go │ │ ├── table_test.go │ │ ├── table_writer.go │ │ └── test/ │ │ └── manifest_clobber.go │ ├── ngql/ │ │ ├── README.md │ │ ├── query.go │ │ ├── query_test.go │ │ └── types.go │ ├── nomdl/ │ │ ├── lexer.go │ │ ├── parser.go │ │ └── parser_test.go │ ├── perf/ │ │ ├── hash-perf-rig/ │ │ │ ├── README.md │ │ │ └── main.go │ │ └── suite/ │ │ ├── suite.go │ │ └── suite_test.go │ ├── 
sloppy/ │ │ ├── sloppy.go │ │ └── sloppy_test.go │ ├── spec/ │ │ ├── absolute_path.go │ │ ├── absolute_path_test.go │ │ ├── commit_meta.go │ │ ├── commit_meta_test.go │ │ ├── spec.go │ │ ├── spec_test.go │ │ └── util.go │ ├── types/ │ │ ├── blob.go │ │ ├── blob_editor.go │ │ ├── blob_editor_test.go │ │ ├── blob_leaf_sequence.go │ │ ├── blob_test.go │ │ ├── bool.go │ │ ├── codec.go │ │ ├── codec_test.go │ │ ├── collection.go │ │ ├── collection_test.go │ │ ├── common_supertype.go │ │ ├── common_supertype_test.go │ │ ├── compare_test.go │ │ ├── edit_distance.go │ │ ├── edit_distance_test.go │ │ ├── encode_human_readable.go │ │ ├── encode_human_readable_test.go │ │ ├── encoding_test.go │ │ ├── equals_test.go │ │ ├── get_hash.go │ │ ├── graph_builder.go │ │ ├── graph_builder_test.go │ │ ├── incremental_test.go │ │ ├── indexed_sequence_diff.go │ │ ├── indexed_sequences.go │ │ ├── leaf_sequence.go │ │ ├── less.go │ │ ├── list.go │ │ ├── list_editor.go │ │ ├── list_editor_test.go │ │ ├── list_iterator.go │ │ ├── list_iterator_test.go │ │ ├── list_leaf_sequence.go │ │ ├── list_test.go │ │ ├── make_type.go │ │ ├── map.go │ │ ├── map_editor.go │ │ ├── map_iterator.go │ │ ├── map_iterator_test.go │ │ ├── map_leaf_sequence.go │ │ ├── map_test.go │ │ ├── meta_sequence.go │ │ ├── noms_kind.go │ │ ├── number.go │ │ ├── number_util.go │ │ ├── opcache.go │ │ ├── opcache_compare.go │ │ ├── opcache_test.go │ │ ├── ordered_sequences.go │ │ ├── ordered_sequences_diff.go │ │ ├── ordered_sequences_diff_test.go │ │ ├── path.go │ │ ├── path_test.go │ │ ├── perf/ │ │ │ ├── dummy.go │ │ │ └── perf_test.go │ │ ├── primitives_test.go │ │ ├── ref.go │ │ ├── ref_heap.go │ │ ├── ref_heap_test.go │ │ ├── ref_test.go │ │ ├── rolling_value_hasher.go │ │ ├── rungen.go │ │ ├── sequence.go │ │ ├── sequence_chunker.go │ │ ├── sequence_concat.go │ │ ├── sequence_cursor.go │ │ ├── sequence_cursor_test.go │ │ ├── set.go │ │ ├── set_editor.go │ │ ├── set_iterator.go │ │ ├── set_iterator_test.go │ │ ├── 
set_leaf_sequence.go │ │ ├── set_test.go │ │ ├── simplify.go │ │ ├── simplify_test.go │ │ ├── string.go │ │ ├── string_test.go │ │ ├── struct.go │ │ ├── struct_test.go │ │ ├── subtype.go │ │ ├── subtype_test.go │ │ ├── type.go │ │ ├── type_desc.go │ │ ├── type_test.go │ │ ├── util_test.go │ │ ├── validate_type.go │ │ ├── validating_decoder.go │ │ ├── validating_decoder_test.go │ │ ├── value.go │ │ ├── value_decoder.go │ │ ├── value_stats.go │ │ ├── value_store.go │ │ ├── value_store_test.go │ │ ├── walk.go │ │ ├── walk_refs.go │ │ ├── walk_refs_test.go │ │ └── walk_test.go │ └── util/ │ ├── clienttest/ │ │ └── client_test_suite.go │ ├── datetime/ │ │ ├── date_time.go │ │ └── date_time_test.go │ ├── exit/ │ │ └── exit.go │ ├── functions/ │ │ ├── all.go │ │ └── all_test.go │ ├── json/ │ │ ├── from_json.go │ │ ├── from_json_test.go │ │ ├── to_json.go │ │ └── to_json_test.go │ ├── math/ │ │ └── minmax.go │ ├── outputpager/ │ │ └── page_output.go │ ├── profile/ │ │ └── profile.go │ ├── progressreader/ │ │ └── reader.go │ ├── random/ │ │ ├── id.go │ │ └── id_test.go │ ├── sizecache/ │ │ ├── size_cache.go │ │ └── size_cache_test.go │ ├── status/ │ │ └── status.go │ ├── test/ │ │ └── equals_ignore_hashes.go │ ├── verbose/ │ │ └── verbose.go │ └── writers/ │ ├── max_line_writer.go │ ├── prefix_writer.go │ └── writers_test.go ├── go.mod ├── go.sum ├── release.sh ├── samples/ │ ├── cli/ │ │ └── nomsconfig/ │ │ └── README.md │ └── go/ │ ├── csv/ │ │ ├── README.md │ │ ├── common.go │ │ ├── csv_reader.go │ │ ├── csv_reader_test.go │ │ ├── kind_slice.go │ │ ├── kind_slice_test.go │ │ ├── read.go │ │ ├── read_test.go │ │ ├── schema.go │ │ ├── schema_test.go │ │ ├── write.go │ │ └── write_test.go │ ├── decent/ │ │ ├── README.md │ │ ├── data/ │ │ │ ├── godfather.html │ │ │ ├── godfather2.html │ │ │ └── godfather3.html │ │ ├── dbg/ │ │ │ └── debug.go │ │ ├── ipfs-chat/ │ │ │ └── main.go │ │ ├── lib/ │ │ │ ├── datapager.go │ │ │ ├── event.go │ │ │ ├── importer.go │ │ │ ├── logger.go │ 
│ │ ├── model.go │ │ │ ├── model_test.go │ │ │ ├── pubsub.go │ │ │ ├── term_index.go │ │ │ ├── term_index_test.go │ │ │ └── termui.go │ │ └── p2p-chat/ │ │ ├── README.md │ │ └── main.go │ └── nomdex/ │ ├── Readme.md │ ├── expr.go │ ├── nomdex.go │ ├── nomdex_find.go │ ├── nomdex_test.go │ ├── nomdex_update.go │ ├── parser.go │ ├── parser_test.go │ ├── query_range.go │ └── query_range_test.go └── tools/ ├── file/ │ ├── file.go │ └── file_test.go ├── licensify.py ├── loadtest/ │ └── loadtest.go ├── noms/ │ ├── README.md │ ├── __init__.py │ ├── copy.py │ ├── copy_test.py │ ├── pushd.py │ ├── staging.py │ ├── staging_test.py │ ├── symlink.py │ └── symlink_test.py └── runner/ ├── serial.go └── serial_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ .git doc codecov.yml CONTRIBUTING.md LICENSE README.md samples ================================================ FILE: .gitignore ================================================ noms.iml *.pyc *.swp .vscode .idea .noms .nomsconfig .DS_Store node_modules ================================================ FILE: .travis.yml ================================================ language: go go: - 1.x os: - linux env: - GO111MODULE=on before_install: # gox simplifies building for multiple architectures - go get github.com/mitchellh/gox script: - go build ./... - go test ./... - go vet ./... 
- mkdir linux - mkdir darwin - gox -os="linux darwin" -arch="amd64" -output="{{.OS}}/{{.Dir}}" ./cmd/noms - gox -os="linux darwin" -arch="amd64" -output="{{.OS}}/{{.Dir}}" ./samples/go/csv/csv-import - gox -os="linux darwin" -arch="amd64" -output="{{.OS}}/{{.Dir}}" ./samples/go/csv/csv-export - gox -os="linux darwin" -arch="amd64" -output="{{.OS}}/{{.Dir}}" ./samples/go/xml-import - mv darwin osx - zip -r linux linux - zip -r osx osx deploy: overwrite: true provider: releases skip_cleanup: true api_key: secure: "N2LCdQDlPquU31TK8WZwlYPRT7SSyfsGPBpNPSp5gpJPtF5hlqLf96Fd1R7SYn/LfTcri8baFMxgPVK4FowAzIsTxwkG57vCnJR24atOFVLkaKzVPdQZ30zXDHq2WO1zYw7KzAZq49YWdzwKSShzT7+SpiNZWEE2UiB5ZSQcd7/fii1TUkphzWPeHCB+d9wf1qUyJmm6HQ3PKe9yYRQHczGin6INUV5o+nzlRws2+5Kj7eg519htLgRY0oloncY0fdwTEwbSTTkkja3eoAWQrdPMJH7mDMwpbdgPl3jW8wDdTPHO5mQHRF4GvJHrY18qMJ9Kf8iQ3bdRtIS5XM8kvo8+Le22XQbYH7Q7Ryj/bJN+71KpVLwqWQhOr3fWRrL7r8DDPAG/myw0SK1uMaXCzT3KiYckJv7Q3el9MkHNblvFNxWC4tIrwE0LtP4hbSiIlZ/MV58yJxU8WXVej9AoFnKHLA7hgJUhHy0EIlfeETalDBrqNrh40iNP0maUrhpNJxLGtpOgAUhrdQ3gd//6pWwejkfvMTQ2b+1Qq11wWsSmRI/U1WGbcO/wzjKgVv2PT2sYPgx7TPwPWje5uFTZ4/sehwGG/LDcvuZ5uBXLRcpFIz9oh31nIFzsxhdatSKaaK4zlMzkxec+xqBGe0SVKeL/rW0MeQUbeSqyAf0wtBQ=" file: - linux.zip - osx.zip on: repo: attic-labs/noms tags: true ================================================ FILE: CONTRIBUTING.md ================================================ Contributing to Noms ==================== ## Install Go First set up Go on your machine per https://golang.org/doc/install. You need *at least* Go version 1.11. You can test that you're set up correctly like so: ```shell # Must be at least 1.11 go version ``` ## Get and build Noms Noms uses [Go modules](https://github.com/golang/go/wiki/Modules), a new feature of Go. Therefore if you're an existing Go user, you need to be careful to check out into a directory **other than $GOPATH** (or else use the environment variable GO111MODULE=on to force it on). 
Hopefully this gets easier to understand in future Go versions when Go modules become stabilized. ```shell cd git clone https://github.com/attic-labs/noms cd noms go install ./cmd/noms go test ./... ``` ## License Noms is open source software, licensed under the [Apache License, Version 2.0](LICENSE). ## Contributing code For legal reasons, all contributors must sign a contributor agreement, either for an [individual](https://attic-labs.github.io/ca/individual.html) or [corporation](https://attic-labs.github.io/ca/corporation.html), before a pull request can be accepted. ## Languages * Use Go, JS, or Python. * Shell script is not allowed. ## Coding style * Go uses `gofmt`; it is advisable to hook it into your editor * JS follows the [Airbnb Style Guide](https://github.com/airbnb/javascript) * Tag PRs with either `toward: #<bug>` or `fixes: #<bug>` to help establish context for why a change is happening * Commit messages follow [Chris Beams's awesome commit message style guide](http://chris.beams.io/posts/git-commit/) ### Go error reporting In general, for Public API in Noms, we use the Go-style of returning errors by default. For non-exposed code, we do provide, and use, some wrappers to do Exception-style error handling. There *must* be an overriding rationale for using this style, however. One reason to use the Exception-style is that the current code doesn't know how to proceed and needs to panic, but you want to signal that a calling function somewhere up the stack might be able to recover from the failure and continue. For these cases, please use the following family of functions to 'raise' a 'catchable' error (see [go/d/try.go](https://godoc.org/github.com/attic-labs/noms/go/d)): * d.PanicIfError() * d.PanicIfTrue() * d.PanicIfFalse() You might see some old code that uses similar-looking functions starting with `d.Chk`; however, we are going to remove those and don't want them used in new code. See #3258 for details. 
## Submitting PRs We follow a code review protocol derived from the one that the [Chromium team](https://www.chromium.org/) uses: 1. Create a GitHub fork of the repo you want to modify (e.g., fork `https://github.com/attic-labs/noms` to `https://github.com/<username>/noms`). 2. Add your own fork as a remote to your GitHub repo: `git remote add <username> https://github.com/<username>/noms`. 3. Push your changes to a branch at your fork: `git push <username> <branch>` 4. Create a PR using the branch you just created. Usually you can do this by just navigating to https://github.com/attic-labs/noms in a browser - GitHub recognizes the new branch and offers to create a PR for you. 5. When you're ready for review, make a comment in the issue asking for a review. Sometimes people won't review until you do this because we're not sure if you think the PR is ready for review. 6. Iterate with your reviewer using the normal GitHub review flow. 7. Once the reviewer is happy with the changes, they will submit them. ## Running the tests You can use the `go test` command, e.g.: * `go test $(go list ./... | grep -v /vendor/)` should run every test except those from vendor packages. If you have commit rights, Jenkins automatically runs the Go tests on every PR, then every subsequent patch. To ask Jenkins to immediately run, any committer can reply (no quotes) "Jenkins: test this" to your PR. ### Perf tests By default, neither `go test` nor Jenkins runs the perf tests, because they take a while. To run the tests yourself, use the `-perf` and `-v` flags to `go test`, e.g.: * `go test -v ./samples/go/csv/... -perf mem` See https://godoc.org/github.com/attic-labs/noms/go/perf/suite for full documentation and flags. To ask Jenkins to run the perf tests for you, reply (no quotes) "Jenkins: perf this" to your PR. Your results will be viewable at http://perf.noms.io/?ds=http://demo.noms.io/perf::pr_$your-pull-request-number/csv-import. Again, only a committer can do this. 
================================================ FILE: Dockerfile ================================================ FROM golang:latest AS build ENV NOMS_SRC=$GOPATH/src/github.com/attic-labs/noms ENV CGO_ENABLED=1 ENV GOOS=linux ENV DOCKER=1 RUN mkdir -pv $NOMS_SRC COPY . ${NOMS_SRC} RUN go test github.com/attic-labs/noms/... RUN go install -v github.com/attic-labs/noms/cmd/noms RUN cp $GOPATH/bin/noms /bin/noms FROM alpine:latest COPY --from=build /bin/noms /bin/noms VOLUME /data EXPOSE 8000 ENTRYPOINT [ "noms" ] CMD ["serve", "/data"] ================================================ FILE: HACKING.md ================================================ # Prerequisites * [Go 1.13 or later](https://golang.org/dl/) * Mac or Linux (Noms isn't currently supported on Windows) # Get ``` git clone https://github.com/attic-labs/noms ``` # Build ``` cd noms go build ./cmd/noms ``` # Test ``` cd noms go test ./go/... go test ./cmd/... ``` # Release Travis automatically creates releases for tagged versions, so the following should do it: ``` git tag latest -f git push origin latest ``` ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Warning - This project is not active Noms is not being maintained. You shouldn't use it, except maybe for fun or research. If you are interested in something like Noms, you probably want Dolt (https://github.com/dolthub/dolt) which is a fork of this project and actively maintained. Send me (aaron at aaronboodman.com) a message if you have questions.
[Use Cases](#use-cases)  |  [Setup](#setup)  |  [Status](#status)  |  [Documentation](./doc/intro.md)  |  [Contact](#contact-us)

[![Build Status](https://travis-ci.org/attic-labs/noms.svg?branch=master)](https://travis-ci.org/attic-labs/noms) [![Docker Build Status](https://img.shields.io/docker/build/noms/noms.svg)](https://hub.docker.com/r/noms/noms/) [![GoDoc](https://godoc.org/github.com/attic-labs/noms?status.svg)](https://godoc.org/github.com/attic-labs/noms) # Welcome *Noms* is a decentralized database philosophically descendant from the Git version control system. Like Git, Noms is: * **Versioned:** By default, all previous versions of the database are retained. You can trivially track how the database evolved to its current state, easily and efficiently compare any two versions, or even rewind and branch from any previous version. * **Synchronizable:** Instances of a single Noms database can be disconnected from each other for any amount of time, then later reconcile their changes efficiently and correctly. Unlike Git, Noms is a database, so it also: * Primarily **stores structured data**, not files and directories (see: [the Noms type system](https://github.com/attic-labs/noms/blob/master/doc/intro.md#types)) * **Scales well** to large amounts of data and concurrent clients * Supports **atomic transactions** (a single instance of Noms is CP, but Noms is typically run in production backed by S3, in which case it is "[effectively CA](https://cloud.google.com/spanner/docs/whitepapers/SpannerAndCap.pdf)") * Supports **efficient indexes** (see: [Noms prolly-trees](https://github.com/attic-labs/noms/blob/master/doc/intro.md#prolly-trees-probabilistic-b-trees)) * Features a **flexible query model** (see: [GraphQL](./go/ngql/README.md)) A Noms database can reside within a file system or in the cloud: * The (built-in) [NBS](./go/nbs) `ChunkStore` implementation provides two back-ends which provide persistence for Noms databases: one for storage in a file system and one for storage in an S3 bucket. Finally, because Noms is content-addressed, it yields a very pleasant programming model. 
Working with Noms is ***declarative***. You don't `INSERT` new data, `UPDATE` existing data, or `DELETE` old data. You simply *declare* what the data ought to be right now. If you commit the same data twice, it will be deduplicated because of content-addressing. If you commit _almost_ the same data, only the part that is different will be written.
## Use Cases #### [Decentralization](./doc/decent/about.md) Because Noms is very good at sync, it makes a decent basis for rich, collaborative, fully-decentralized applications. #### Mobile Offline-First Database Embed Noms into mobile applications, making it easier to build offline-first, fully synchronizing mobile applications.
## Install 1. Download the latest release: - [**Linux**](https://github.com/attic-labs/noms/releases/download/latest/linux.zip) - [**Mac OS**](https://github.com/attic-labs/noms/releases/download/latest/osx.zip) 2. Unzip the directory somewhere and add it to your `$PATH` 3. Verify Noms is installed correctly: ``` $ noms version format version: 7.18 built from <developer build> ```
## Run Import some data: ```shell go install github.com/attic-labs/noms/samples/go/csv/csv-import curl 'https://data.cityofnewyork.us/api/views/kku6-nxdu/rows.csv?accessType=DOWNLOAD' > /tmp/data.csv csv-import /tmp/data.csv /tmp/noms::nycdemo ``` Explore: ```shell noms show /tmp/noms::nycdemo ``` Should show: ```go struct Commit { meta: struct Meta { date: "2017-09-19T19:33:01Z", inputFile: "/tmp/data.csv", }, parents: set {}, value: [ // 236 items struct Row { countAmericanIndian: "0", countAsianNonHispanic: "3", countBlackNonHispanic: "21", countCitizenStatusTotal: "44", countCitizenStatusUnknown: "0", countEthnicityTotal: "44", ... ```
## Status Nobody is working on this right now. You shouldn't rely on it unless you're willing to take over development yourself. ### Major Open Issues These are the major things you'd probably want to fix before relying on this for most systems. * Sync performance with long commit chains (https://github.com/attic-labs/noms/issues/2233) * Migration (https://github.com/attic-labs/noms/issues/3363) * Garbage Collection (https://github.com/attic-labs/noms/issues/3374) * Query language * We started trying to hack in GraphQL but it's incomplete and maybe not the right thing. See: [ngql](./go/ngql/README.md) * [Various other smaller bugs and improvements](https://github.com/attic-labs/noms/issues?q=is%3Aissue+is%3Aopen+label%3AP0)
## Learn More About Noms For the decentralized web: [The Decentralized Database](doc/decent/about.md) Learn the basics: [Technical Overview](doc/intro.md) Tour the CLI: [Command-Line Interface Tour](doc/cli-tour.md) Tour the Go API: [Go SDK Tour](doc/go-tour.md)
## Contact Us Interested in using Noms? Awesome! We would be happy to work with you to help understand whether Noms is a fit for your problem. Reach out at: - [Mailing List](https://groups.google.com/forum/#!forum/nomsdb) - [Twitter](https://twitter.com/nomsdb) ## Licensing Noms is open source software, licensed by Attic Labs, Inc. under the Apache License, Version 2.0. ================================================ FILE: cmd/util/kingpin_command.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package util import "github.com/attic-labs/kingpin" type KingpinHandler func(input string) (exitCode int) type KingpinCommand func(*kingpin.Application) (*kingpin.CmdClause, KingpinHandler) ================================================ FILE: codecov.yml ================================================ codecov: branch: master bot: "mikegray" ci: - "jenkins3.noms.io" coverage: precision: 2 # how many decimal places to display in the UI: 0 <= value <= 4 round: down # how coverage is rounded: down/up/nearest range: 70...100 # custom range of coverage colors from red -> yellow -> green notify: slack: default: url: "secret:n+BYhIXTXsaCiMKB3vOf6yP68ytdKd3WpXtJFWPEUsEWXDiGnU5dTB5DO2yv8tR0COdxvs7K31hVpEfHEXdoXOaQhUw3FKf3fh8KZDLN7CGTbeDhw1uNGGyBr2d2TWnopzYtcXomdwMmuckARtiWQx0YXJiZY9YyCrIoDK9HIJQ=" branches: null threshold: 5.0 attachments: "tree, diff" status: project: default: enabled: yes target: auto branches: null threshold: null if_no_uploads: error if_not_found: success if_ci_failed: error patch: default: enabled: yes target: auto branches: null threshold: null if_no_uploads: error if_not_found: success if_ci_failed: error changes: default: enabled: yes branches: null if_no_uploads: error if_not_found: success if_ci_failed: error comment: layout: "tree" branches: null behavior: default 
================================================ FILE: doc/cli-tour.md ================================================ [Home](../README.md) » [Technical Overview](intro.md)  |  [Use Cases](../README.md#use-cases)  |  **Command-Line Interface**  |  [Go bindings Tour](go-tour.md) |  [Path Syntax](spelling.md)  |  [FAQ](faq.md) 

# A Short Tour of the Noms CLI This is a quick introduction to the Noms command-line interface. It should only take a few minutes to read. There's also a screencast covering some of this if you prefer: [](https://www.youtube.com/watch?v=ncITL4xdXh4) ## Install Noms ... if you haven't already. Follow the instructions [here](https://github.com/attic-labs/noms#setup). ## The `noms` command Now you should be able to run `noms`: ```shell > noms Noms is a tool for goofing with Noms data. Usage: noms command [arguments] The commands are: diff Shows the difference between two objects ds Noms dataset management log Displays the history of a Noms dataset serve Serves a Noms database over HTTP show Shows a serialization of a Noms object sync Moves datasets between or within databases version Display noms version Use "noms help [command]" for more information about a command. ``` Without any arguments, `noms` lists out all available commands. To get information on a specific command, we can use `noms help [command]`: ```shell > noms help sync usage: noms sync [options] See Spelling Objects at https://github.com/attic-labs/noms/blob/master/doc/spelling.md for details on the object and dataset arguments. ... ``` ## noms ds The `noms ds` command lists the _datasets_ within a particular database: ```shell > noms ds http://demo.noms.io ... sf-film-locations/raw sf-film-locations ... ``` ## noms log Noms datasets are versioned. You can see the history with `log`: ```shell > !? noms log http://demo.noms.io::sf-film-locations commit aprsmg0j2eegk8eehbgj7cd3tmmd1be8 Parent: None Date: "2017-09-19T21:42:46Z" InputPath: "http://localhost:8000::#dksek6tuf8ens06bi4culq85tfp5q4cg.value" ... ``` Note that Noms is a typed system. What is being shown here for each entry is not text, but a serialization of the diff between two datasets. 
## noms show You can see the entire serialization of any object in the database with `noms show`: ```shell > noms show 'http://demo.noms.io::#aprsmg0j2eegk8eehbgj7cd3tmmd1be8' struct Commit { meta: struct {}, parents: Set>>, value: List, }({ meta: Meta { date: "2016-07-25T18:34:00+0000", inputPath: "http://localhost:8000::sf-film-locations/raw.value", }, parents: { c506ta03786j48a07he83ju669u78qa2, }, value: [ // 1,241 items Row { Actor1: "Siddarth", ... ``` ## noms sync You can work with Noms databases that are remote exactly the same as you work with local databases. But it's frequently useful to move data to a local machine, for example, to make a private fork or to work with the data disconnected from the source database. Moving data in Noms is done with the `sync` command. Note that unlike Git, we do not make a distinction between _push_ and _pull_. It's the same operation in both directions: ```shell > noms sync http://demo.noms.io::sf-film-locations /tmp/noms::films > noms ds /tmp/noms films ``` We can now make an edit locally: ```shell > go install github.com/attic-labs/noms/samples/go/csv/... > csv-export /tmp/noms::films > /tmp/film-locations.csv ``` Open /tmp/film-locations.csv and edit it, then: ```shell > csv-import --column-types=String,String,String,String,String,String,String,String,Number,String,String \ /tmp/film-locations.csv /tmp/noms::films ``` ## noms diff The `noms diff` command can show you the differences between any two values. 
Let's see our change: ```shell > noms diff http://demo.noms.io::sf-film-locations /tmp/noms::films ./.meta { - "date": "2016-07-25T18:51:23+0000" + "date": "2016-07-25T22:51:14+0000" + "inputFile": "/tmp/film-locations.csv" - "inputPath": "http://demo.noms.io::sf-film-locations/raw.value" ./.parents { - pckdvpvr9br1fie6c3pjudrlthe7na18 + q4jcc2i7kntkjiipvjgpr5r02ldroj0g } ./.value[0] { - "Locations": "Epic Roasthouse (399 Embarcadero)" + "Locations": "Epic Roadhouse (399 Embarcadero)" ``` ================================================ FILE: doc/decent/about.md ================================================ [Home](../../README.md) » [Use Cases](../../README.md#use-cases) » **Decentralized** » **About**  |  [Quickstart](quickstart.md)  |  [Architectures](architectures.md)  |  [P2P Chat Demo](demo-p2p-chat.md)  |  [IPFS Chat Demo](demo-ipfs-chat.md)

# Noms — The Decentralized Database [Noms](http://noms.io) makes it ~~easy~~ tractable to create rich, multiuser, collaborative, fully-decentralized applications. Like most databases, Noms features a rich data model, atomic transactions, support for large-scale data, and efficient searches, scans, reads, and updates. Unlike any other database, Noms has built-in multiparty sync and conflict resolution. This feature makes Noms a very good fit for P2P decentralized applications. Any number of dapp peers in a P2P network can concurrently modify the same logical Noms database, and continuously and efficiently sync their changes with each other. All peers will converge to the same state. For many applications, peers can store an entire local copy of the data they are interested in. For larger applications, it should be possible to back Noms by a decentralized blockstore like IPFS, Swarm, or Sia (or in the future, Filecoin), and store large-scale data in a completely decentralized way, without replicating it on every node. Noms also has a blockstore for S3, which is ideal for applications that have some centralized components. **We'd love to talk to you about the possibility of using noms in your project** so please don't hesitate to contact us at [noms@attic.io](mailto:noms@attic.io). ## How it Works Think of Noms like a programmable Git: changes are bundled as commits which reference previous states of the database. Apps pull changes from peers and merge them using a principled set of APIs and strategies. Except that rather than users manually pulling and merging, applications typically do this continuously, automatically converging to a shared state. Your application uses a [Go client library](https://github.com/attic-labs/noms/blob/master/doc/go-tour.md) to interact with Noms data. 
There is also a [command-line interface](https://github.com/attic-labs/noms/blob/master/doc/cli-tour.md) for working with data and initial support for a [GraphQL-based query language](https://github.com/attic-labs/noms/blob/master/go/ngql/README.md). Some additional features include: * **Versioning**: It’s easy to use, compare, or revert to older database versions * **Efficient diffs**: diffing even huge datasets is efficient due to noms’ use of a novel BTree-like data structure called a [Prolly Tree](https://github.com/attic-labs/noms/blob/master/doc/intro.md#prolly-trees-probabilistic-b-trees) * **Efficient storage**: data are chunked and content-addressable, so there is exactly one copy of each chunk in the database, shared by other data that reference it. Small changes to massive data structures always result in small operations. * **Verifiable**: The entire database rolls up to a single 20-byte hash that uniquely represents the database at that moment - anyone can verify that a particular database hashes to the same value Read the [Noms design overview](https://github.com/attic-labs/noms/blob/master/doc/intro.md). ## Status For overall status of the database, see [Noms Status](../../README.md#status). For the decentralized use case in particular: we are fairly confident in this approach and are actively looking for partners to work with to build it out. - [x] Demonstrate core concept of using Noms to continuously sync across many users (Done! See noms-chat demos) - [ ] Demonstrate using libp2p or similar to traverse NATs - [ ] Investigate backing IPFS with Noms rather than the reverse - this should improve stability and dramatically improve local performance - [ ] Demonstrate using IPFS with a schema that permits nodes to disappear **_If you would like to use noms in your project we’d love to hear from you_**: drop us an email ([noms@attic.io](mailto:noms@attic.io)) or send us a message in slack ([slack.noms.io](http://slack.noms.io)). 
================================================ FILE: doc/decent/architectures.md ================================================ [Home](../../README.md) » [Use Cases](../../README.md#use-cases) » **Decentralized** » [About](about.md)  |  [Quickstart](quickstart.md)  | **Architectures**  |  [P2P Chat Demo](demo-p2p-chat.md)  |  [IPFS Chat Demo](demo-ipfs-chat.md)

# Architectures There are many possible ways to use Noms as part of a decentralized application. Noms can naturally be mixed and matched with other decentralized tools like blockchains, IPFS, etc. This page lists a few approaches we find promising. ## Classic P2P Architecture Noms can be used to implement apps in a peer-to-peer configuration. Each instance of the application (i.e., each "node") maintains a database locally with the data that is relevant to it. When a node creates new data, it commits that data to its database and broadcasts a message to its peers that contains the hash of its latest commit. ![P2P Architecture](./p2p-arch.png) Peers that are listening for these messages can decide if that data is relevant to them. Those that are interested can pull the new data from the publisher. The two clients efficiently communicate so that only data that isn't present in the requesting client is transmitted (much the same way that one git client sends source changes to another). Peers can use a flow similar to the following in order to sync changes with one another: ```nohighlight for { listen for new message if new msg is relevant { if new msg is ancestor of current commit { // nothing to do continue } pull new data from sender of msg if current head is ancestor of new msg { // fast forward to the new commit set head of dataset to new commit continue } merge new with current head and commit publish new commit } } ``` Noms has a default [merge policy](https://github.com/attic-labs/noms/blob/2d0e9e738370d49cc09e8fa6e290ceca1c3e2005/go/merge/three_way.go#L14) that covers many classes of concurrent operations. If the application restricts itself to only operations that are mergeable by this policy, then Noms can automatically merge all concurrent changes. In this case, the entire database is effectively a CRDT. If this is not sufficient, then applications can create their own merge policies, implementing whatever merge is appropriate for their use case. 
# Decentralized Chunkstore Architecture Another potential architecture for decentralized apps uses a decentralized chunkstore (such as IPFS, Swarm, or Sia) rather than local databases. In this case, rather than each node maintaining a local datastore, Noms chunks are stored in a decentralized chunkstore. The underlying chunkstore is responsible for making chunks available when needed. ![Decentralized Architecture](./dist-arch.png) The flow used by peers to sync with one another is similar to the peer-to-peer architecture. The main difference is data is not duplicated on local machines and doesn't have to be pulled during sync. Each app keeps track of its latest commit in the chunk store. ```nohighlight for { listen for new message if new msg is relevant { if new msg is ancestor of current commit { // nothing to do continue } // No pull necessary if current head is ancestor of new msg { // fast forward to the new commit set head of dataset to new commit continue } merge new with current head and commit publish new commit } } ``` We have a prototype implementation of an IPFS-based chunkstore. If you are interested in pursuing this direction, let us know! ================================================ FILE: doc/decent/demo-ipfs-chat.md ================================================ [Home](../../README.md) » [Use Cases](../../README.md#use-cases) » **Decentralized** » [About](about.md)  |  [Quickstart](quickstart.md)  |  [Architectures](architectures.md)  |  [P2P Chat Demo](demo-p2p-chat.md)  |  **IPFS Chat Demo**

# Demo App: IPFS-based Decentralized Chat This sample app demonstrates backing a P2P noms app by a decentralized blockstore (in this case, IPFS). Data is pulled off the network dynamically as needed - each client doesn't need a complete copy. # Build and Run Demo app code is in the [ipfs-chat](https://github.com/attic-labs/noms/tree/master/samples/go/decent/ipfs-chat/) directory. To get it up and running take the following steps: * Use git to clone the noms repository onto your computer: ```shell go get github.com/attic-labs/noms/samples/go/decent/ipfs-chat ``` * From the noms/samples/go/decent/ipfs-chat directory, build the program with the following command: ```shell go build ``` * Run the ipfs-chat client with the following command: ```shell ./ipfs-chat client --username --node-idx=1 ipfs:/tmp/ipfs1::chat >& /tmp/err1 ``` * Run a second ipfs-chat client with the following command: ```shell ./ipfs-chat client --username --node-idx=2 ipfs:/tmp/ipfs2::chat >& /tmp/err2 ``` If desired, ipfs-chat can be run as a daemon which will replicate all chat content in a local store which will enable clients to go offline without causing data to become unavailable to other clients: ```shell ./ipfs-chat daemon --node-idx=3 ipfs:/tmp/ipfs3::chat ``` Note: the 'node-idx' argument ensures that each IPFS-based program uses a distinct set of ports. This is useful when running multiple IPFS-based programs on the same machine. ================================================ FILE: doc/decent/demo-p2p-chat.md ================================================ [Home](../../README.md) » [Use Cases](../../README.md#use-cases) » **Decentralized** » [About](about.md)  |  [Quickstart](quickstart.md)  |  [Architectures](architectures.md)  |  **P2P Chat Demo**  |  [IPFS Chat Demo](demo-ipfs-chat.md)

# Demo App: P2P Decentralized Chat This sample demonstrates the simplest possible case of building a p2p app on top of Noms. Each node stores a complete copy of the data it is interested in, and peers find each other using [IPFS pubsub](https://ipfs.io/blog/25-pubsub/). Currently, nodes have to have a publicly routable IP, but it should be possible to use [libP2P](https://github.com/libp2p) or similar to connect to most nodes. # Build and Run Demo app code is in the [p2p](https://github.com/attic-labs/noms/tree/master/samples/go/decent/p2p-chat) directory. To get it up and running take the following steps: * Use git to clone the noms repository onto your computer: ```shell go get github.com/attic-labs/noms/samples/go/decent/p2p-chat ``` * From the noms/samples/go/decent/p2p-chat directory, build the program with the following command: ```shell go build ``` * Run the p2p client with the following command: ```shell mkdir /tmp/noms1 ./p2p-chat client --username= --node-idx=1 /tmp/noms1 >& /tmp/err1 ``` * Run a second p2p client with the following command: ```shell mkdir /tmp/noms2 ./p2p-chat client --username= --node-idx=2 /tmp/noms2 >& /tmp/err2 ``` Note: the p2p client relies on IPFS for its pub/sub implementation. The 'node-idx' argument ensures that each IPFS-based node uses a distinct set of ports. This is useful when running multiple IPFS-based programs on the same machine. ================================================ FILE: doc/decent/quickstart.md ================================================ [Home](../../README.md) » [Use Cases](../../README.md#use-cases) » **Decentralized** » [About](about.md)  |  **Quickstart**  |  [Architectures](architectures.md)  |  [P2P Chat Demo](demo-p2p-chat.md)  |  [IPFS Chat Demo](demo-ipfs-chat.md)

# How to Use Noms in a Decentralized App If you’d like to use noms in your project we’d love to hear from you: drop us an email ([noms@attic.io](mailto:noms@attic.io)) or send us a message in slack ([slack.noms.io](http://slack.noms.io)). The steps you’ll need to take are: 1. Decide how you’ll model your problem using noms’ datatypes: boolean, number, string, blob, map, list, set, structs, ref, and union. (Note: if you are interested in using CRDTs as an alternative to classic datatypes please let us know.) 2. Consider... * How peers will discover each other * How peers will notify each other of changes * How and when they will pull changes, and * What potential there is for conflicting changes. Consider modeling your problem so that changes commute in order to make merging easier. In our [p2p sample](https://github.com/attic-labs/noms/blob/master/doc/decent/demo-p2p-chat.md) application, all peers periodically broadcast their HEAD on a known channel using [IPFS pubsub](https://ipfs.io/blog/25-pubsub/), pull each others' changes immediately, and avoid conflicts by using operations that can be resolved with Noms' built in merge policies. This is basically the simplest possible approach, but lots of options are possible. For example, an alternate approach for discoverability could be to keep a registry of all participating nodes in a blockchain (e.g., by storing them in an Ethereum smart contract). One could store either the current HEAD of each node (updated whenever the node changes state), or just an IPNS name that the node is writing to. As an example of changes that commute consider modeling a stream of chat messages. Appending messages from both parties to a list is not commutative; the result depends on the order in which messages are added to the list. An example of a commutative strategy is adding the messages to a `Map` keyed by `Struct{sender, ordinal}`: the resulting `Map` is the same no matter what order messages are added. 3. 
Vendor the code into your project. 4. Decide which type of storage you'd like to use: memory (convenient for playing around), disk, IPFS, or S3. (If you want to implement a store on top of another type of storage that's possible too; email us or reach out on slack and we can help.) 5. Set up and instantiate a database for your storage. Generally, you use the spec package to parse a [dataset spec](https://github.com/attic-labs/noms/blob/master/doc/spelling.md) like `mem::mydataset` which you can then ask for [`Database`](https://github.com/attic-labs/noms/blob/master/go/datas/database.go) and [`Dataset`](https://github.com/attic-labs/noms/blob/master/go/datas/dataset.go). * **Memory**: no setup required, just instantiate it: ```go sp := spec.ForDataset("mem::test") // Dataset name is "test" ``` * **Disk**: identify a directory for storage, say `/path/to/chunks`, and then instantiate: ```go sp := spec.ForDataset("/path/to/chunks::test") // Dataset name is "test" ``` * **IPFS**: identify an IPFS node by directory. If an IPFS node doesn't exist at that directory, one will be created: ```go sp := spec.ForDataset("ipfs:/path/to/ipfs_repo::test") // Dataset name is "test" ``` * **S3**: Follow the [S3 setup instructions](https://github.com/attic-labs/noms/blob/master/go/nbs/NBS-on-AWS.md) then instantiate a database and dataset: ```go sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2"))) store := nbs.NewAWSStore("dynamo-table", "store-name", "s3-bucket", s3.New(sess), dynamodb.New(sess), 1<<28)) database := datas.NewDatabase(store) dataset := database.GetDataset("aws://dynamo-table:s3-bucket/store-name::test") // Dataset name is "test" ``` 7. Implement using the [Go API](https://github.com/attic-labs/noms/blob/master/doc/go-tour.md). 
If you're just playing around you could try something like this: ```go package main import ( "fmt" "os" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/types" ) // Usage: quickstart /path/to/store::ds func main() { sp, err := spec.ForDataset(os.Args[1]) if err != nil { fmt.Fprintf(os.Stderr, "Unable to parse spec: %s, error: %s\n", sp, err) os.Exit(1) } defer sp.Close() db := sp.GetDatabase() if headValue, ok := sp.GetDataset().MaybeHeadValue(); !ok { data := types.NewList(sp.GetDatabase(), newPerson("Rickon", true), newPerson("Bran", true), newPerson("Arya", false), newPerson("Sansa", false), ) fmt.Fprintf(os.Stdout, "data type: %v\n", types.TypeOf(data).Describe()) _, err = db.CommitValue(sp.GetDataset(), data) if err != nil { fmt.Fprint(os.Stderr, "Error commiting: %s\n", err) os.Exit(1) } } else { // type assertion to convert Head to List personList := headValue.(types.List) // type assertion to convert List Value to Struct personStruct := personList.Get(0).(types.Struct) // prints: Rickon fmt.Fprintf(os.Stdout, "given: %v\n", personStruct.Get("given")) } } func newPerson(givenName string, male bool) types.Struct { return types.NewStruct("Person", types.StructData{ "given": types.String(givenName), "male": types.Bool(male), }) } ``` 8. You can inspect data that you've committed via the [noms command-line interface](https://github.com/attic-labs/noms/blob/master/doc/cli-tour.md). For example: ```shell noms log /path/to/store::ds noms show /path/to/store::ds ``` > Note that Memory tables won't be inspectable because they exist only in the memory of the process that created them. 9. Implement pull and merge. The [pull API](../../go/datas/pull.go) is used to pull changes from a peer and the [merge API](../../go/merge/) is used to merge changes before commit. There's an [example of merging in the IPFS-based-chat sample app](https://github.com/attic-labs/noms/blob/master/samples/go/ipfs-chat/pubsub.go). 
================================================ FILE: doc/faq.md ================================================ [Home](../README.md) » [Technical Overview](intro.md)  |  [Use Cases](../README.md#use-cases)  |  [Command-Line Interface](cli-tour.md)  |  [Go bindings Tour](go-tour.md) |  [Path Syntax](spelling.md)  |  **FAQ** 

# Frequently Asked Questions ### Decentralized like BitTorrent? No, decentralized like Git. Specifically, Noms isn't itself a peer-to-peer network. If you can get two instances to share data, somehow, then they can synchronize. Noms doesn't define how this should happen though. Currently, instances mainly share data via either HTTP/DNS or a filesystem. But it should be easy to add other mechanisms. For example, it seems like Noms could run well on top of BitTorrent, or IPFS. You should [look into it](https://github.com/attic-labs/noms/issues/2123). ### Isn't it wasteful to store every version? Noms deduplicates chunks of data that are identical within one database. So if multiple versions of one dataset share a lot of data, or if the same data is present in multiple datasets, Noms only stores one copy. That said, it is definitely possible to have write patterns that defeat this. Deduplication is done at the chunk level, and chunks are currently set to an average size of 4KB. So if you change about 1 byte in every 4096 in a single commit, and those changed bytes are well-distributed throughout the dataset, then we will end up making a complete copy of the dataset. ### Is there a way to not store the entire history? Theoretically, definitely. In Git, for example, the concept of "shallow clones" exists, and we could do something similar in Noms. This has not been implemented yet. ### How does Noms handle conflicts? Noms provides several built-in policies that can automatically merge common cases of conflicts. For example concurrent edits to sets are always mergeable and concurrent edits to different keys in a map or struct are also mergeable. The conflict resolution system is pluggable so new policies that are application-specific can be added. However, it's possible to build surprisingly complex applications with just the built-in policies. ### Why don't you just use CRDTs? 
[Convergent (or Commutative) Replicated Data Types (CRDTs)](http://hal.upmc.fr/inria-00555588/document) are a class of distributed data structures that provably converge to some agreed-upon state with no synchronization. Stated differently: CRDTs define a merge policy that is commutative over all their operations. CRDTs are nice because they require no custom conflict/merge code from the developer. Noms defines a set of intuitive built-in merge policies for its core datatypes. For example, the default policy makes all operations on Noms Sets commute (add wins in the case of concurrent remove/add). This means that with the default policy, Noms Sets are a CRDT. If your application uses only operations on Noms datatypes that can be merged with whatever merge policy you are using, then your schema is a CRDT. It's possible to build surprisingly complex applications this way with just the default policy. Noms also allows you to provide your own custom policy. If your policy commutes, then the resulting datatype will be a CRDT. However, it would be nice if application developers could more easily opt-in to using only mergeable operations, thereby enforcing that their schema is a CRDT, and providing confidence that custom merge logic doesn't need to be implemented. More generally, perhaps there could be a way to test that all possible conflict cases have been handled by the developer. This would allow developers to implement their own custom CRDTs. This is something we'd like to research in the future. ### Why don't you support Windows? We are a tiny team and we all personally use Macs as our development machines, and we use Linux in production. These two platforms are very close to identical, and so we can generally test on Mac and assume it will work on Linux. Adding Windows would add significant complexity to our code and build processes which we're not willing to take on. ### But you'll accept patches for Windows, right? 
No, because then we'll have to maintain those patches. ### Are there any workarounds for Windows? You can use it in a virtual machine. We have also heard Noms works OK with gitbash or cygwin, but that's coincidence. ### Why is it called Noms? 1. It's insert-only. OMNOMNOM. 2. It's content addressed. Every value has its own hash, or [name](http://dictionary.reverso.net/french-english/nom). ### Are you sure Noms doesn't stand for something? Pretty sure. But if you like, you can pretend it stands for Non-Mutable Store. ================================================ FILE: doc/go-tour.md ================================================ [Home](../README.md) » [Technical Overview](intro.md)  |  [Use Cases](../README.md#use-cases)  |  [Command-Line Interface](cli-tour.md)  |  **Go bindings Tour** |  [Path Syntax](spelling.md)  |  [FAQ](faq.md) 

# A Short Tour of Noms for Go This is a short introduction to using Noms from Go. It should only take a few minutes if you have some familiarity with Go. During the tour, you can refer to the complete [Go SDK Reference](https://godoc.org/github.com/attic-labs/noms) for more information on anything you see. ## Requirements * [Noms command-line tools](https://github.com/attic-labs/noms#setup) * [Go v1.6+](https://golang.org/dl/) * Ensure your [$GOPATH](https://github.com/golang/go/wiki/GOPATH) is configured ## Start a Local Database Let's create a local database to play with: ```sh > mkdir /tmp/noms-go-tour > noms serve /tmp/noms-go-tour ``` ## [Database](https://github.com/attic-labs/noms/blob/master/go/datas/database.go) Leave the server running, and in a separate terminal: ```sh > mkdir noms-tour > cd noms-tour ``` Then use your favorite editor so that we can start to play with code. To get started with Noms, first create a Database: ```go package main import ( "fmt" "os" "github.com/attic-labs/noms/go/spec" ) func main() { sp, err := spec.ForDatabase("http://localhost:8000") if err != nil { fmt.Fprintf(os.Stderr, "Could not access database: %s\n", err) return } defer sp.Close() } ``` Now let's run it: ```sh > go run noms-tour.go ``` If you did not leave the server running you would see output of ```Could not access database``` here, otherwise your program should exit cleanly. See [Spelling in Noms](https://github.com/attic-labs/noms/blob/master/doc/spelling.md) for more information on database spec strings. ## [Dataset](https://github.com/attic-labs/noms/blob/master/go/datas/dataset.go) Datasets are the main interface you'll use to work with Noms. 
Let's update our example to use a Dataset spec string: ```go package main import ( "fmt" "os" "github.com/attic-labs/noms/go/spec" ) func main() { sp, err := spec.ForDataset("http://localhost:8000::people") if err != nil { fmt.Fprintf(os.Stderr, "Could not create dataset: %s\n", err) return } defer sp.Close() if _, ok := sp.GetDataset().MaybeHeadValue(); !ok { fmt.Fprintf(os.Stdout, "head is empty\n") } } ``` Now let's run it: ```sh > go run noms-tour.go head is empty ``` Since the dataset does not yet have any values you see ```head is empty```. Let's add some data to make it more interesting: ```go package main import ( "fmt" "os" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/types" ) func newPerson(givenName string, male bool) types.Struct { return types.NewStruct("Person", types.StructData{ "given": types.String(givenName), "male": types.Bool(male), }) } func main() { sp, err := spec.ForDataset("http://localhost:8000::people") if err != nil { fmt.Fprintf(os.Stderr, "Could not create dataset: %s\n", err) return } defer sp.Close() db := sp.GetDatabase() data := types.NewList(db, newPerson("Rickon", true), newPerson("Bran", true), newPerson("Arya", false), newPerson("Sansa", false), ) fmt.Fprintf(os.Stdout, "data type: %v\n", types.TypeOf(data).Describe()) _, err = db.CommitValue(sp.GetDataset(), data) if err != nil { fmt.Fprint(os.Stderr, "Error commiting: %s\n", err) } } ``` Now you will get output of the data type of our Dataset value: ```shell > go run noms-tour.go data type: List ``` Now you can access the data via your program: ```go package main import ( "fmt" "os" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/types" ) func main() { sp, err := spec.ForDataset("http://localhost:8000::people") if err != nil { fmt.Fprintf(os.Stderr, "Could not create dataset: %s\n", err) return } defer sp.Close() if headValue, ok := sp.GetDataset().MaybeHeadValue(); !ok { fmt.Fprintf(os.Stdout, "head is empty\n") } else { // type 
assertion to convert Head to List personList := headValue.(types.List) // type assertion to convert List Value to Struct personStruct := personList.Get(0).(types.Struct) // prints: Rickon fmt.Fprintf(os.Stdout, "given: %v\n", personStruct.Get("given")) } } ``` Running it now: ```sh > go run noms-tour.go given: Rickon ``` You can see this data using the command-line too: ```sh > noms ds http://localhost:8000 people > noms show http://localhost:8000::people struct Commit { meta: struct {}, parents: set {}, value: [ // 4 items struct Person { given: "Rickon", male: true, }, struct Person { given: "Bran", male: true, }, struct Person { given: "Arya", male: false, }, struct Person { given: "Sansa", male: false, }, ], } ``` Let's add some more data. ```go package main import ( "fmt" "os" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/types" ) func main() { sp, err := spec.ForDataset("http://localhost:8000::people") if err != nil { fmt.Fprintf(os.Stderr, "Could not create dataset: %s\n", err) return } defer sp.Close() if headValue, ok := sp.GetDataset().MaybeHeadValue(); !ok { fmt.Fprintf(os.Stdout, "head is empty\n") } else { // type assertion to convert Head to List personList := headValue.(types.List) personEditor := personList.Edit() data := personEditor.Append( types.NewStruct("Person", types.StructData{ "given": types.String("Jon"), "family": types.String("Snow"), "male": types.Bool(true), }), ).List() fmt.Fprintf(os.Stdout, "data type: %v\n", types.TypeOf(data).Describe()) _, err = sp.GetDatabase().CommitValue(sp.GetDataset(), data) if err != nil { fmt.Fprintf(os.Stderr, "Error committing: %s\n", err) } } } ``` Running this: ```sh > go run noms-tour.go data type: List ``` Datasets are versioned.
When you *commit* a new value, you aren't overwriting the old value, but adding to a historical log of values: ```sh > noms log http://localhost:8000::people commit ba3lvopbgcqqnofm3qk7sk4j2doroj1l Parent: f0b1befu9jp82r1vcd4gmuhdno27uobi (root) { + struct Person { + family: "Snow", + given: "Jon", + male: true, + } } commit f0b1befu9jp82r1vcd4gmuhdno27uobi Parent: hshltip9kss28uu910qadq04mhk9kuko commit hshltip9kss28uu910qadq04mhk9kuko Parent: None ``` ## Values Noms supports a [variety of datatypes](https://github.com/attic-labs/noms/blob/master/doc/intro.md#types) beyond List, Struct, String, and Bool we used above. ## Samples You can continue learning more about the Noms Go SDK by looking at the documentation and by reviewing the [samples](https://github.com/attic-labs/noms/blob/master/samples/go). The [hr sample](https://github.com/attic-labs/noms/blob/master/samples/go/hr) is a more complete implementation of our example above and will help you to see further usage of the other datatypes. ================================================ FILE: doc/intro.md ================================================ [Home](../README.md) » **Technical Overview**  |  [Use Cases](../README.md#use-cases)  |  [Command-Line Interface](cli-tour.md)  |  [Go bindings Tour](go-tour.md) |  [Path Syntax](spelling.md)  |  [FAQ](faq.md) 

# Noms Technical Overview Most conventional database systems share two central properties: 1. Data is modeled as a single point-in-time. Once a transaction commits, the previous state of the database is either lost, or available only as a fallback by reconstructing from transaction logs. 2. Data is modeled as a single source of truth. Even large-scale distributed databases which are internally a fault-tolerant network of nodes, present the abstraction to clients of being a single logical master, with which clients must coordinate in order to change state. Noms blends the properties of decentralized systems, such as [Git](https://git-scm.com/), with properties of traditional databases in order to create a general-purpose decentralized database, in which: 1. Any peer’s state is as valid as any other. 2. All commits of the database are retained and available at any time. 3. Any peer is free to move forward independently of communication from any other—while retaining the ability to reconcile changes at some point in the future. 4. The basic properties of structured databases (efficient queries, updates, and range scans) are retained. 5. Diffs between any two sets of data can be computed efficiently. 6. Synchronization between disconnected copies of the database can be performed efficiently and correctly. ## Basics As in Git, [Bitcoin](https://bitcoin.org/en/), [Ethereum](https://www.ethereum.org/), [IPFS](https://ipfs.io/), [Camlistore](https://camlistore.org/), [bup](https://bup.github.io/), and other systems, Noms models data as a [directed acyclic graph](https://en.wikipedia.org/wiki/Directed_acyclic_graph) of nodes in which every node has a _hash_. A node's hash is derived from the values encoded in the node and (transitively) from the values encoded in all nodes which are reachable from that node. In other words, a Noms database is a single large [Merkle DAG](https://github.com/jbenet/random-ideas/issues/20). 
When two nodes have the same hash, they represent identical logical values and the respective subgraph of nodes reachable from each are topologically equivalent. Importantly, in Noms, the reverse is also true: a single logical value has one and only one hash. When two nodes have different hashes, they represent different logical values. Noms extends the ideas of prior systems to enable efficiently computing and reconciling differences, synchronizing state, and building indexes over large-scale, structured data. ## Databases and Datasets A _database_ is the top-level abstraction in Noms. A database has two responsibilities: it provides storage of [content-addressed](https://en.wikipedia.org/wiki/Content-addressable_storage) chunks of data, and it keeps track of zero or more _datasets_. A Noms database can be implemented on top of any underlying storage system that provides key/value storage with at least optional optimistic concurrency. We only use optimistic concurrency to store the current value of each dataset. Chunks themselves are immutable. We have implementations of Noms databases on top of our own file-backed store [Noms Block Store (NBS)](https://github.com/attic-labs/noms/tree/master/go/nbs) (usually used locally), our own [HTTP protocol](https://github.com/attic-labs/noms/blob/master/go/datas/database_server.go) (used for working with a remote database), [Amazon DynamoDB](https://aws.amazon.com/dynamodb/), and [memory](https://github.com/attic-labs/noms/blob/master/go/chunks/memory_store.go) (mainly used for testing). Here's an example of creating an http-backed database using the [Go Noms SDK](go-tour.md): ```go package main import ( "fmt" "os" "github.com/attic-labs/noms/go/spec" ) func main() { sp, err := spec.ForDatabase("http://localhost:8000") if err != nil { fmt.Fprintf(os.Stderr, "Could not access database: %s\n", err) return } defer sp.Close() } ``` A dataset is nothing more than a named pointer into the DAG.
Consider the following command to copy the dataset named `foo` to the dataset named `bar` within a database: ```shell noms sync http://localhost:8000::foo http://localhost:8000::bar ``` This command is trivial and causes basically zero IO. Noms first resolves the dataset name `foo` in `http://localhost:8000`. This results in a hash. Noms then checks whether that hash exists in the destination database (which in this case is the same as the source database), finds that it does, and then adds a new dataset pointing at that chunk. Syncs across databases can be efficient by the same logic if the destination database already has all or most of the required chunks. ## Time All data in Noms is immutable. Once a piece of data is stored, it is never changed. To represent state changes, Noms uses a progression of `Commit` structures. [TODO - diagram] As in Git, Commits typically have one _parent_, which is the previous commit in time. But in the cases of merges, a Noms commit can have multiple parents. ### Chunks When a value is stored in Noms, it is stored as one or more chunks of data. Chunk boundaries are typically created implicitly, as a way to store large collections efficiently (see [Prolly Trees](#prolly-trees-probabilistic-b-trees)). Programmers can also create explicit chunk boundaries using the `Ref` type (see [Types](#types)). [TODO - Diagram] Every chunk encodes a single logical value (which may be a component of another value and/or be composed of sub-values). Chunks are [addressed](https://en.wikipedia.org/wiki/Content-addressable_storage) in the Noms persistence layer by the hash of the value they encode.
## Types Noms is a typed system, meaning that every Noms value is classified into one of the following _types_: * `Boolean` * `Number` (arbitrary precision binary) * `String` (utf8-encoded) * `Blob` (raw binary data) * `Set` * `List` * `Map` * Unions: `T|U|V|...` * `Ref` (explicit out-of-line references) * `Struct` (user-defined record types, e.g., `Struct Person { name: String, age?: Number })` * `Type` (A value that stores a Noms type) Blobs, sets, lists, and maps can be gigantic - Noms will _chunk_ these types into reasonable sized parts internally for efficient storage, searching, and updating (see [Prolly Trees](#prolly-trees-probabilistic-b-trees) below for more on this). Strings, numbers, unions, and structs are not chunked, and should be used for "reasonably-sized" values. Use `Ref` if you need to force a particular value to be in a different chunk for some reason. Types serve several purposes in Noms: 1. Most importantly, types make Noms data self-describing. You can use the `types.TypeOf` function on any Noms `Value`, no matter how large, and get a very precise description of the entire value and all values reachable from it. This allows software to interoperate without prior agreement or planning. 2. Users of Noms can define their own structures and publish data that uses them. This allows for ad-hoc standardization of types within communities working on similar data. 3. Types can be used _structurally_. A program can check incoming data against a required type. If the incoming root chunk matches the type, or is a superset of it, then the program can proceed with certainty of the shape of all accessible data. This enables richer interoperability between software, since schemas can be expanded over time as long as a compatible subset remains. 4. Eventually, we plan to add type restrictions to datasets, which would enforce the allowed types that can be committed to a dataset. This would allow something akin to schema validation in traditional databases. 
### Refs vs Hashes A _hash_ in Noms is just like the hashes used elsewhere in computing: a short string of bytes that uniquely identifies a larger value. Every value in Noms has a hash. Noms currently uses the [sha2-512](https://github.com/attic-labs/noms/blob/master/go/hash/hash.go#L7) hash function, but that can change in future versions of the system. A _ref_ is different in subtle, but important ways. A `Ref` is a part of the type system - a `Ref` is a value. Anywhere you can find a Noms value, you can find a `Ref`. For example, you can commit a `Ref` to a dataset, but you can't commit a bare hash. The difference is that `Ref` carries the type of its target, along with the hash. This allows us to efficiently validate commits that include `Ref`, among other things. ### Type Accretion Noms is an immutable database, which leads to the question: How do you change the schema? If I have a dataset containing `Set`, and I later decide that it should be `Set`, what do I do? You might say that you just commit the new type, but that would mean that users can't look at a dataset and understand what types previous versions contained, without manually exploring every one of those commits. We call our solution to this problem _Type Accretion_. If you construct a `Set` containing only `Number`s, its type will be `Set`. If you then insert a string into this set, the type of the resulting value is `Set`. This is usually completely implicit, done based on the data you store (you can set types explicitly though, which is useful in some cases). We do the same thing for datasets. If you commit a `Set`, the type of the commit we create for you is: ```go Struct Commit { Value: Set Parents: Set>> } ``` This tells you that the current and all previous commits have values of type `Set`. 
But if you then commit a `Set` to this same dataset, then the type of that commit will be: ```go Struct Commit { Value: Set Parents: Set> | Ref Parents: Cycle }>> } } ``` This tells you that the dataset's current commit has a value of type `Set` and that previous commits are either the same, or else have a value of type `Set`. Type accretion has a number of benefits related to schema changes: 1. You can widen the type of any container (list, set, map) without rewriting any existing data. `Set` becomes `Set | Struct { name: String, age: Number }>>` and all existing data is reused. 2. You can widen containers in ways that other databases wouldn't allow. For example, you can go from `Set` to `Set`. Existing data is still reused. 3. You can change the type of a dataset in either direction - either widening or narrowing it, and the dataset remains self-documenting as to its current and previous types. ## Prolly Trees: Probabilistic B-Trees A critical invariant of Noms is [history-independence](https://arxiv.org/pdf/1501.06508.pdf): the same Noms value will be represented by the same graph of physical chunks, and the same hashes, regardless of what past sequence of logical mutations resulted in the value. This is what makes fast diff, sync, and merge possible in Noms: we can compare two values just by looking at their hash. If their hashes are identical, we know the values are identical without additional work. By modeling collections as trees of values, the same trick can be used to quickly find the differences between larges sets of values. But Noms is also a database, and needs to do what databases do: efficiently search, scan, and mutate large collections. The classic data structures that enable these features inside databases — B-Trees and LSM Trees — can't be used by Noms because they aren't history-independent: their internal state depends upon their mutation history. 
In order to model large mutable collections in Noms, of the type where B-Trees would typically be used, while preserving efficient diff, sync, and merge, Noms introduces _Prolly Trees_. ### Prolly Tree Structure A Prolly Tree is a [search tree](https://en.wikipedia.org/wiki/Search_tree) where the number of values stored in each node is determined probabilistically, based on the data which is stored in the tree. A Prolly Tree is similar in many ways to a B-Tree, except that the number of values in each node has a probabilistic average rather than an enforced upper and lower bound, and the set of values in each node is determined by the output of a rolling hash function over the values, rather than via split and join operations when upper and lower bounds are exceeded. Like B-Trees, Prolly Trees can model lists, maps, tables, and sets. Below is an example of a small set of ASCII characters stored in a Prolly Tree: ![Prolly Tree Diagram](prolly-tree-structure.png) ### Prolly Tree Construction Prolly Trees can be constructed from scratch using a variation of [content-slicing](https://en.wikipedia.org/wiki/Rolling_hash#Content-based_slicing_using_a_rolling_hash), as used in bup, rsync, Camlistore, and many other systems. ![Prolly Tree Construction](prolly-tree-construction.png) To start, we "chunk" the serialization of a large sorted sequence by sliding a fixed-size window through it, one byte at a time. At each position, we compute a hash of the bytes in the window. Any hash can be used, but in Noms a [rolling hash](https://en.wikipedia.org/wiki/Rolling_hash) is used for performance. Within each hash, we look for a pattern that has a known probability of occurring. If the pattern is found, that position is a _boundary_. We slide the window forward to the end of the containing item, and write a new _chunk_ containing the bytes between this boundary and the previous, if any.
The resulting chunk is stored in a [content-addressed storage system](https://en.wikipedia.org/wiki/Content-addressable_storage). Again, any hash can be used for this, but in Noms [we use truncated SHA-512](https://github.com/attic-labs/noms/blob/master/go/hash/hash.go). By adjusting the pattern we look for, we can control the average size of the chunks our tree will be decomposed into. In Noms, the pattern we look for is the [12 high bits being 1](https://github.com/attic-labs/noms/blob/master/go/types/rolling_value_hasher.go). Since this has a probability of 1/2^12, the average chunk size in Noms is 4kb. Once we've created an initial pass of chunks this way, we build an index describing the contents of each of those chunks, and perform the chunking operation again on the serialization of that index. This continues recursively, until we are left with a node that doesn't chunk. This is the root of the tree. Noms uses a window size of 64 bytes, so the probability of any 1 bit change moving a boundary is about 64/4kb ~= 0.016. ### Prolly Tree Mutation To mutate a Prolly Tree, conceptually we build a new Prolly Tree from scratch, except that we can reuse everything from the previous tree that we know cannot have been affected (because it is outside the window). ![Prolly Tree Mutation](prolly-tree-mutation.png) In the example above, we insert the value _I_ into the set. The chunk boundary is unchanged in this case so the subtrees before and after the modified chunk can be reused as-is. Every so often (1.6% of the time in Noms) a write will move a chunk boundary. This results in one extra chunk getting written at that level in the tree. This can happen at each level, so the expected number of operations to make a change to a prolly tree is 1.016*treedepth. A 4-level Prolly Tree in Noms can hold 4096^4 ~= 281TB of data. We can make a single mutation to that tree with about 4 4kb writes. 
### Some Properties of Prolly Trees Operation | B-Trees | Patricia Trees† / HAMTs | Prolly Trees --------- | ------- | ----------------------- | ------------ 1 Random Read | 🎉logk(n) | 🎉logk(n) | 🎉logk(n) 1 Random Write | 🎉logk(n) | 💩2*logk(n) | 👍(1+k/w)*logk(n) Ordered scan of one item with size z | 🎉z/k | ❌ | 🎉z/k Calculate diff of size d | 💩n | 🎉d | 🎉d Verification, proofs | ❌ | 🙌 | 🙌 Structured sharing | ❌ | 🙌 | 🙌 **†** assuming hashed keys, unhashed destroys perf — **n**: total leaf data in tree, **k**: average block size, **w**: window width ### Indexing and Searching with Prolly Trees Like B-Trees, Prolly Trees are sorted. In Noms, we sort keys of type Boolean, Number, and String by their natural order. We sort other key types by their hash. Because of this sorting, Noms collections can be used as efficient indexes, in the same manner as primary and secondary indexes in traditional databases. For example, say you want to quickly be able to find `Person` structs by their age. You could build a map of type `Map>`. This would allow you to quickly (~logk(n) seeks) find all the people of an exact age. But it would _also_ allow you to find all people within a range of ages efficiently (~num_results/logk(n) seeks), even if the ages are non-integral. Also, because Noms collections are ordered search trees, it is possible to implement set operations like union and intersect efficiently on them. So, for example, if you wanted to find all the people of a particular age AND having a particular hair color, you could construct a second map having type `Map>`, and intersect the two sets. Over time, we plan to develop this basic capability into support for some kind of generalized query system. 
================================================ FILE: doc/spelling.md ================================================ [Home](../README.md) » [Technical Overview](intro.md)  |  [Use Cases](../README.md#use-cases)  |  [Command-Line Interface](cli-tour.md)  |  [Go bindings Tour](go-tour.md) |  **Path Syntax**  |  [FAQ](faq.md) 

# Spelling in Noms Many commands and APIs in Noms accept database, dataset, or value specifications as arguments. This document describes how to construct these specifications. ## Spelling Databases Database specifications take the form: ```nohighlight <protocol>[:<path>] ``` The `path` part of the name is interpreted differently depending on the protocol: - **http(s)** specs describe a remote database to be accessed over HTTP. In this case, the entire database spec is a normal http(s) URL. For example: `https://dev.noms.io/aa`. - **mem** specs describe an ephemeral memory-backed database. In this case, the path component is not used and must be empty. - **nbs** specs describe a local [Noms Block Store (NBS)](https://github.com/attic-labs/noms/tree/master/go/nbs)-backed database. In this case, the path component should be a relative or absolute path on disk to a directory in which to store the data, e.g. `nbs:/tmp/noms-data`. - In Go, `nbs:` can be omitted (just `/tmp/noms-data` will work). - **aws** specs describe a remote Noms Block Store backed directly by Amazon Web Services, specifically DynamoDB and S3. The format is a URI containing the names of the DynamoDB table to use, the S3 bucket to use, and the database to serve. For example: `aws:dynamo-table/s3-bucket/database`. ## Spelling Datasets Dataset specifications take the form: ```nohighlight <database>::<dataset> ``` See [spelling databases](#spelling-databases) for how to build the `database` part of the name. The `dataset` part is just any string matching the regex `^[a-zA-Z0-9\-_/]+$`. Example datasets: ```nohighlight /tmp/test-db::my-dataset nbs:/tmp/test-db::my-dataset http://localhost:8000::registered-businesses https://demo.noms.io/aa::music ``` ## Spelling Values Value specifications take the form: ```nohighlight <database>::<root><path> ``` See [spelling databases](#spelling-databases) for how to build the database part of the name. The `root` part can be either a hash or a dataset name.
If `root` begins with `#` it will be interpreted as a hash otherwise it is used as a dataset name. See [spelling datasets](#spelling-datasets) for how to build the dataset part of the name. The `path` part is relative to the `root` provided. ### Specifying Struct Fields Elements of a Noms struct can be referenced using a period `.`. For example, if the `root` is a dataset, then one can use `.value` to get the root of the data in the dataset. In this case `.value` selects the `value` field from the `Commit` struct at the top of the dataset. One could instead use `.meta` to select the `meta` struct from the `Commit` struct. The `root` does not need to be a dataset though, so if it is a hash that references a struct, the same notation still works: `#o38hugtf3l1e8rqtj89mijj1dq57eh4m.field`. ### Specifying Collection Values Elements of a Noms list, map, or set can be retrieved using brackets `[...]`. For example, if the dataset is a Noms map of number to struct then one could use `.value[42]` to get the Noms struct associated with the key 42. Similarly selecting the first element from a Noms list would be `.value[0]`. If the Noms map was keyed by string, then using `.value["0000024-02-999"]` would reference the Noms struct associated with key "0000024-02-999". Noms lists also support indexing from the back, using `.value[-1]` to mean the last element of a list, `.value[-2]` for the 2nd last, and so on. If the key of a Noms map or set is a Noms struct or a more complex value, then indexing into the collection can be done using the hash of that more complex value. For example, if the `root` of our dataset is a Noms set of Noms structs, then if you provide the hash of the struct element then you can index into the set using the brackets as described above. e.g. http://localhost:8000::dataset.value[#o38hugtf3l1e8rqtj89mijj1dq57eh4m].field Similarly, the key is addressable using `@key` syntax.
One use for this is when you have the hash of a complex value, but need to retrieve the key (rather than or in addition to the value) in a Noms map. The syntax is to append `@key` after the closing bracket of the index specifier. e.g. http://localhost:8000::dataset.value[#o38hugtf3l1e8rqtj89mijj1dq57eh4m]@key would retrieve the key element specified by the hash key `#o38hugtf3l1e8rqtj89mijj1dq57eh4m` from the `dataset.value` collection. ### Specifying Collection Positions Elements of a Noms list, map, or set can be retrieved _by their position_ using the `@at(index)` annotation. For lists, this is exactly equivalent to `[index]`. For sets and maps, note that Noms has a stable ordering, so `@at(0)` will always return the smallest element, `@at(1)` the 2nd smallest, and so on. `@at(-1)` will return the largest. For maps, adding the `@key` annotation will retrieve the key of the map entry instead of the value. ### Examples ```sh # “sf-registered-business” dataset at https://demo.noms.io/cli-tour https://demo.noms.io/cli-tour::sf-registered-business # value o38hugtf3l1e8rqtj89mijj1dq57eh4m at https://localhost:8000 https://localhost:8000/monkey::#o38hugtf3l1e8rqtj89mijj1dq57eh4m # “bonk” dataset at /foo/bar /foo/bar::bonk # from https://demo.noms.io/cli-tour, select the "sf-registered-business" dataset, # the root value is a Noms map, select the value of the Noms map identified by string # key "0000024-02-999", then from that resulting struct select the Ownership_Name field https://demo.noms.io/cli-tour::sf-registered-business.value["0000024-02-999"].Ownership_Name ``` Be careful with shell escaping. Your shell might require escaping of the double quotes and other characters or use single quotes around the entire command line argument.
e.g.: ```sh > noms show https://demo.noms.io/cli-tour::sf-registered-business.value["0000024-02-999"].Ownership_Name error: Invalid index: 0000024-02-999 > noms show https://demo.noms.io/cli-tour::sf-registered-business.value[\"0000024-02-999\"].Ownership_Name "EASTMAN KODAK CO" > noms show 'https://demo.noms.io/cli-tour::sf-registered-business.value["0000024-02-999"].Ownership_Name' "EASTMAN KODAK CO" ``` ================================================ FILE: go/chunks/chunk.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package chunks provides facilities for representing, storing, and fetching content-addressed chunks of Noms data. package chunks import ( "bytes" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" ) // Chunk is a unit of stored data in noms type Chunk struct { r hash.Hash data []byte } var EmptyChunk = NewChunk([]byte{}) func (c Chunk) Hash() hash.Hash { return c.r } func (c Chunk) Data() []byte { return c.data } func (c Chunk) IsEmpty() bool { return len(c.data) == 0 } // NewChunk creates a new Chunk backed by data. This means that the returned Chunk has ownership of this slice of memory. func NewChunk(data []byte) Chunk { r := hash.Of(data) return Chunk{r, data} } // NewChunkWithHash creates a new chunk with a known hash. The hash is not re-calculated or verified. This should obviously only be used in cases where the caller already knows the specified hash is correct. func NewChunkWithHash(r hash.Hash, data []byte) Chunk { return Chunk{r, data} } // ChunkWriter wraps an io.WriteCloser, additionally providing the ability to grab the resulting Chunk for all data written through the interface. Calling Chunk() or Close() on an instance disallows further writing. 
type ChunkWriter struct { buffer *bytes.Buffer c Chunk } func NewChunkWriter() *ChunkWriter { b := &bytes.Buffer{} return &ChunkWriter{ buffer: b, } } func (w *ChunkWriter) Write(data []byte) (int, error) { if w.buffer == nil { d.Panic("Write() cannot be called after Hash() or Close().") } size, err := w.buffer.Write(data) d.Chk.NoError(err) return size, nil } // Chunk() closes the writer and returns the resulting Chunk. func (w *ChunkWriter) Chunk() Chunk { d.Chk.NoError(w.Close()) return w.c } // Close() closes computes the hash and Puts it into the ChunkSink Note: The Write() method never returns an error. Instead, like other noms interfaces, errors are reported via panic. func (w *ChunkWriter) Close() error { if w.buffer == nil { return nil } w.c = NewChunk(w.buffer.Bytes()) w.buffer = nil return nil } ================================================ FILE: go/chunks/chunk_serializer.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package chunks import ( "bytes" "encoding/binary" "io" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" ) /* Chunk Serialization: Chunk 0 Chunk 1 .. Chunk N Chunk: Hash // 20-byte hash Len // 4-byte int Data // len(Data) == Len */ // Serialize a single Chunk to writer. 
func Serialize(chunk Chunk, writer io.Writer) { d.PanicIfFalse(chunk.data != nil) h := chunk.Hash() n, err := io.Copy(writer, bytes.NewReader(h[:])) d.Chk.NoError(err) d.PanicIfFalse(int64(hash.ByteLen) == n) // Because of chunking at higher levels, no chunk should never be more than 4GB chunkSize := uint32(len(chunk.Data())) err = binary.Write(writer, binary.BigEndian, chunkSize) d.Chk.NoError(err) n, err = io.Copy(writer, bytes.NewReader(chunk.Data())) d.Chk.NoError(err) d.PanicIfFalse(uint32(n) == chunkSize) } // Deserialize reads off of |reader| until EOF, sending chunks to // chunkChan in the order they are read. Objects sent over chunkChan are // *Chunk. func Deserialize(reader io.Reader, chunkChan chan<- *Chunk) (err error) { for { var c Chunk c, err = deserializeChunk(reader) if err != nil { break } d.Chk.NotEqual(EmptyChunk.Hash(), c.Hash()) chunkChan <- &c } if err == io.EOF { err = nil } return } func deserializeChunk(reader io.Reader) (Chunk, error) { h := hash.Hash{} n, err := io.ReadFull(reader, h[:]) if err != nil { return EmptyChunk, err } d.PanicIfFalse(int(hash.ByteLen) == n) chunkSize := uint32(0) if err = binary.Read(reader, binary.BigEndian, &chunkSize); err != nil { return EmptyChunk, err } data := make([]byte, int(chunkSize)) if n, err = io.ReadFull(reader, data); err != nil { return EmptyChunk, err } d.PanicIfFalse(int(chunkSize) == n) c := NewChunk(data) if h != c.Hash() { d.Panic("%s != %s", h, c.Hash().String()) } return c, nil } ================================================ FILE: go/chunks/chunk_serializer_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package chunks

import (
	"bytes"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestSerializeRoundTrip serializes two chunks into one buffer and checks
// that Deserialize yields chunks with the same hashes, in the same order.
func TestSerializeRoundTrip(t *testing.T) {
	assert := assert.New(t)
	inputs := [][]byte{[]byte("abc"), []byte("def")}
	chnx := make([]Chunk, len(inputs))
	for i, data := range inputs {
		chnx[i] = NewChunk(data)
	}

	buf := &bytes.Buffer{}
	Serialize(chnx[0], buf)
	Serialize(chnx[1], buf)

	chunkChan := make(chan *Chunk)
	go func() {
		defer close(chunkChan)
		err := Deserialize(bytes.NewReader(buf.Bytes()), chunkChan)
		assert.NoError(err)
	}()

	// Each received chunk must match the head of the expected list, which is
	// then popped; an empty list at the end means nothing was dropped.
	for c := range chunkChan {
		assert.Equal(chnx[0].Hash(), c.Hash())
		chnx = chnx[1:]
	}
	assert.Len(chnx, 0)
}

// TestBadSerialization checks that a truncated stream is reported as an error.
func TestBadSerialization(t *testing.T) {
	bad := []byte{0, 1} // Not enough bytes to read even the first hash
	ch := make(chan *Chunk)
	defer close(ch)
	assert.Error(t, Deserialize(bytes.NewReader(bad), ch))
}

================================================
FILE: go/chunks/chunk_store.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package chunks

import (
	"io"

	"github.com/attic-labs/noms/go/hash"
)

// ChunkStore is the core storage abstraction in noms. We can put data
// anyplace we have a ChunkStore implementation for.
type ChunkStore interface {
	// Get the Chunk for the value of the hash in the store. If the hash is
	// absent from the store EmptyChunk is returned.
	Get(h hash.Hash) Chunk

	// GetMany gets the Chunks with |hashes| from the store. On return,
	// |foundChunks| will have been fully sent all chunks which have been
	// found. Any non-present chunks will silently be ignored.
	GetMany(hashes hash.HashSet, foundChunks chan *Chunk)

	// Returns true iff the value at the address |h| is contained in the
	// store
	Has(h hash.Hash) bool

	// Returns a new HashSet containing any members of |hashes| that are
	// absent from the store.
	HasMany(hashes hash.HashSet) (absent hash.HashSet)

	// Put caches c in the ChunkStore. Upon return, c must be visible to
	// subsequent Get and Has calls, but must not be persistent until a call
	// to Commit(). Put may be called concurrently with other calls to Put(),
	// Get(), GetMany(), Has() and HasMany().
	Put(c Chunk)

	// Returns the NomsVersion with which this ChunkStore is compatible.
	Version() string

	// Rebase brings this ChunkStore into sync with the persistent storage's
	// current root.
	Rebase()

	// Root returns the root of the database as of the time the ChunkStore
	// was opened or the most recent call to Rebase.
	Root() hash.Hash

	// Commit atomically attempts to persist all novel Chunks and update the
	// persisted root hash from last to current (or keeps it the same).
	// If last doesn't match the root in persistent storage, returns false.
	Commit(current, last hash.Hash) bool

	// Stats may return some kind of struct that reports statistics about the
	// ChunkStore instance. The type is implementation-dependent, and impls
	// may return nil
	Stats() interface{}

	// StatsSummary may return a string containing summarized statistics for
	// this ChunkStore. It must return "Unsupported" if this operation is not
	// supported.
	StatsSummary() string

	// Close tears down any resources in use by the implementation. After
	// Close(), the ChunkStore may not be used again. It is NOT SAFE to call
	// Close() concurrently with any other ChunkStore method; behavior is
	// undefined and probably crashy.
	io.Closer
}

// Factory allows the creation of namespaced ChunkStore instances. The details
// of how namespaces are separated is left up to the particular implementation
// of Factory and ChunkStore.
type Factory interface {
	// CreateStore returns a ChunkStore for the namespace ns.
	CreateStore(ns string) ChunkStore

	// CreateStoreFromCache allows caller to signal to the factory that it's
	// willing to tolerate an out-of-date ChunkStore.
	CreateStoreFromCache(ns string) ChunkStore

	// Shutter shuts down the factory. Subsequent calls to CreateStore() will fail.
	Shutter()
}

================================================
FILE: go/chunks/chunk_store_common_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package chunks

import (
	"github.com/stretchr/testify/suite"

	"github.com/attic-labs/noms/go/constants"
	"github.com/attic-labs/noms/go/hash"
)

// ChunkStoreTestSuite is a reusable suite of ChunkStore tests. Embedders set
// Factory to supply the implementation under test.
type ChunkStoreTestSuite struct {
	suite.Suite
	Factory Factory
}

// TestChunkStorePut checks that a chunk is readable from the same store
// right after Put.
func (suite *ChunkStoreTestSuite) TestChunkStorePut() {
	store := suite.Factory.CreateStore("ns")
	input := "abc"
	c := NewChunk([]byte(input))
	store.Put(c)
	h := c.Hash()

	// Reading it via the API should work.
	assertInputInStore(input, h, store, suite.Assert())
}

// TestChunkStoreRoot checks that Commit rejects a mismatched last root and
// accepts the real one.
func (suite *ChunkStoreTestSuite) TestChunkStoreRoot() {
	store := suite.Factory.CreateStore("ns")
	oldRoot := store.Root()
	suite.True(oldRoot.IsEmpty())

	bogusRoot := hash.Parse("8habda5skfek1265pc5d5l1orptn5dr0")
	newRoot := hash.Parse("8la6qjbh81v85r6q67lqbfrkmpds14lg")

	// Try to update root with bogus oldRoot
	result := store.Commit(newRoot, bogusRoot)
	suite.False(result)

	// Now do a valid root update
	result = store.Commit(newRoot, oldRoot)
	suite.True(result)
}

// TestChunkStoreCommitPut checks that a Put chunk is visible locally before
// Commit but only visible to a freshly created store after Commit.
func (suite *ChunkStoreTestSuite) TestChunkStoreCommitPut() {
	name := "ns"
	store := suite.Factory.CreateStore(name)
	input := "abc"
	c := NewChunk([]byte(input))
	store.Put(c)
	h := c.Hash()

	// Reading it via the API should work...
	assertInputInStore(input, h, store, suite.Assert())
	// ...but it shouldn't be persisted yet
	assertInputNotInStore(input, h, suite.Factory.CreateStore(name), suite.Assert())

	store.Commit(h, store.Root()) // Commit persists Chunks
	assertInputInStore(input, h, store, suite.Assert())
	assertInputInStore(input, h, suite.Factory.CreateStore(name), suite.Assert())
}

// TestChunkStoreGetNonExisting checks that Get on an unknown hash returns an
// empty chunk.
func (suite *ChunkStoreTestSuite) TestChunkStoreGetNonExisting() {
	store := suite.Factory.CreateStore("ns")
	h := hash.Parse("11111111111111111111111111111111")
	c := store.Get(h)
	suite.True(c.IsEmpty())
}

// TestChunkStoreVersion checks that the store reports constants.NomsVersion
// after a successful Commit.
func (suite *ChunkStoreTestSuite) TestChunkStoreVersion() {
	store := suite.Factory.CreateStore("ns")
	oldRoot := store.Root()
	suite.True(oldRoot.IsEmpty())
	newRoot := hash.Parse("11111222223333344444555556666677")
	suite.True(store.Commit(newRoot, oldRoot))

	suite.Equal(constants.NomsVersion, store.Version())
}

// TestChunkStoreCommitUnchangedRoot checks that committing with an unchanged
// root still persists pending chunks, making them visible to another store
// on the same namespace once it has been rebased.
func (suite *ChunkStoreTestSuite) TestChunkStoreCommitUnchangedRoot() {
	store1, store2 := suite.Factory.CreateStore("ns"), suite.Factory.CreateStore("ns")
	input := "abc"
	c := NewChunk([]byte(input))
	store1.Put(c)
	h := c.Hash()

	// Reading c from store1 via the API should work...
	assertInputInStore(input, h, store1, suite.Assert())
	// ...but not store2.
	assertInputNotInStore(input, h, store2, suite.Assert())

	store1.Commit(store1.Root(), store1.Root())
	store2.Rebase()
	// Now, reading c from store2 via the API should work...
	assertInputInStore(input, h, store2, suite.Assert())
}

================================================
FILE: go/chunks/chunk_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package chunks

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestChunk pins the string form of the hash of the chunk "abc".
func TestChunk(t *testing.T) {
	c := NewChunk([]byte("abc"))
	h := c.Hash()
	// See http://www.di-mgt.com.au/sha_testvectors.html
	assert.Equal(t, "rmnjb8cjc5tblj21ed4qs821649eduie", h.String())
}

// TestChunkWriteAfterCloseFails checks that Write() panics once Close() has
// been called.
func TestChunkWriteAfterCloseFails(t *testing.T) {
	assert := assert.New(t)
	input := "abc"
	w := NewChunkWriter()
	_, err := w.Write([]byte(input))
	assert.NoError(err)

	assert.NoError(w.Close())
	assert.Panics(func() { w.Write([]byte(input)) }, "Write() after Close() should barf!")
}

// TestChunkWriteAfterChunkFails checks that Write() panics once Chunk() has
// been called.
func TestChunkWriteAfterChunkFails(t *testing.T) {
	assert := assert.New(t)
	input := "abc"
	w := NewChunkWriter()
	_, err := w.Write([]byte(input))
	assert.NoError(err)

	_ = w.Chunk()
	assert.Panics(func() { w.Write([]byte(input)) }, "Write() after Chunk() should barf!")
}

// TestChunkChunkCloses checks that Chunk() closes the writer, so a later
// Write() panics.
func TestChunkChunkCloses(t *testing.T) {
	assert := assert.New(t)
	input := "abc"
	w := NewChunkWriter()
	_, err := w.Write([]byte(input))
	assert.NoError(err)

	w.Chunk()
	assert.Panics(func() { w.Write([]byte(input)) }, "Write() after Close() should barf!")
}

================================================
FILE: go/chunks/memory_store.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package chunks

import (
	"sync"

	"github.com/attic-labs/noms/go/constants"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
)

// MemoryStorage provides a "persistent" storage layer to back multiple
// MemoryStoreViews. A MemoryStorage instance holds the ground truth for the
// root and set of chunks that are visible to all MemoryStoreViews vended by
// NewView(), allowing them to implement the transaction-style semantics that
// ChunkStore requires.
type MemoryStorage struct { data map[hash.Hash]Chunk rootHash hash.Hash mu sync.RWMutex } // NewView vends a MemoryStoreView backed by this MemoryStorage. It's // initialized with the currently "persisted" root. func (ms *MemoryStorage) NewView() ChunkStore { return &MemoryStoreView{storage: ms, rootHash: ms.rootHash} } // Get retrieves the Chunk with the Hash h, returning EmptyChunk if it's not // present. func (ms *MemoryStorage) Get(h hash.Hash) Chunk { ms.mu.RLock() defer ms.mu.RUnlock() if c, ok := ms.data[h]; ok { return c } return EmptyChunk } // Has returns true if the Chunk with the Hash h is present in ms.data, false // if not. func (ms *MemoryStorage) Has(r hash.Hash) bool { ms.mu.RLock() defer ms.mu.RUnlock() _, ok := ms.data[r] return ok } // Len returns the number of Chunks in ms.data. func (ms *MemoryStorage) Len() int { ms.mu.RLock() defer ms.mu.RUnlock() return len(ms.data) } // Root returns the currently "persisted" root hash of this in-memory store. func (ms *MemoryStorage) Root() hash.Hash { ms.mu.RLock() defer ms.mu.RUnlock() return ms.rootHash } // Update checks the "persisted" root against last and, iff it matches, // updates the root to current, adds all of novel to ms.data, and returns // true. Otherwise returns false. func (ms *MemoryStorage) Update(current, last hash.Hash, novel map[hash.Hash]Chunk) bool { ms.mu.Lock() defer ms.mu.Unlock() if last != ms.rootHash { return false } if ms.data == nil { ms.data = map[hash.Hash]Chunk{} } for h, c := range novel { ms.data[h] = c } ms.rootHash = current return true } // MemoryStoreView is an in-memory implementation of store.ChunkStore. Useful // mainly for tests. 
// The proper way to get one: // storage := &MemoryStorage{} // ms := storage.NewView() type MemoryStoreView struct { pending map[hash.Hash]Chunk rootHash hash.Hash mu sync.RWMutex storage *MemoryStorage } func (ms *MemoryStoreView) Get(h hash.Hash) Chunk { ms.mu.RLock() defer ms.mu.RUnlock() if c, ok := ms.pending[h]; ok { return c } return ms.storage.Get(h) } func (ms *MemoryStoreView) GetMany(hashes hash.HashSet, foundChunks chan *Chunk) { for h := range hashes { c := ms.Get(h) if !c.IsEmpty() { foundChunks <- &c } } return } func (ms *MemoryStoreView) Has(h hash.Hash) bool { ms.mu.RLock() defer ms.mu.RUnlock() if _, ok := ms.pending[h]; ok { return true } return ms.storage.Has(h) } func (ms *MemoryStoreView) HasMany(hashes hash.HashSet) hash.HashSet { absent := hash.HashSet{} for h := range hashes { if !ms.Has(h) { absent.Insert(h) } } return absent } func (ms *MemoryStoreView) Version() string { return constants.NomsVersion } func (ms *MemoryStoreView) Put(c Chunk) { ms.mu.Lock() defer ms.mu.Unlock() if ms.pending == nil { ms.pending = map[hash.Hash]Chunk{} } ms.pending[c.Hash()] = c } func (ms *MemoryStoreView) Len() int { ms.mu.RLock() defer ms.mu.RUnlock() return len(ms.pending) + ms.storage.Len() } func (ms *MemoryStoreView) Rebase() { ms.mu.Lock() defer ms.mu.Unlock() ms.rootHash = ms.storage.Root() } func (ms *MemoryStoreView) Root() hash.Hash { ms.mu.RLock() defer ms.mu.RUnlock() return ms.rootHash } func (ms *MemoryStoreView) Commit(current, last hash.Hash) bool { ms.mu.Lock() defer ms.mu.Unlock() if last != ms.rootHash { return false } success := ms.storage.Update(current, last, ms.pending) if success { ms.pending = nil } ms.rootHash = ms.storage.Root() return success } func (ms *MemoryStoreView) Stats() interface{} { return nil } func (ms *MemoryStoreView) StatsSummary() string { return "Unsupported" } func (ms *MemoryStoreView) Close() error { return nil } type memoryStoreFactory struct { stores map[string]*MemoryStorage mu *sync.Mutex } func 
NewMemoryStoreFactory() Factory { return &memoryStoreFactory{map[string]*MemoryStorage{}, &sync.Mutex{}} } func (f *memoryStoreFactory) CreateStoreFromCache(ns string) ChunkStore { return f.CreateStore(ns) } func (f *memoryStoreFactory) CreateStore(ns string) ChunkStore { f.mu.Lock() defer f.mu.Unlock() if f.stores == nil { d.Panic("Cannot use memoryStoreFactory after Shutter().") } if ms, present := f.stores[ns]; present { return ms.NewView() } f.stores[ns] = &MemoryStorage{} return f.stores[ns].NewView() } func (f *memoryStoreFactory) Shutter() { f.stores = nil } ================================================ FILE: go/chunks/memory_store_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package chunks import ( "testing" "github.com/stretchr/testify/suite" ) func TestMemoryStoreTestSuite(t *testing.T) { suite.Run(t, &MemoryStoreTestSuite{}) } type MemoryStoreTestSuite struct { ChunkStoreTestSuite } func (suite *MemoryStoreTestSuite) SetupTest() { suite.Factory = NewMemoryStoreFactory() } func (suite *MemoryStoreTestSuite) TearDownTest() { suite.Factory.Shutter() } ================================================ FILE: go/chunks/put_cache.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package chunks import ( "sync" "github.com/attic-labs/noms/go/hash" ) func newUnwrittenPutCache() *unwrittenPutCache { return &unwrittenPutCache{map[hash.Hash]Chunk{}, &sync.Mutex{}} } type unwrittenPutCache struct { unwrittenPuts map[hash.Hash]Chunk mu *sync.Mutex } func (p *unwrittenPutCache) Add(c Chunk) bool { p.mu.Lock() defer p.mu.Unlock() if _, ok := p.unwrittenPuts[c.Hash()]; !ok { p.unwrittenPuts[c.Hash()] = c return true } return false } func (p *unwrittenPutCache) Has(c Chunk) (has bool) { p.mu.Lock() defer p.mu.Unlock() _, has = p.unwrittenPuts[c.Hash()] return } func (p *unwrittenPutCache) Get(r hash.Hash) Chunk { p.mu.Lock() defer p.mu.Unlock() if c, ok := p.unwrittenPuts[r]; ok { return c } return EmptyChunk } func (p *unwrittenPutCache) Clear(chunks []Chunk) { p.mu.Lock() defer p.mu.Unlock() for _, c := range chunks { delete(p.unwrittenPuts, c.Hash()) } } ================================================ FILE: go/chunks/remote_requests.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package chunks

import (
	"sync"

	"github.com/attic-labs/noms/go/hash"
)

// ReadRequest is a queued read: the set of hashes it asks about, plus the
// OutstandingRequest through which the answer is delivered.
type ReadRequest interface {
	Hashes() hash.HashSet
	Outstanding() OutstandingRequest
}

// NewGetRequest builds a GetRequest for the single hash r whose result is
// delivered on ch.
func NewGetRequest(r hash.Hash, ch chan<- *Chunk) GetRequest {
	return GetRequest{hash.HashSet{r: struct{}{}}, ch}
}

// GetRequest asks for one chunk; the answer is sent on ch.
type GetRequest struct {
	hashes hash.HashSet
	ch     chan<- *Chunk
}

// NewGetManyRequest builds a GetManyRequest for hashes. Found chunks are
// sent on ch and wg is marked Done() once per hash that is resolved.
func NewGetManyRequest(hashes hash.HashSet, wg *sync.WaitGroup, ch chan<- *Chunk) GetManyRequest {
	return GetManyRequest{hashes, wg, ch}
}

// GetManyRequest asks for several chunks at once.
type GetManyRequest struct {
	hashes hash.HashSet
	wg     *sync.WaitGroup
	ch     chan<- *Chunk
}

// NewAbsentRequest builds an AbsentRequest for the single hash r whose
// result is delivered on ch.
func NewAbsentRequest(r hash.Hash, ch chan<- bool) AbsentRequest {
	return AbsentRequest{hash.HashSet{r: struct{}{}}, ch}
}

// AbsentRequest asks whether one chunk is absent; the answer is sent on ch.
type AbsentRequest struct {
	hashes hash.HashSet
	ch     chan<- bool
}

// NewAbsentManyRequest builds an AbsentManyRequest for hashes. wg is marked
// Done() once per hash that is resolved.
func NewAbsentManyRequest(hashes hash.HashSet, wg *sync.WaitGroup, ch chan<- hash.Hash) AbsentManyRequest {
	return AbsentManyRequest{hashes, wg, ch}
}

// AbsentManyRequest asks about the presence of several chunks at once.
type AbsentManyRequest struct {
	hashes hash.HashSet
	wg     *sync.WaitGroup
	ch     chan<- hash.Hash
}

func (g GetRequest) Hashes() hash.HashSet {
	return g.hashes
}

func (g GetRequest) Outstanding() OutstandingRequest {
	return OutstandingGet(g.ch)
}

func (g GetManyRequest) Hashes() hash.HashSet {
	return g.hashes
}

func (g GetManyRequest) Outstanding() OutstandingRequest {
	return OutstandingGetMany{g.wg, g.ch}
}

func (h AbsentRequest) Hashes() hash.HashSet {
	return h.hashes
}

func (h AbsentRequest) Outstanding() OutstandingRequest {
	return OutstandingAbsent(h.ch)
}

func (h AbsentManyRequest) Hashes() hash.HashSet {
	return h.hashes
}

func (h AbsentManyRequest) Outstanding() OutstandingRequest {
	return OutstandingAbsentMany{h.wg, h.ch}
}

// OutstandingRequest is the reply side of a queued read. Satisfy is called
// with the hash and chunk when the chunk was found; Fail is called when it
// was not.
type OutstandingRequest interface {
	Satisfy(h hash.Hash, c *Chunk)
	Fail()
}

type OutstandingGet chan<- *Chunk
type OutstandingGetMany struct {
	wg *sync.WaitGroup
	ch chan<- *Chunk
}
type OutstandingAbsent chan<- bool
type OutstandingAbsentMany struct {
	wg *sync.WaitGroup
	ch chan<- hash.Hash
}

// Satisfy sends the found chunk to the waiting caller.
func (r OutstandingGet) Satisfy(h hash.Hash, c *Chunk) {
	r <- c
}

// Fail sends a pointer to EmptyChunk to the waiting caller.
func (r OutstandingGet) Fail() {
	r <- &EmptyChunk
}

// Satisfy sends the found chunk and marks one unit of the WaitGroup done.
func (ogm OutstandingGetMany) Satisfy(h hash.Hash, c *Chunk) {
	ogm.ch <- c
	ogm.wg.Done()
}

// Fail sends nothing; it only marks one unit of the WaitGroup done.
func (ogm OutstandingGetMany) Fail() {
	ogm.wg.Done()
}

// Satisfy reports "not absent" (false) to the waiting caller.
func (oh OutstandingAbsent) Satisfy(h hash.Hash, c *Chunk) {
	oh <- false
}

// Fail reports "absent" (true) to the waiting caller.
func (oh OutstandingAbsent) Fail() {
	oh <- true
}

// Satisfy sends the hash h on the channel and marks one unit of the
// WaitGroup done.
func (ohm OutstandingAbsentMany) Satisfy(h hash.Hash, c *Chunk) {
	ohm.ch <- h
	ohm.wg.Done()
}

// Fail sends nothing; it only marks one unit of the WaitGroup done.
func (ohm OutstandingAbsentMany) Fail() {
	ohm.wg.Done()
}

// ReadBatch represents a set of queued Get/Has requests, each of which are blocking on a receive channel for a response.
type ReadBatch map[hash.Hash][]OutstandingRequest

// Close ensures that callers to Get() and Has() are failed correctly if the corresponding chunk wasn't in the response from the server (i.e. it wasn't found).
func (rb *ReadBatch) Close() error {
	// Every request still in the batch is failed; entries are not removed
	// here.
	for _, reqs := range *rb {
		for _, req := range reqs {
			req.Fail()
		}
	}
	return nil
}

================================================
FILE: go/chunks/remote_requests_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package chunks

import (
	"sync"
	"testing"

	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
)

// TestGetRequestBatch queues Get and Absent requests for three hashes,
// satisfies those for h1 and h2, and checks that the requests left in the
// batch (h0) are failed by Close().
func TestGetRequestBatch(t *testing.T) {
	assert := assert.New(t)
	h0 := hash.Parse("00000000000000000000000000000000")
	c1 := NewChunk([]byte("abc"))
	h1 := c1.Hash()
	c2 := NewChunk([]byte("123"))
	h2 := c2.Hash()

	tally := func(b bool, trueCnt, falseCnt *int) {
		if b {
			*trueCnt++
		} else {
			*falseCnt++
		}
	}

	// Buffered so Satisfy()/Fail() in the goroutine below never block.
	req0chan := make(chan bool, 1)
	req1chan := make(chan *Chunk, 1)
	req2chan := make(chan bool, 1)
	req3chan := make(chan bool, 1)
	req4chan := make(chan *Chunk, 1)

	defer func() { close(req0chan); close(req1chan); close(req2chan); close(req3chan); close(req4chan) }()

	batch := ReadBatch{
		h0: []OutstandingRequest{OutstandingAbsent(req0chan), OutstandingGet(req1chan)},
		h1: []OutstandingRequest{OutstandingAbsent(req2chan)},
		h2: []OutstandingRequest{OutstandingAbsent(req3chan), OutstandingGet(req4chan)},
	}
	go func() {
		for requestedHash, reqs := range batch {
			for _, req := range reqs {
				if requestedHash == h1 {
					req.Satisfy(h1, &c1)
					delete(batch, h1)
				} else if requestedHash == h2 {
					req.Satisfy(h2, &c2)
					delete(batch, h2)
				}
			}
		}
		// Only h0 is left in the batch; Close() fails its requests.
		batch.Close()
	}()

	var r0True, r0False, r2True, r2False, r3True, r3False int
	b := <-req0chan
	tally(b, &r0True, &r0False)
	c := <-req1chan
	assert.EqualValues(EmptyChunk.Hash(), c.Hash())
	b = <-req2chan
	tally(b, &r2True, &r2False)
	b = <-req3chan
	tally(b, &r3True, &r3False)
	c = <-req4chan
	assert.EqualValues(c2.Hash(), c.Hash())

	assert.Equal(1, r0True)
	assert.Equal(0, r0False)
	assert.Equal(0, r2True)
	assert.Equal(1, r2False)
	assert.Equal(0, r3True)
	assert.Equal(1, r3False)
}

// TestGetManyRequestBatch satisfies two of three hashes of a GetManyRequest
// and checks that only the unsatisfied hash (h0) produced no chunk.
func TestGetManyRequestBatch(t *testing.T) {
	assert := assert.New(t)
	h0 := hash.Parse("00000000000000000000000000000000")
	c1 := NewChunk([]byte("abc"))
	h1 := c1.Hash()
	c2 := NewChunk([]byte("123"))
	h2 := c2.Hash()

	chunks := make(chan *Chunk)
	hashes := hash.NewHashSet(h0, h1, h2)
	wg := &sync.WaitGroup{}
	wg.Add(len(hashes))
	// The channel is closed once every hash has been satisfied or failed.
	go func() { wg.Wait(); close(chunks) }()

	req := NewGetManyRequest(hashes, wg, chunks)
	batch := ReadBatch{
		h0: {req.Outstanding()},
		h1: {req.Outstanding()},
		h2: {req.Outstanding()},
	}
	go func() {
		for reqHash, reqs := range batch {
			for _, req := range reqs {
				if reqHash == h1 {
					req.Satisfy(h1, &c1)
					delete(batch, h1)
				} else if reqHash == h2 {
					req.Satisfy(h2, &c2)
					delete(batch, h2)
				}
			}
		}
		batch.Close()
	}()

	for c := range chunks {
		hashes.Remove(c.Hash())
	}
	assert.Len(hashes, 1)
	assert.True(hashes.Has(h0))
}

// TestAbsentManyRequestBatch satisfies two of three hashes of an
// AbsentManyRequest and checks that only the unsatisfied hash (h0) was never
// sent on the channel.
func TestAbsentManyRequestBatch(t *testing.T) {
	assert := assert.New(t)
	h0 := hash.Parse("00000000000000000000000000000000")
	c1 := NewChunk([]byte("abc"))
	h1 := c1.Hash()
	c2 := NewChunk([]byte("123"))
	h2 := c2.Hash()

	found := make(chan hash.Hash)
	hashes := hash.NewHashSet(h0, h1, h2)
	wg := &sync.WaitGroup{}
	wg.Add(len(hashes))
	go func() { wg.Wait(); close(found) }()

	req := NewAbsentManyRequest(hashes, wg, found)
	batch := ReadBatch{}
	for h := range req.Hashes() {
		batch[h] = []OutstandingRequest{req.Outstanding()}
	}
	go func() {
		for reqHash, reqs := range batch {
			for _, req := range reqs {
				if reqHash == h1 {
					req.Satisfy(h1, &EmptyChunk)
					delete(batch, h1)
				} else if reqHash == h2 {
					req.Satisfy(h2, &EmptyChunk)
					delete(batch, h2)
				}
			}
		}
		batch.Close()
	}()

	for h := range found {
		hashes.Remove(h)
	}
	assert.Len(hashes, 1)
	assert.True(hashes.Has(h0))
}

================================================
FILE: go/chunks/test_utils.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package chunks

import (
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
)

// assertInputInStore asserts that s holds a non-empty chunk for h whose data
// equals input.
func assertInputInStore(input string, h hash.Hash, s ChunkStore, assert *assert.Assertions) {
	chunk := s.Get(h)
	assert.False(chunk.IsEmpty(), "Shouldn't get empty chunk for %s", h.String())
	assert.Equal(input, string(chunk.Data()))
}

// assertInputNotInStore asserts that s returns an empty chunk for h. The
// input parameter is not used.
func assertInputNotInStore(input string, h hash.Hash, s ChunkStore, assert *assert.Assertions) {
	chunk := s.Get(h)
	assert.True(chunk.IsEmpty(), "Shouldn't get non-empty chunk for %s: %v", h.String(), chunk)
}

// TestStorage is a MemoryStorage whose NewView() vends counting
// TestStoreViews.
type TestStorage struct {
	MemoryStorage
}

// NewView returns a TestStoreView wrapping a fresh view of the embedded
// MemoryStorage.
func (t *TestStorage) NewView() *TestStoreView {
	return &TestStoreView{ChunkStore: t.MemoryStorage.NewView()}
}

// TestStoreView wraps a ChunkStore and counts the calls made through it.
// The counters are plain ints, incremented without synchronization.
type TestStoreView struct {
	ChunkStore
	Reads  int // incremented by Get (1) and GetMany (len(hashes))
	Hases  int // incremented by Has (1) and HasMany (len(hashes))
	Writes int // incremented by Put
}

func (s *TestStoreView) Get(h hash.Hash) Chunk {
	s.Reads++
	return s.ChunkStore.Get(h)
}

func (s *TestStoreView) GetMany(hashes hash.HashSet, foundChunks chan *Chunk) {
	s.Reads += len(hashes)
	s.ChunkStore.GetMany(hashes, foundChunks)
}

func (s *TestStoreView) Has(h hash.Hash) bool {
	s.Hases++
	return s.ChunkStore.Has(h)
}

func (s *TestStoreView) HasMany(hashes hash.HashSet) hash.HashSet {
	s.Hases += len(hashes)
	return s.ChunkStore.HasMany(hashes)
}

func (s *TestStoreView) Put(c Chunk) {
	s.Writes++
	s.ChunkStore.Put(c)
}

// TestStoreFactory keeps one TestStorage per namespace. Unlike
// memoryStoreFactory it has no mutex.
type TestStoreFactory struct {
	stores map[string]*TestStorage
}

// NewTestStoreFactory returns a TestStoreFactory with no stores yet.
func NewTestStoreFactory() *TestStoreFactory {
	return &TestStoreFactory{map[string]*TestStorage{}}
}

// CreateStore returns a new counting view onto the storage for ns, creating
// that storage on first use. It panics if called after Shutter().
func (f *TestStoreFactory) CreateStore(ns string) ChunkStore {
	if f.stores == nil {
		d.Panic("Cannot use TestStoreFactory after Shutter().")
	}
	if ts, present := f.stores[ns]; present {
		return ts.NewView()
	}
	f.stores[ns] = &TestStorage{}
	return f.stores[ns].NewView()
}

// Shutter drops all stores; later CreateStore() calls panic.
func (f *TestStoreFactory) Shutter() {
	f.stores = nil
}

================================================
FILE: go/config/config.go
================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package config import ( "bytes" "errors" "fmt" "io/ioutil" "os" "path/filepath" "github.com/BurntSushi/toml" "github.com/attic-labs/noms/go/spec" ) type Config struct { File string Db map[string]DbConfig } type DbConfig struct { Url string } const ( NomsConfigFile = ".nomsconfig" DefaultDbAlias = "default" ) var NoConfig = errors.New(fmt.Sprintf("no %s found", NomsConfigFile)) // Find the closest directory containing .nomsconfig starting // in cwd and then searching up ancestor tree. // Look first looking in cwd and then up through its ancestors func FindNomsConfig() (*Config, error) { curDir, err := os.Getwd() if err != nil { return nil, err } for { nomsConfig := filepath.Join(curDir, NomsConfigFile) info, err := os.Stat(nomsConfig) if err == nil && !info.IsDir() { // found return ReadConfig(nomsConfig) } else if err != nil && !os.IsNotExist(err) { // can't read return nil, err } nextDir := filepath.Dir(curDir) if nextDir == curDir { // stop at root return nil, NoConfig } curDir = nextDir } } func ReadConfig(name string) (*Config, error) { data, err := ioutil.ReadFile(name) if err != nil { return nil, err } c, err := NewConfig(string(data)) if err != nil { return nil, err } c.File = name return qualifyPaths(name, c) } func NewConfig(data string) (*Config, error) { c := new(Config) if _, err := toml.Decode(data, c); err != nil { return nil, err } return c, nil } func (c *Config) WriteTo(configHome string) (string, error) { file := filepath.Join(configHome, NomsConfigFile) if err := os.MkdirAll(filepath.Dir(file), os.ModePerm); err != nil { return "", err } if err := ioutil.WriteFile(file, []byte(c.writeableString()), os.ModePerm); err != nil { return "", err } return file, nil } // Replace relative directory in path part of spec with an absolute // directory. 
Assumes the path is relative to the location of the config file func absDbSpec(configHome string, url string) string { dbSpec, err := spec.ForDatabase(url) if err != nil { return url } if dbSpec.Protocol != "nbs" { return url } dbName := dbSpec.DatabaseName if !filepath.IsAbs(dbName) { dbName = filepath.Join(configHome, dbName) } return "nbs:" + dbName } func qualifyPaths(configPath string, c *Config) (*Config, error) { file, err := filepath.Abs(configPath) if err != nil { return nil, err } dir := filepath.Dir(file) qc := *c qc.File = file for k, r := range c.Db { qc.Db[k] = DbConfig{absDbSpec(dir, r.Url)} } return &qc, nil } func (c *Config) String() string { var buffer bytes.Buffer if c.File != "" { buffer.WriteString(fmt.Sprintf("file = %s\n", c.File)) } buffer.WriteString(c.writeableString()) return buffer.String() } func (c *Config) writeableString() string { var buffer bytes.Buffer for k, r := range c.Db { buffer.WriteString(fmt.Sprintf("[db.%s]\n", k)) buffer.WriteString(fmt.Sprintf("\t"+`url = "%s"`+"\n", r.Url)) } return buffer.String() } ================================================ FILE: go/config/config_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package config import ( "io/ioutil" "os" "path/filepath" "strings" "testing" "github.com/attic-labs/noms/go/spec" "github.com/stretchr/testify/assert" ) const ( nbsSpec = "nbs:./local" memSpec = "mem" httpSpec = "http://test.com:8080/foo" nbsAbsSpec = "nbs:/tmp/noms" remoteAlias = "origin" ) var ( ctestRoot = os.TempDir() ldbConfig = &Config{ "", map[string]DbConfig{ DefaultDbAlias: {nbsSpec}, remoteAlias: {httpSpec}, }, } httpConfig = &Config{ "", map[string]DbConfig{ DefaultDbAlias: {httpSpec}, remoteAlias: {nbsSpec}, }, } memConfig = &Config{ "", map[string]DbConfig{ DefaultDbAlias: {memSpec}, remoteAlias: {httpSpec}, }, } ldbAbsConfig = &Config{ "", map[string]DbConfig{ DefaultDbAlias: {nbsAbsSpec}, remoteAlias: {httpSpec}, }, } ) type paths struct { home string config string } func getPaths(assert *assert.Assertions, base string) paths { abs, err := filepath.Abs(ctestRoot) assert.NoError(err) abs, err = filepath.EvalSymlinks(ctestRoot) assert.NoError(err) home := filepath.Join(abs, base) config := filepath.Join(home, NomsConfigFile) return paths{home, config} } func qualifyFilePath(assert *assert.Assertions, path string) string { p, err := filepath.Abs(path) assert.NoError(err) return p } func assertDbSpecsEquiv(assert *assert.Assertions, expected string, actual string) { e, err := spec.ForDatabase(expected) assert.NoError(err) if e.Protocol != "nbs" { assert.Equal(expected, actual) } else { a, err := spec.ForDatabase(actual) assert.NoError(err) assert.Equal(e.Protocol, a.Protocol, actual) if filepath.IsAbs(e.DatabaseName) { assert.Equal(e.DatabaseName, a.DatabaseName, actual) } else { // If the original path is relative, it will return as absolute. // All we do here is ensure that the path suffix is the same. 
eName := strings.TrimPrefix(e.DatabaseName, ".") assert.True(strings.HasSuffix(a.DatabaseName, eName), "expected: %s; actual: %s", eName, actual) } } } func validateConfig(assert *assert.Assertions, file string, e *Config, a *Config) { assert.Equal(qualifyFilePath(assert, file), qualifyFilePath(assert, a.File)) assert.Equal(len(e.Db), len(a.Db)) for k, er := range e.Db { ar, ok := a.Db[k] assert.True(ok) assertDbSpecsEquiv(assert, er.Url, ar.Url) } } func writeConfig(assert *assert.Assertions, c *Config, home string) string { file, err := c.WriteTo(home) assert.NoError(err, home) return file } func TestConfig(t *testing.T) { assert := assert.New(t) path := getPaths(assert, "home") writeConfig(assert, ldbConfig, path.home) // Test from home assert.NoError(os.Chdir(path.home)) c, err := FindNomsConfig() assert.NoError(err, path.config) validateConfig(assert, path.config, ldbConfig, c) // Test from subdir subdir := filepath.Join(path.home, "subdir") assert.NoError(os.MkdirAll(subdir, os.ModePerm)) assert.NoError(os.Chdir(subdir)) c, err = FindNomsConfig() assert.NoError(err, path.config) validateConfig(assert, path.config, ldbConfig, c) // Test from subdir with intervening .nomsconfig directory nomsDir := filepath.Join(subdir, NomsConfigFile) err = os.MkdirAll(nomsDir, os.ModePerm) assert.NoError(err, nomsDir) assert.NoError(os.Chdir(subdir)) c, err = FindNomsConfig() assert.NoError(err, path.config) validateConfig(assert, path.config, ldbConfig, c) } func TestUnreadableConfig(t *testing.T) { // BUG 3816 if os.Getenv("DOCKER") != "" { t.Skip("Skipping testing in Docker environment") } assert := assert.New(t) path := getPaths(assert, "home.unreadable") writeConfig(assert, ldbConfig, path.home) assert.NoError(os.Chmod(path.config, 0333)) // write-only assert.NoError(os.Chdir(path.home)) _, err := FindNomsConfig() assert.Error(err, path.config) } func TestNoConfig(t *testing.T) { assert := assert.New(t) path := getPaths(assert, "home.none") 
assert.NoError(os.MkdirAll(path.home, os.ModePerm)) assert.NoError(os.Chdir(path.home)) _, err := FindNomsConfig() assert.Equal(NoConfig, err) } func TestBadConfig(t *testing.T) { assert := assert.New(t) path := getPaths(assert, "home.bad") cfile := writeConfig(assert, ldbConfig, path.home) // overwrite with something invalid assert.NoError(ioutil.WriteFile(cfile, []byte("invalid config"), os.ModePerm)) assert.NoError(os.Chdir(path.home)) _, err := FindNomsConfig() assert.Error(err, path.config) } func TestQualifyingPaths(t *testing.T) { assert := assert.New(t) path := getPaths(assert, "home") assert.NoError(os.Chdir(path.home)) for _, tc := range []*Config{httpConfig, memConfig, ldbAbsConfig} { writeConfig(assert, tc, path.home) ac, err := FindNomsConfig() assert.NoError(err, path.config) validateConfig(assert, path.config, tc, ac) } } func TestCwd(t *testing.T) { assert := assert.New(t) cwd, err := os.Getwd() assert.NoError(err) cwd = filepath.Join(cwd, "test") abs, err := filepath.Abs("test") assert.NoError(err) assert.Equal(cwd, abs) } ================================================ FILE: go/config/resolver.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package config import ( "fmt" "strings" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/verbose" ) type Resolver struct { config *Config dotDatapath string // set to the first datapath that was resolved } // A Resolver enables using db defaults, db aliases and dataset '.' replacement in command // line arguments when a .nomsconfig file is present. To use it, create a config resolver // before command line processing and use it to resolve each dataspec argument in // succession. 
func NewResolver() *Resolver { c, err := FindNomsConfig() if err != nil { if err != NoConfig { panic(fmt.Errorf("Failed to read .nomsconfig due to: %v", err)) } return &Resolver{} } return &Resolver{c, ""} } // Print replacement if one occurred func (r *Resolver) verbose(orig string, replacement string) string { if orig != replacement { if orig == "" { orig = `""` } verbose.Log("\tresolving %s -> %s\n", orig, replacement) } return replacement } // Resolve string to database name. If config is defined: // - replace the empty string with the default db url // - replace any db alias with it's url func (r *Resolver) ResolveDbSpec(str string) string { if r.config != nil { if str == "" { return r.config.Db[DefaultDbAlias].Url } if val, ok := r.config.Db[str]; ok { return val.Url } } return str } // Resolve string to dataset or path name. // - replace database name as described in ResolveDatabase // - if this is the first call to ResolvePath, remember the // datapath part for subsequent calls. // - if this is not the first call and a "." is used, replace // it with the first datapath. func (r *Resolver) ResolvePathSpec(str string) string { if r.config != nil { split := strings.SplitN(str, spec.Separator, 2) db, rest := "", split[0] if len(split) > 1 { db, rest = split[0], split[1] } if r.dotDatapath == "" { r.dotDatapath = rest } else if rest == "." { rest = r.dotDatapath } return r.ResolveDbSpec(db) + spec.Separator + rest } return str } // Resolve string to database spec. If a config is present, // - resolve a db alias to its db spec // - resolve "" to the default db spec func (r *Resolver) GetDatabase(str string) (datas.Database, error) { sp, err := spec.ForDatabase(r.verbose(str, r.ResolveDbSpec(str))) if err != nil { return nil, err } return sp.GetDatabase(), nil } // Resolve string to a chunkstore. 
Like ResolveDatabase, but returns the underlying ChunkStore func (r *Resolver) GetChunkStore(str string) (chunks.ChunkStore, error) { sp, err := spec.ForDatabase(r.verbose(str, r.ResolveDbSpec(str))) if err != nil { return nil, err } return sp.NewChunkStore(), nil } // Resolve string to a dataset. If a config is present, // - if no db prefix is present, assume the default db // - if the db prefix is an alias, replace it func (r *Resolver) GetDataset(str string) (datas.Database, datas.Dataset, error) { sp, err := spec.ForDataset(r.verbose(str, r.ResolvePathSpec(str))) if err != nil { return nil, datas.Dataset{}, err } return sp.GetDatabase(), sp.GetDataset(), nil } // Resolve string to a value path. If a config is present, // - if no db spec is present, assume the default db // - if the db spec is an alias, replace it func (r *Resolver) GetPath(str string) (datas.Database, types.Value, error) { sp, err := spec.ForPath(r.verbose(str, r.ResolvePathSpec(str))) if err != nil { return nil, nil, err } return sp.GetDatabase(), sp.GetValue(), nil } ================================================ FILE: go/config/resolver_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package config import ( "fmt" "os" "path/filepath" "testing" "github.com/attic-labs/noms/go/spec" "github.com/stretchr/testify/assert" ) const ( localSpec = nbsSpec remoteSpec = httpSpec testDs = "testds" testObject = "#pckdvpvr9br1fie6c3pjudrlthe7na18" ) type testData struct { input string expected string } var ( rtestRoot = os.TempDir() rtestConfig = &Config{ "", map[string]DbConfig{ DefaultDbAlias: {localSpec}, remoteAlias: {remoteSpec}, }, } dbTestsNoAliases = []testData{ {localSpec, localSpec}, {remoteSpec, remoteSpec}, } dbTestsWithAliases = []testData{ {"", localSpec}, {remoteAlias, remoteSpec}, } pathTestsNoAliases = []testData{ {remoteSpec + "::" + testDs, remoteSpec + "::" + testDs}, {remoteSpec + "::" + testObject, remoteSpec + "::" + testObject}, } pathTestsWithAliases = []testData{ {testDs, localSpec + "::" + testDs}, {remoteAlias + "::" + testDs, remoteSpec + "::" + testDs}, {testObject, localSpec + "::" + testObject}, {remoteAlias + "::" + testObject, remoteSpec + "::" + testObject}, } ) func withConfig(t *testing.T) *Resolver { assert := assert.New(t) dir := filepath.Join(rtestRoot, "with-config") _, err := rtestConfig.WriteTo(dir) assert.NoError(err, dir) assert.NoError(os.Chdir(dir)) r := NewResolver() // resolver must be created after changing directory return r } func withoutConfig(t *testing.T) *Resolver { assert := assert.New(t) dir := filepath.Join(rtestRoot, "without-config") assert.NoError(os.MkdirAll(dir, os.ModePerm), dir) assert.NoError(os.Chdir(dir)) r := NewResolver() // resolver must be created after changing directory return r } func assertPathSpecsEquiv(assert *assert.Assertions, expected string, actual string) { e, err := spec.ForPath(expected) assert.NoError(err) a, err := spec.ForPath(actual) assert.NoError(err) databaseSpec := func(sp spec.Spec) string { return fmt.Sprintf("%s:%s", sp.Protocol, sp.DatabaseName) } 
assertDbSpecsEquiv(assert, databaseSpec(e), databaseSpec(a)) assert.Equal(e.Path.String(), a.Path.String()) } func TestResolveDatabaseWithConfig(t *testing.T) { r := withConfig(t) assert := assert.New(t) for _, d := range append(dbTestsNoAliases, dbTestsWithAliases...) { db := r.ResolveDbSpec(d.input) assertDbSpecsEquiv(assert, d.expected, db) } } func TestResolvePathWithConfig(t *testing.T) { r := withConfig(t) assert := assert.New(t) for _, d := range append(pathTestsNoAliases, pathTestsWithAliases...) { path := r.ResolvePathSpec(d.input) assertPathSpecsEquiv(assert, d.expected, path) } } func TestResolveDatabaseWithoutConfig(t *testing.T) { r := withoutConfig(t) assert := assert.New(t) for _, d := range dbTestsNoAliases { db := r.ResolveDbSpec(d.input) assert.Equal(d.expected, db, d.input) } } func TestResolvePathWithoutConfig(t *testing.T) { r := withoutConfig(t) assert := assert.New(t) for _, d := range pathTestsNoAliases { path := r.ResolvePathSpec(d.input) assertPathSpecsEquiv(assert, d.expected, path) } } func TestResolveDestPathWithDot(t *testing.T) { r := withConfig(t) assert := assert.New(t) data := []struct { src string dest string expSrc string expDest string }{ {testDs, remoteSpec + "::.", localSpec + "::" + testDs, remoteSpec + "::" + testDs}, {remoteSpec + "::" + testDs, ".", remoteSpec + "::" + testDs, localSpec + "::" + testDs}, } for _, d := range data { src := r.ResolvePathSpec(d.src) dest := r.ResolvePathSpec(d.dest) assertPathSpecsEquiv(assert, d.expSrc, src) assertPathSpecsEquiv(assert, d.expDest, dest) } } ================================================ FILE: go/constants/http.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package constants

// Path constants.
// NOTE(review): given the file name (http.go) these are presumably the URL
// paths of the HTTP API -- confirm against the request handlers.
const (
	RootPath       = "/root/"
	GetRefsPath    = "/getRefs/"
	GetBlobPath    = "/getBlob/"
	HasRefsPath    = "/hasRefs/"
	WriteValuePath = "/writeValue/"
	BasePath       = "/"
	GraphQLPath    = "/graphql/"
	StatsPath      = "/stats/"
)

================================================
FILE: go/constants/version.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// Package constants collects common constants used in Noms, such as the Noms data format version.
package constants

// NomsVersion is the Noms data format version string.
const NomsVersion = "7.18"

// NomsGitSHA is empty by default.
// NOTE(review): it is a var, so presumably it is overridden at build time
// (e.g. via -ldflags -X) -- confirm against the build scripts.
var NomsGitSHA = ""

================================================
FILE: go/d/check_error.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package d

import (
	"fmt"
	"os"

	"github.com/attic-labs/kingpin"
	"github.com/attic-labs/noms/go/util/exit"
)

// CheckError does nothing when err is nil. Otherwise it writes
// "error: <err>" to stderr, prints the kingpin usage text and then calls
// exit.Fail().
func CheckError(err error) {
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %s\n", err)
		kingpin.Usage()
		exit.Fail()
	}
}

// CheckErrorNoUsage is CheckError without the usage text: on a non-nil err it
// writes "error: <err>" to stderr and calls exit.Fail().
func CheckErrorNoUsage(err error) {
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %s\n", err)
		exit.Fail()
	}
}

================================================
FILE: go/d/try.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// Package d implements several debug, error and assertion functions used throughout Noms.
package d

import (
	"errors"
	"fmt"
	"reflect"

	"github.com/stretchr/testify/assert"
)

// d.Chk.<Method>() -- used in test cases and as assertions
var (
	Chk = assert.New(&panicker{})
)

// panicker is the failure sink handed to assert.New for Chk: instead of
// recording a test failure it panics with the formatted message.
type panicker struct {
}

// Errorf panics with the message built from format and args.
func (s panicker) Errorf(format string, args ...interface{}) {
	panic(fmt.Sprintf(format, args...))
}

// Panic(err) creates an error using format and args and wraps it in a
// WrappedError which can be handled using Try() and TryCatch()
func Panic(format string, args ...interface{}) {

	if len(args) == 0 {
		// No args: use format verbatim so that any '%' in it is not
		// interpreted as a formatting verb.
		err := errors.New(format)
		panic(Wrap(err))
	}
	err := fmt.Errorf(format, args...)
	panic(Wrap(err))
}

// PanicIfError(err) && PanicIfTrue(expr) can be used to panic in a way that's
// easily handled by Try() and TryCatch()
func PanicIfError(err error) {
	if err != nil {
		panic(Wrap(err))
	}
}

// If b is true, creates a default error, wraps it and panics.
// NOTE(review): the message reads "Expected true" although the panic fires
// when b IS true -- confirm the intended wording before relying on it.
func PanicIfTrue(b bool) {
	if b {
		panic(Wrap(errors.New("Expected true")))
	}
}

// If b is false, creates a default error, wraps it and panics.
// NOTE(review): the message reads "Expected false" although the panic fires
// when b IS false -- confirm the intended wording before relying on it.
func PanicIfFalse(b bool) {
	if !b {
		panic(Wrap(errors.New("Expected false")))
	}
}

// If 'f' panics with a WrappedError then recover that error.
// If types is empty, return the WrappedError.
// if types is not empty and cause is not one of the listed types, re-panic.
// if types is not empty and cause is one of the types, return 'cause'
func Try(f func(), types ...interface{}) (err error) {
	defer recoverWrappedTypes(&err, types)
	f()
	return
}

// TryCatch runs 'f'. If 'f' panics with a wrapped error, the panic is
// recovered and the wrapped error is passed to 'catch'; whatever 'catch'
// returns becomes the result. If 'catch' is nil, the unwrapped cause is
// returned instead. A panic carrying any other value is re-raised.
// 'catch' may itself panic to keep the error propagating.
func TryCatch(f func(), catch func(err error) error) (err error) {
	defer recoverWrapped(&err, catch)
	f()
	return
}

// WrappedError is an error that also exposes the error it wraps via Cause().
type WrappedError interface {
	Error() string
	Cause() error
}

// Wraps an error.
The enclosing error has a default Error() that contains the error msg along // with a backtrace. The original error can be retrieved by calling err.Cause(). func Wrap(err error) WrappedError { if err == nil { return nil } if we, ok := err.(WrappedError); ok { return we } st := stackTracer{} assert := assert.New(&st) assert.Fail(err.Error()) return wrappedError{st.stackTrace, err} } // If err is a WrappedError, then Cause() is returned, otherwise returns err. func Unwrap(err error) error { cause := err we, ok := err.(WrappedError) if ok { cause = we.Cause() } return cause } func causeInTypes(err error, types ...interface{}) bool { cause := Unwrap(err) typ := reflect.TypeOf(cause) for _, t := range types { if typ == reflect.TypeOf(t) { return true } } return false } // Utility method, that checks type of error and panics with wrapped error not one of the listed types. func PanicIfNotType(err error, types ...interface{}) error { if err == nil { return nil } if !causeInTypes(err, types...) { we, ok := err.(WrappedError) if !ok { we = Wrap(err) } panic(we) } return Unwrap(err) } type wrappedError struct { msg string cause error } func (we wrappedError) Error() string { return we.msg } func (we wrappedError) Cause() error { return we.cause } func (we wrappedError) Unwrap() error { return we.cause } type stackTracer struct { stackTrace string } func (s *stackTracer) Errorf(format string, args ...interface{}) { s.stackTrace = fmt.Sprintf(format, args...) } func recoverWrappedTypes(errp *error, types []interface{}) { if r := recover(); r != nil { if wrapper, ok := r.(wrappedError); !ok { panic(r) } else if len(types) > 0 && !causeInTypes(wrapper, types...) 
{ panic(r) } else if len(types) > 0 { *errp = wrapper.Cause() } else { *errp = wrapper } } } func recoverWrapped(errp *error, catch func(err error) error) { if r := recover(); r != nil { we, ok := r.(wrappedError) if !ok { panic(r) } if catch != nil { *errp = catch(we) } else { *errp = Unwrap(we) } } } ================================================ FILE: go/d/try_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package d import ( "errors" "fmt" "testing" "github.com/stretchr/testify/assert" ) var ( te = testError{"te"} te2 = testError2{"te2"} ) type testError struct { s string } func (e testError) Error() string { return e.s } type testError2 struct { s string } func (e testError2) Error() string { return e.s } func TestTry2(t *testing.T) { assert := assert.New(t) assert.Panics(func() { Try(func() { panic(te) }) }) assert.Panics(func() { Try(func() { PanicIfError(te) }, te2) }) assert.Error(func() error { return Try(func() { PanicIfError(te) }) }()) assert.Error(func() error { return Try(func() { PanicIfError(te) }, testError{}) }()) assert.Nil(func() error { return Try(func() { PanicIfError(nil) }) }()) } func TestTryCatch(t *testing.T) { assert := assert.New(t) assert.Panics(func() { TryCatch(func() { panic(Wrap(te)) }, func(err error) error { if !causeInTypes(err, testError2{}) { panic(err) } return Unwrap(err) }) }) assert.Panics(func() { TryCatch(func() { panic(te) }, func(err error) error { if !causeInTypes(err, testError{}) { panic(err) } return Unwrap(err) }) }) assert.IsType(wrappedError{}, func() error { return TryCatch(func() { panic(Wrap(te)) }, func(err error) error { return err }) }()) assert.Error(func() error { return TryCatch(func() { panic(Wrap(te)) }, func(err error) error { if !causeInTypes(err, testError2{}, testError{}) { panic(err) } return Unwrap(err) }) }()) } func TestUnwrap(t 
*testing.T) { assert := assert.New(t) err := errors.New("test") we := wrappedError{"test msg", err} assert.Equal(err, Unwrap(err)) assert.Equal(err, Unwrap(we)) } func TestPanicIfTrue(t *testing.T) { assert := assert.New(t) arg := "arg value" format := "could be a format: %s" formatted := fmt.Sprintf(format, arg) assert.Panics(func() { PanicIfTrue(true) }) assert.Panics(func() { PanicIfTrue(true) }) assert.NotPanics(func() { PanicIfTrue(false) }) err := Try(func() { Panic(format) }) assert.Equal(errors.New(format), Unwrap(err)) err = Try(func() { Panic(format, arg) }) assert.Equal(errors.New(formatted), Unwrap(err)) } func TestPanicIfFalse(t *testing.T) { assert := assert.New(t) arg := "arg value" format := "could be a format: %s" formatted := fmt.Sprintf(format, arg) assert.Panics(func() { PanicIfFalse(false) }) assert.Panics(func() { PanicIfFalse(false) }) assert.NotPanics(func() { PanicIfFalse(true) }) err := Try(func() { Panic(format) }) assert.Equal(errors.New(format), Unwrap(err)) err = Try(func() { Panic(format, arg) }) assert.Equal(errors.New(formatted), Unwrap(err)) } func TestPanicIfNotType(t *testing.T) { assert := assert.New(t) te := testError{"te"} te2 := testError2{"te2"} assert.Panics(func() { PanicIfNotType(te, te2) }) assert.Equal(te, PanicIfNotType(te, te)) assert.Equal(te2, PanicIfNotType(te2, te, te2)) } func TestCauseInTypes(t *testing.T) { assert := assert.New(t) te := testError{"te"} te2 := testError2{"te2"} assert.True(causeInTypes(te, te)) assert.True(causeInTypes(te, te2, te)) assert.False(causeInTypes(te, te2)) assert.False(causeInTypes(te)) } func TestWrap(t *testing.T) { assert := assert.New(t) te := testError{"te"} we := Wrap(te) assert.Equal(te, we.Cause()) assert.Equal(te, errors.Unwrap(we)) assert.IsType(wrappedError{}, we) assert.Equal(we, Wrap(we)) fmt.Printf("st: %s, cause: %s\n", we.Error(), we.Cause()) assert.Nil(Wrap(nil)) } ================================================ FILE: go/datas/commit.go 
================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datas import ( "sort" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/attic-labs/noms/go/nomdl" "github.com/attic-labs/noms/go/types" ) const ( ParentsField = "parents" ValueField = "value" MetaField = "meta" commitName = "Commit" ) var commitTemplate = types.MakeStructTemplate(commitName, []string{MetaField, ParentsField, ValueField}) var valueCommitType = nomdl.MustParseType(`Struct Commit { meta: Struct {}, parents: Set>>, value: Value, }`) // NewCommit creates a new commit object. // // A commit has the following type: // // ``` // struct Commit { // meta: M, // parents: Set>>, // value: T, // } // ``` // where M is a struct type and T is any type. func NewCommit(value types.Value, parents types.Set, meta types.Struct) types.Struct { return commitTemplate.NewStruct([]types.Value{meta, parents, value}) } // FindCommonAncestor returns the most recent common ancestor of c1 and c2, if // one exists, setting ok to true. If there is no common ancestor, ok is set // to false. 
func FindCommonAncestor(c1, c2 types.Ref, vr types.ValueReader) (a types.Ref, ok bool) { if !IsRefOfCommitType(types.TypeOf(c1)) { d.Panic("FindCommonAncestor() called on %s", types.TypeOf(c1).Describe()) } if !IsRefOfCommitType(types.TypeOf(c2)) { d.Panic("FindCommonAncestor() called on %s", types.TypeOf(c2).Describe()) } c1Q, c2Q := &types.RefByHeight{c1}, &types.RefByHeight{c2} for !c1Q.Empty() && !c2Q.Empty() { c1Ht, c2Ht := c1Q.MaxHeight(), c2Q.MaxHeight() if c1Ht == c2Ht { c1Parents, c2Parents := c1Q.PopRefsOfHeight(c1Ht), c2Q.PopRefsOfHeight(c2Ht) if common, ok := findCommonRef(c1Parents, c2Parents); ok { return common, true } parentsToQueue(c1Parents, c1Q, vr) parentsToQueue(c2Parents, c2Q, vr) } else if c1Ht > c2Ht { parentsToQueue(c1Q.PopRefsOfHeight(c1Ht), c1Q, vr) } else { parentsToQueue(c2Q.PopRefsOfHeight(c2Ht), c2Q, vr) } } return } func parentsToQueue(refs types.RefSlice, q *types.RefByHeight, vr types.ValueReader) { for _, r := range refs { c := r.TargetValue(vr).(types.Struct) p := c.Get(ParentsField).(types.Set) p.IterAll(func(v types.Value) { q.PushBack(v.(types.Ref)) }) } sort.Sort(q) } func findCommonRef(a, b types.RefSlice) (types.Ref, bool) { toRefSet := func(s types.RefSlice) map[hash.Hash]types.Ref { out := map[hash.Hash]types.Ref{} for _, r := range s { out[r.TargetHash()] = r } return out } aSet, bSet := toRefSet(a), toRefSet(b) for s, r := range aSet { if _, present := bSet[s]; present { return r, true } } return types.Ref{}, false } func makeCommitStructType(metaType, parentsType, valueType *types.Type) *types.Type { return types.MakeStructType("Commit", types.StructField{ Name: MetaField, Type: metaType, }, types.StructField{ Name: ParentsField, Type: parentsType, }, types.StructField{ Name: ValueField, Type: valueType, }, ) } func getRefElementType(t *types.Type) *types.Type { d.PanicIfFalse(t.TargetKind() == types.RefKind) return t.Desc.(types.CompoundDesc).ElemTypes[0] } func IsCommitType(t *types.Type) bool { return 
types.IsSubtype(valueCommitType, t) } func IsCommit(v types.Value) bool { return types.IsValueSubtypeOf(v, valueCommitType) } func IsRefOfCommitType(t *types.Type) bool { return t.TargetKind() == types.RefKind && IsCommitType(getRefElementType(t)) } ================================================ FILE: go/datas/commit_options.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datas import ( "github.com/attic-labs/noms/go/merge" "github.com/attic-labs/noms/go/types" ) // CommitOptions is used to pass options into Commit. type CommitOptions struct { // Parents, if provided is the parent commits of the commit we are // creating. Parents types.Set // Meta is a Struct that describes arbitrary metadata about this Commit, // e.g. a timestamp or descriptive text. Meta types.Struct // Policy will be called to attempt to merge this Commit with the current // Head, if this is not a fast-forward. If Policy is nil, no merging will // be attempted. Note that because Commit() retries in some cases, Policy // might also be called multiple times with different values. Policy merge.Policy } ================================================ FILE: go/datas/commit_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datas import ( "fmt" "strings" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/nomdl" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestNewCommit(t *testing.T) { assert := assert.New(t) assertTypeEquals := func(e, a *types.Type) { assert.True(a.Equals(e), "Actual: %s\nExpected %s", a.Describe(), e.Describe()) } storage := &chunks.TestStorage{} db := NewDatabase(storage.NewView()) defer db.Close() commit := NewCommit(types.Number(1), types.NewSet(db), types.EmptyStruct) at := types.TypeOf(commit) et := makeCommitStructType( types.EmptyStructType, types.MakeSetType(types.MakeUnionType()), types.NumberType, ) assertTypeEquals(et, at) // Committing another Number commit2 := NewCommit(types.Number(2), types.NewSet(db, types.NewRef(commit)), types.EmptyStruct) at2 := types.TypeOf(commit2) et2 := nomdl.MustParseType(`Struct Commit { meta: Struct {}, parents: Set>>, value: Number, }`) assertTypeEquals(et2, at2) // Now commit a String commit3 := NewCommit(types.String("Hi"), types.NewSet(db, types.NewRef(commit2)), types.EmptyStruct) at3 := types.TypeOf(commit3) et3 := nomdl.MustParseType(`Struct Commit { meta: Struct {}, parents: Set>>, value: Number | String, }`) assertTypeEquals(et3, at3) // Now commit a String with MetaInfo meta := types.NewStruct("Meta", types.StructData{"date": types.String("some date"), "number": types.Number(9)}) metaType := nomdl.MustParseType(`Struct Meta { date: String, number: Number, }`) assertTypeEquals(metaType, types.TypeOf(meta)) commit4 := NewCommit(types.String("Hi"), types.NewSet(db, types.NewRef(commit2)), meta) at4 := types.TypeOf(commit4) et4 := nomdl.MustParseType(`Struct Commit { meta: Struct {} | Struct Meta { date: String, number: Number, }, parents: Set>>, value: Number | String, }`) assertTypeEquals(et4, at4) // Merge-commit with different parent types 
commit5 := NewCommit(types.String("Hi"), types.NewSet(db, types.NewRef(commit2), types.NewRef(commit3)), types.EmptyStruct) at5 := types.TypeOf(commit5) et5 := nomdl.MustParseType(`Struct Commit { meta: Struct {}, parents: Set>>, value: Number | String, }`) assertTypeEquals(et5, at5) } func TestCommitWithoutMetaField(t *testing.T) { assert := assert.New(t) storage := &chunks.TestStorage{} db := NewDatabase(storage.NewView()) defer db.Close() metaCommit := types.NewStruct("Commit", types.StructData{ "value": types.Number(9), "parents": types.NewSet(db), "meta": types.EmptyStruct, }) assert.True(IsCommit(metaCommit)) assert.True(IsCommitType(types.TypeOf(metaCommit))) noMetaCommit := types.NewStruct("Commit", types.StructData{ "value": types.Number(9), "parents": types.NewSet(db), }) assert.False(IsCommit(noMetaCommit)) assert.False(IsCommitType(types.TypeOf(noMetaCommit))) } // Convert list of Struct's to Set func toRefSet(vrw types.ValueReadWriter, commits ...types.Struct) types.Set { set := types.NewSet(vrw).Edit() for _, p := range commits { set.Insert(types.NewRef(p)) } return set.Set() } // Convert Set> to a string of Struct.Get("value")'s func toValuesString(refSet types.Set, vr types.ValueReader) string { values := []string{} refSet.IterAll(func(v types.Value) { values = append(values, fmt.Sprintf("%v", v.(types.Ref).TargetValue(vr).(types.Struct).Get("value"))) }) return strings.Join(values, ",") } func TestFindCommonAncestor(t *testing.T) { assert := assert.New(t) storage := &chunks.TestStorage{} db := NewDatabase(storage.NewView()) defer db.Close() // Add a commit and return it addCommit := func(datasetID string, val string, parents ...types.Struct) types.Struct { ds := db.GetDataset(datasetID) var err error ds, err = db.Commit(ds, types.String(val), CommitOptions{Parents: toRefSet(db, parents...)}) assert.NoError(err) return ds.Head() } // Assert that c is the common ancestor of a and b assertCommonAncestor := func(expected, a, b types.Struct) { if found, 
ok := FindCommonAncestor(types.NewRef(a), types.NewRef(b), db); assert.True(ok) { ancestor := found.TargetValue(db).(types.Struct) assert.True( expected.Equals(ancestor), "%s should be common ancestor of %s, %s. Got %s", expected.Get(ValueField), a.Get(ValueField), b.Get(ValueField), ancestor.Get(ValueField), ) } } // Build commit DAG // // ds-a: a1<-a2<-a3<-a4<-a5<-a6 // ^ ^ ^ | // | \ \----\ /-/ // | \ \V // ds-b: \ b3<-b4<-b5 // \ // \ // ds-c: c2<-c3 // / // / // V // ds-d: d1<-d2 // a, b, c, d := "ds-a", "ds-b", "ds-c", "ds-d" a1 := addCommit(a, "a1") d1 := addCommit(d, "d1") a2 := addCommit(a, "a2", a1) c2 := addCommit(c, "c2", a1) d2 := addCommit(d, "d2", d1) a3 := addCommit(a, "a3", a2) b3 := addCommit(b, "b3", a2) c3 := addCommit(c, "c3", c2, d2) a4 := addCommit(a, "a4", a3) b4 := addCommit(b, "b4", b3) a5 := addCommit(a, "a5", a4) b5 := addCommit(b, "b5", b4, a3) a6 := addCommit(a, "a6", a5, b5) assertCommonAncestor(a1, a1, a1) // All self assertCommonAncestor(a1, a1, a2) // One side self assertCommonAncestor(a2, a3, b3) // Common parent assertCommonAncestor(a2, a4, b4) // Common grandparent assertCommonAncestor(a1, a6, c3) // Traversing multiple parents on both sides // No common ancestor if found, ok := FindCommonAncestor(types.NewRef(d2), types.NewRef(a6), db); !assert.False(ok) { assert.Fail( "Unexpected common ancestor!", "Should be no common ancestor of %s, %s. 
Got %s", d2.Get(ValueField), a6.Get(ValueField), found.TargetValue(db).(types.Struct).Get(ValueField), ) } } func TestNewCommitRegressionTest(t *testing.T) { storage := &chunks.TestStorage{} db := NewDatabase(storage.NewView()) defer db.Close() c1 := NewCommit(types.String("one"), types.NewSet(db), types.EmptyStruct) cx := NewCommit(types.Bool(true), types.NewSet(db), types.EmptyStruct) value := types.String("two") parents := types.NewSet(db, types.NewRef(c1)) meta := types.NewStruct("", types.StructData{ "basis": cx, }) // Used to fail NewCommit(value, parents, meta) } ================================================ FILE: go/datas/database.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package datas defines and implements the database layer used in Noms. package datas import ( "io" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/types" ) // Database provides versioned storage for noms values. While Values can be // directly read and written from a Database, it is generally more appropriate // to read data by inspecting the Head of a Dataset and write new data by // updating the Head of a Dataset via Commit() or similar. Particularly, new // data is not guaranteed to be persistent until after a Commit (Delete, // SetHead, or FastForward) operation completes. // The Database API is stateful, meaning that calls to GetDataset() or // Datasets() occurring after a call to Commit() (et al) will represent the // result of the Commit(). type Database interface { // To implement types.ValueWriter, Database implementations provide // WriteValue(). WriteValue() writes v to this Database, though v is not // guaranteed to be be persistent until after a subsequent Commit(). The // return value is the Ref of v. 
// Written values won't be persisted until a commit-alike
	types.ValueReadWriter

	// Close must have no side-effects
	io.Closer

	// Datasets returns the root of the database which is a
	// Map<String, Ref<Commit>> where string is a datasetID.
	Datasets() types.Map

	// GetDataset returns a Dataset struct containing the current mapping of
	// datasetID in the above Datasets Map.
	GetDataset(datasetID string) Dataset

	// Rebase brings this Database's view of the world inline with upstream.
	Rebase()

	// Commit updates the Commit that ds.ID() in this database points at. All
	// Values that have been written to this Database are guaranteed to be
	// persistent after Commit() returns.
	// The new Commit struct is constructed using v, opts.Parents, and
	// opts.Meta. If opts.Parents is the zero value (types.Set{}) then
	// the current head is used. If opts.Meta is the zero value
	// (types.Struct{}) then a fully initialized empty Struct is passed to
	// NewCommit.
	// The returned Dataset is always the newest snapshot, regardless of
	// success or failure, and Datasets() is updated to match backing storage
	// upon return as well. If the update cannot be performed, e.g., because
	// of a conflict, Commit returns an 'ErrMergeNeeded' error.
	Commit(ds Dataset, v types.Value, opts CommitOptions) (Dataset, error)

	// CommitValue updates the Commit that ds.ID() in this database points at.
	// All Values that have been written to this Database are guaranteed to be
	// persistent after CommitValue() returns.
	// The new Commit struct is constructed using `v`, and the current Head of
	// `ds` as the lone Parent.
	// The returned Dataset is always the newest snapshot, regardless of
	// success or failure, and Datasets() is updated to match backing storage
	// upon return as well. If the update cannot be performed, e.g., because
	// of a conflict, CommitValue returns an 'ErrMergeNeeded' error.
	CommitValue(ds Dataset, v types.Value) (Dataset, error)

	// Delete removes the Dataset named ds.ID() from the map at the root of
	// the Database. The Dataset data is not necessarily cleaned up at this
	// time, but may be garbage collected in the future.
	// The returned Dataset is always the newest snapshot, regardless of
	// success or failure, and Datasets() is updated to match backing storage
	// upon return as well. If the update cannot be performed, e.g., because
	// of a conflict, Delete returns an 'ErrMergeNeeded' error.
	Delete(ds Dataset) (Dataset, error)

	// SetHead ignores any lineage constraints (e.g. the current Head being in
	// commit’s Parent set) and force-sets a mapping from datasetID: commit in
	// this database.
	// All Values that have been written to this Database are guaranteed to be
	// persistent after SetHead(). If the update cannot be performed, e.g.,
	// because another process moved the current Head out from under you,
	// error will be non-nil.
	// The newest snapshot of the Dataset is always returned, so the caller
	// can easily retry using the latest.
	// Regardless, Datasets() is updated to match backing storage upon return.
	SetHead(ds Dataset, newHeadRef types.Ref) (Dataset, error)

	// FastForward takes a types.Ref to a Commit object and makes it the new
	// Head of ds iff it is a descendant of the current Head. Intended to be
	// used e.g. after a call to Pull(). If the update cannot be performed,
	// e.g., because another process moved the current Head out from under
	// you, err will be non-nil.
	// The newest snapshot of the Dataset is always returned, so the caller
	// can easily retry using the latest.
	// Regardless, Datasets() is updated to match backing storage upon return.
	FastForward(ds Dataset, newHeadRef types.Ref) (Dataset, error)

	// Stats may return some kind of struct that reports statistics about the
	// ChunkStore that backs this Database instance. The type is
	// implementation-dependent, and impls may return nil
	Stats() interface{}

	// StatsSummary may return a string containing summarized statistics for
	// the ChunkStore that backs this Database.
It must return "Unsupported" // if this operation is not supported. StatsSummary() string Flush() // chunkStore returns the ChunkStore used to read and write // groups of values to the database efficiently. This interface is a low- // level detail of the database that should infrequently be needed by // clients. chunkStore() chunks.ChunkStore } func NewDatabase(cs chunks.ChunkStore) Database { return newDatabase(cs) } ================================================ FILE: go/datas/database_common.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datas import ( "errors" "fmt" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/attic-labs/noms/go/merge" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/random" ) type database struct { *types.ValueStore rt rootTracker } var ( ErrOptimisticLockFailed = errors.New("Optimistic lock failed on database Root update") ErrMergeNeeded = errors.New("Dataset head is not ancestor of commit") ) // rootTracker is a narrowing of the ChunkStore interface, to keep Database disciplined about working directly with Chunks type rootTracker interface { Rebase() Root() hash.Hash Commit(current, last hash.Hash) bool } func newDatabase(cs chunks.ChunkStore) *database { vs := types.NewValueStore(cs) if _, ok := cs.(*httpChunkStore); ok { vs.SetEnforceCompleteness(false) } return &database{ ValueStore: vs, // ValueStore is responsible for closing |cs| rt: vs, } } func (db *database) chunkStore() chunks.ChunkStore { return db.ChunkStore() } func (db *database) Stats() interface{} { return db.ChunkStore().Stats() } func (db *database) StatsSummary() string { return db.ChunkStore().StatsSummary() } func (db *database) Flush() { // TODO: This is a pretty ghetto hack - do better. 
// See: https://github.com/attic-labs/noms/issues/3530 ds := db.GetDataset(fmt.Sprintf("-/flush/%s", random.Id())) r := db.WriteValue(types.Bool(true)) ds, err := db.CommitValue(ds, r) d.PanicIfError(err) _, err = db.Delete(ds) d.PanicIfError(err) } func (db *database) Datasets() types.Map { rootHash := db.rt.Root() if rootHash.IsEmpty() { return types.NewMap(db) } return db.ReadValue(rootHash).(types.Map) } func (db *database) GetDataset(datasetID string) Dataset { if !DatasetFullRe.MatchString(datasetID) { d.Panic("Invalid dataset ID: %s", datasetID) } var head types.Value if r, ok := db.Datasets().MaybeGet(types.String(datasetID)); ok { head = r.(types.Ref).TargetValue(db) } return newDataset(db, datasetID, head) } func (db *database) Rebase() { db.rt.Rebase() } func (db *database) Close() error { return db.ValueStore.Close() } func (db *database) SetHead(ds Dataset, newHeadRef types.Ref) (Dataset, error) { return db.doHeadUpdate(ds, func(ds Dataset) error { return db.doSetHead(ds, newHeadRef) }) } func (db *database) doSetHead(ds Dataset, newHeadRef types.Ref) error { if currentHeadRef, ok := ds.MaybeHeadRef(); ok && newHeadRef.Equals(currentHeadRef) { return nil } commit := db.validateRefAsCommit(newHeadRef) currentRootHash, currentDatasets := db.rt.Root(), db.Datasets() commitRef := db.WriteValue(commit) // will be orphaned if the tryCommitChunks() below fails currentDatasets = currentDatasets.Edit().Set(types.String(ds.ID()), types.ToRefOfValue(commitRef)).Map() return db.tryCommitChunks(currentDatasets, currentRootHash) } func (db *database) FastForward(ds Dataset, newHeadRef types.Ref) (Dataset, error) { return db.doHeadUpdate(ds, func(ds Dataset) error { return db.doFastForward(ds, newHeadRef) }) } func (db *database) doFastForward(ds Dataset, newHeadRef types.Ref) error { currentHeadRef, ok := ds.MaybeHeadRef() if ok && newHeadRef.Equals(currentHeadRef) { return nil } if ok && newHeadRef.Height() <= currentHeadRef.Height() { return ErrMergeNeeded } 
commit := db.validateRefAsCommit(newHeadRef) return db.doCommit(ds.ID(), commit, nil) } func (db *database) Commit(ds Dataset, v types.Value, opts CommitOptions) (Dataset, error) { return db.doHeadUpdate( ds, func(ds Dataset) error { return db.doCommit(ds.ID(), buildNewCommit(ds, v, opts), opts.Policy) }, ) } func (db *database) CommitValue(ds Dataset, v types.Value) (Dataset, error) { return db.Commit(ds, v, CommitOptions{}) } // doCommit manages concurrent access the single logical piece of mutable state: the current Root. doCommit is optimistic in that it is attempting to update head making the assumption that currentRootHash is the hash of the current head. The call to Commit below will return an 'ErrOptimisticLockFailed' error if that assumption fails (e.g. because of a race with another writer) and the entire algorithm must be tried again. This method will also fail and return an 'ErrMergeNeeded' error if the |commit| is not a descendent of the current dataset head func (db *database) doCommit(datasetID string, commit types.Struct, mergePolicy merge.Policy) error { if !IsCommit(commit) { d.Panic("Can't commit a non-Commit struct to dataset %s", datasetID) } // This could loop forever, given enough simultaneous committers. BUG 2565 var err error for err = ErrOptimisticLockFailed; err == ErrOptimisticLockFailed; { currentRootHash, currentDatasets := db.rt.Root(), db.Datasets() commitRef := db.WriteValue(commit) // will be orphaned if the tryCommitChunks() below fails // If there's nothing in the DB yet, skip all this logic. if !currentRootHash.IsEmpty() { r, hasHead := currentDatasets.MaybeGet(types.String(datasetID)) // First commit in dataset is always fast-forward, so go through all this iff there's already a Head for datasetID. 
if hasHead { head := r.(types.Ref).TargetValue(db) currentHeadRef := types.NewRef(head) ancestorRef, found := FindCommonAncestor(commitRef, currentHeadRef, db) if !found { return ErrMergeNeeded } // This covers all cases where currentHeadRef is not an ancestor of commit, including the following edge cases: // - commit is a duplicate of currentHead. // - we hit an ErrOptimisticLockFailed and looped back around because some other process changed the Head out from under us. if currentHeadRef.TargetHash() != ancestorRef.TargetHash() || currentHeadRef.TargetHash() == commitRef.TargetHash() { if mergePolicy == nil { return ErrMergeNeeded } ancestor, currentHead := db.validateRefAsCommit(ancestorRef), db.validateRefAsCommit(currentHeadRef) merged, err := mergePolicy(commit.Get(ValueField), currentHead.Get(ValueField), ancestor.Get(ValueField), db, nil) if err != nil { return err } commitRef = db.WriteValue(NewCommit(merged, types.NewSet(db, commitRef, currentHeadRef), types.EmptyStruct)) } } } currentDatasets = currentDatasets.Edit().Set(types.String(datasetID), types.ToRefOfValue(commitRef)).Map() err = db.tryCommitChunks(currentDatasets, currentRootHash) } return err } func (db *database) Delete(ds Dataset) (Dataset, error) { return db.doHeadUpdate(ds, func(ds Dataset) error { return db.doDelete(ds.ID()) }) } // doDelete manages concurrent access the single logical piece of mutable state: the current Root. doDelete is optimistic in that it is attempting to update head making the assumption that currentRootHash is the hash of the current head. The call to Commit below will return an 'ErrOptimisticLockFailed' error if that assumption fails (e.g. because of a race with another writer) and the entire algorithm must be tried again. 
func (db *database) doDelete(datasetIDstr string) error { datasetID := types.String(datasetIDstr) currentRootHash, currentDatasets := db.rt.Root(), db.Datasets() var initialHead types.Ref if r, hasHead := currentDatasets.MaybeGet(datasetID); !hasHead { return nil } else { initialHead = r.(types.Ref) } var err error for { currentDatasets = currentDatasets.Edit().Remove(datasetID).Map() err = db.tryCommitChunks(currentDatasets, currentRootHash) if err != ErrOptimisticLockFailed { break } // If the optimistic lock failed because someone changed the Head of datasetID, then return ErrMergeNeeded. If it failed because someone changed a different Dataset, we should try again. currentRootHash, currentDatasets = db.rt.Root(), db.Datasets() if r, hasHead := currentDatasets.MaybeGet(datasetID); !hasHead || (hasHead && !initialHead.Equals(r)) { err = ErrMergeNeeded break } } return err } func (db *database) tryCommitChunks(currentDatasets types.Map, currentRootHash hash.Hash) (err error) { newRootHash := db.WriteValue(currentDatasets).TargetHash() if !db.rt.Commit(newRootHash, currentRootHash) { err = ErrOptimisticLockFailed } return } func (db *database) validateRefAsCommit(r types.Ref) types.Struct { v := db.ReadValue(r.TargetHash()) if v == nil { panic(r.TargetHash().String() + " not found") } if !IsCommit(v) { panic("Not a commit: " + types.EncodedValueMaxLines(v, 10) + " ...\n") } return v.(types.Struct) } func buildNewCommit(ds Dataset, v types.Value, opts CommitOptions) types.Struct { parents := opts.Parents if (parents == types.Set{}) { parents = types.NewSet(ds.Database()) if headRef, ok := ds.MaybeHeadRef(); ok { parents = parents.Edit().Insert(headRef).Set() } } meta := opts.Meta if meta.IsZeroValue() { meta = types.EmptyStruct } return NewCommit(v, parents, meta) } func (db *database) doHeadUpdate(ds Dataset, updateFunc func(ds Dataset) error) (Dataset, error) { err := updateFunc(ds) return db.GetDataset(ds.ID()), err } 
================================================ FILE: go/datas/database_server.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datas import ( "fmt" "log" "net" "net/http" "strconv" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/constants" "github.com/attic-labs/noms/go/d" "github.com/julienschmidt/httprouter" ) type connectionState struct { c net.Conn cs http.ConnState } type RemoteDatabaseServer struct { cs chunks.ChunkStore address string port int l *net.Listener csChan chan *connectionState closing bool // Called just before the server is started. Ready func() } func NewRemoteDatabaseServer(cs chunks.ChunkStore, address string, port int) *RemoteDatabaseServer { dataVersion := cs.Version() if constants.NomsVersion != dataVersion { d.Panic("SDK version %s is incompatible with data of version %s", constants.NomsVersion, dataVersion) } return &RemoteDatabaseServer{ cs, address, port, nil, make(chan *connectionState, 16), false, func() {}, } } // Port is the actual port used. This may be different than the port passed in to NewRemoteDatabaseServer. 
func (s *RemoteDatabaseServer) Port() int { return s.port } func Router(cs chunks.ChunkStore, prefix string) *httprouter.Router { router := httprouter.New() router.POST(prefix+constants.GetRefsPath, corsHandle(makeHandle(HandleGetRefs, cs))) router.GET(prefix+constants.GetBlobPath, corsHandle(makeHandle(HandleGetBlob, cs))) router.OPTIONS(prefix+constants.GetRefsPath, corsHandle(noopHandle)) router.POST(prefix+constants.HasRefsPath, corsHandle(makeHandle(HandleHasRefs, cs))) router.OPTIONS(prefix+constants.HasRefsPath, corsHandle(noopHandle)) router.GET(prefix+constants.RootPath, corsHandle(makeHandle(HandleRootGet, cs))) router.POST(prefix+constants.RootPath, corsHandle(makeHandle(HandleRootPost, cs))) router.OPTIONS(prefix+constants.RootPath, corsHandle(noopHandle)) router.POST(prefix+constants.WriteValuePath, corsHandle(makeHandle(HandleWriteValue, cs))) router.OPTIONS(prefix+constants.WriteValuePath, corsHandle(noopHandle)) router.GET(prefix+constants.BasePath, corsHandle(makeHandle(HandleBaseGet, cs))) router.GET(prefix+constants.GraphQLPath, corsHandle(makeHandle(HandleGraphQL, cs))) router.POST(prefix+constants.GraphQLPath, corsHandle(makeHandle(HandleGraphQL, cs))) router.OPTIONS(prefix+constants.GraphQLPath, corsHandle(noopHandle)) router.GET(prefix+constants.StatsPath, corsHandle(makeHandle(HandleStats, cs))) router.OPTIONS(prefix+constants.StatsPath, corsHandle(noopHandle)) return router } // Run blocks while the RemoteDatabaseServer is listening. Running on a separate go routine is supported. 
func (s *RemoteDatabaseServer) Run() { l, err := net.Listen("tcp", fmt.Sprintf("%s:%d", s.address, s.port)) d.Chk.NoError(err) s.l = &l _, port, err := net.SplitHostPort(l.Addr().String()) d.Chk.NoError(err) s.port, err = strconv.Atoi(port) d.Chk.NoError(err) log.Printf("Listening on %s:%d...\n", s.address, s.port) router := Router(s.cs, "") srv := &http.Server{ Handler: http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { router.ServeHTTP(w, req) }), ConnState: s.connState, } go func() { m := map[net.Conn]http.ConnState{} for connState := range s.csChan { switch connState.cs { case http.StateNew, http.StateActive, http.StateIdle: m[connState.c] = connState.cs default: delete(m, connState.c) } } for c := range m { c.Close() } }() go s.Ready() srv.Serve(l) } func makeHandle(hndlr Handler, cs chunks.ChunkStore) httprouter.Handle { return func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { hndlr(w, req, ps, cs) } } func noopHandle(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { } func corsHandle(f httprouter.Handle) httprouter.Handle { // TODO: Implement full pre-flighting? // See: http://www.html5rocks.com/static/images/cors_server_flowchart.png return func(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { // Can't use * when clients are using cookies. w.Header().Add("Access-Control-Allow-Origin", r.Header.Get("Origin")) w.Header().Add("Access-Control-Allow-Methods", "GET, POST") w.Header().Add("Access-Control-Allow-Headers", "*") w.Header().Add("Access-Control-Expose-Headers", NomsVersionHeader) w.Header().Add(NomsVersionHeader, constants.NomsVersion) f(w, r, ps) } } func (s *RemoteDatabaseServer) connState(c net.Conn, cs http.ConnState) { if s.closing { d.PanicIfFalse(cs == http.StateClosed) return } s.csChan <- &connectionState{c, cs} } // Will cause the RemoteDatabaseServer to stop listening and an existing call to Run() to continue. 
func (s *RemoteDatabaseServer) Stop() {
	// NOTE(review): closing is set and csChan is closed without any
	// synchronization, while connState reads closing and sends on csChan;
	// looks racy if a connection event arrives during Stop — confirm.
	s.closing = true
	(*s.l).Close()
	(s.cs).Close()
	close(s.csChan)
}

================================================
FILE: go/datas/database_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package datas

import (
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/hash"
	"github.com/attic-labs/noms/go/merge"
	"github.com/attic-labs/noms/go/types"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/suite"
)

// TestLocalDatabase runs the shared suite against a Database built directly
// on the test storage.
func TestLocalDatabase(t *testing.T) {
	suite.Run(t, &LocalDatabaseSuite{})
}

// TestRemoteDatabase runs the shared suite against a Database built on the
// HTTP chunk store test wrapper.
func TestRemoteDatabase(t *testing.T) {
	suite.Run(t, &RemoteDatabaseSuite{})
}

// TestValidateRef checks that validateRefAsCommit panics for a ref to a
// non-commit value, whether or not that value was written first.
func TestValidateRef(t *testing.T) {
	st := &chunks.TestStorage{}
	db := NewDatabase(st.NewView()).(*database)
	defer db.Close()
	b := types.Bool(true)
	r := db.WriteValue(b)

	assert.Panics(t, func() { db.validateRefAsCommit(r) })
	assert.Panics(t, func() { db.validateRefAsCommit(types.NewRef(b)) })
}

// DatabaseSuite holds the fixtures shared by the local and remote suites:
// the backing storage, the Database under test, and the factory used to open
// further Databases on the same storage.
type DatabaseSuite struct {
	suite.Suite
	storage *chunks.TestStorage
	db      Database
	makeDb  func(chunks.ChunkStore) Database
}

type LocalDatabaseSuite struct {
	DatabaseSuite
}

// SetupTest gives every test fresh storage and a Database made by NewDatabase.
func (suite *LocalDatabaseSuite) SetupTest() {
	suite.storage = &chunks.TestStorage{}
	suite.makeDb = NewDatabase
	suite.db = suite.makeDb(suite.storage.NewView())
}

type RemoteDatabaseSuite struct {
	DatabaseSuite
}

// SetupTest gives every test fresh storage and a Database whose chunk store
// is wrapped by newHTTPChunkStoreForTest.
func (suite *RemoteDatabaseSuite) SetupTest() {
	suite.storage = &chunks.TestStorage{}
	suite.makeDb = func(cs chunks.ChunkStore) Database {
		return NewDatabase(newHTTPChunkStoreForTest(cs))
	}
	suite.db = suite.makeDb(suite.storage.NewView())
}

func (suite *DatabaseSuite) TearDownTest() {
	suite.db.Close()
}

// TestWriteRefToNonexistentValue expects a panic when committing a ref whose
// target was never written.
func (suite *RemoteDatabaseSuite) TestWriteRefToNonexistentValue() {
	ds := suite.db.GetDataset("foo")
	r := types.NewRef(types.Bool(true))
	suite.Panics(func() { suite.db.CommitValue(ds, r) })
}

// TestTolerateUngettableRefs expects ReadValue of the zero hash to yield nil.
func (suite *DatabaseSuite) TestTolerateUngettableRefs() {
	suite.Nil(suite.db.ReadValue(hash.Hash{}))
}

// TestCompletenessCheck expects a panic when committing a set that contains
// a ref to a value that was never written.
func (suite *DatabaseSuite) TestCompletenessCheck() {
	datasetID := "ds1"
	ds1 := suite.db.GetDataset(datasetID)

	se := types.NewSet(suite.db).Edit()
	for i := 0; i < 100; i++ {
		se.Insert(suite.db.WriteValue(types.Number(100)))
	}
	s := se.Set()

	ds1, err := suite.db.CommitValue(ds1, s)
	suite.NoError(err)

	s = ds1.HeadValue().(types.Set)
	s = s.Edit().Insert(types.NewRef(types.Number(1000))).Set() // dangling ref
	suite.Panics(func() {
		ds1, err = suite.db.CommitValue(ds1, s)
	})
}

// TestRebase checks that a change made through another Database on the same
// storage becomes visible only after Rebase, and that Rebase on an unchanged
// store performs no reads.
func (suite *DatabaseSuite) TestRebase() {
	datasetID := "ds1"
	ds1 := suite.db.GetDataset(datasetID)
	var err error

	// Setup:
	// ds1: |a| <- |b|
	ds1, err = suite.db.CommitValue(ds1, types.String("a"))
	b := types.String("b")
	ds1, err = suite.db.CommitValue(ds1, b)
	suite.NoError(err)
	suite.True(ds1.HeadValue().Equals(b))

	interloper := suite.makeDb(suite.storage.NewView())
	defer interloper.Close()

	// Concurrent change, to move root out from under my feet:
	// ds1: |a| <- |b| <- |e|
	e := types.String("e")
	iDS, concErr := interloper.CommitValue(interloper.GetDataset(datasetID), e)
	suite.NoError(concErr)
	suite.True(iDS.HeadValue().Equals(e))

	// suite.ds shouldn't see the above change yet
	suite.True(suite.db.GetDataset(datasetID).HeadValue().Equals(b))

	suite.db.Rebase()
	suite.True(suite.db.GetDataset(datasetID).HeadValue().Equals(e))

	cs := suite.storage.NewView()
	noChangeDB := suite.makeDb(cs)
	noChangeDB.Datasets()

	cs.Reads = 0 // New baseline
	noChangeDB.Rebase()
	suite.Zero(cs.Reads)
}

// TestCommitProperlyTracksRoot commits to two different datasets through two
// separate Databases on the same storage and checks each keeps its own head.
func (suite *DatabaseSuite) TestCommitProperlyTracksRoot() {
	id1, id2 := "testdataset", "othertestdataset"

	db1 := suite.makeDb(suite.storage.NewView())
	defer db1.Close()
	ds1 := db1.GetDataset(id1)
	ds1HeadVal := types.String("Commit value for " + id1)
	ds1, err := db1.CommitValue(ds1, ds1HeadVal)
	suite.NoError(err)

	db2 := suite.makeDb(suite.storage.NewView())
	defer db2.Close()
	ds2 := db2.GetDataset(id2)
	ds2HeadVal := types.String("Commit value for " + id2)
	ds2, err = db2.CommitValue(ds2, ds2HeadVal)
	suite.NoError(err)

	suite.EqualValues(ds1HeadVal, ds1.HeadValue())
	suite.EqualValues(ds2HeadVal, ds2.HeadValue())
	suite.False(ds2.HeadValue().Equals(ds1HeadVal))
	suite.False(ds1.HeadValue().Equals(ds2HeadVal))
}

// TestDatabaseCommit walks a dataset through a chain of commits and checks
// that non-fast-forward commits are rejected while the head stays put.
func (suite *DatabaseSuite) TestDatabaseCommit() {
	datasetID := "ds1"
	datasets := suite.db.Datasets()
	suite.Zero(datasets.Len())

	// |a|
	ds := suite.db.GetDataset(datasetID)
	a := types.String("a")
	ds2, err := suite.db.CommitValue(ds, a)
	suite.NoError(err)

	// ds2 matches the Datasets Map in suite.db
	suite.True(ds2.HeadRef().Equals(suite.db.GetDataset(datasetID).HeadRef()))

	// ds2 has |a| at its head
	h, ok := ds2.MaybeHeadValue()
	suite.True(ok)
	suite.True(h.Equals(a))
	suite.Equal(uint64(1), ds2.HeadRef().Height())

	ds = ds2
	aCommitRef := ds.HeadRef() // to be used to test disallowing of non-fastforward commits below

	// |a| <- |b|
	b := types.String("b")
	ds, err = suite.db.CommitValue(ds, b)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(b))
	suite.Equal(uint64(2), ds.HeadRef().Height())

	// |a| <- |b|
	//   \----|c|
	// Should be disallowed.
	c := types.String("c")
	ds, err = suite.db.Commit(ds, c, newOpts(suite.db, aCommitRef))
	suite.Error(err)
	suite.True(ds.HeadValue().Equals(b))

	// |a| <- |b| <- |d|
	d := types.String("d")
	ds, err = suite.db.CommitValue(ds, d)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(d))
	suite.Equal(uint64(3), ds.HeadRef().Height())

	// Attempt to recommit |b| with |a| as parent.
	// Should be disallowed.
	ds, err = suite.db.Commit(ds, b, newOpts(suite.db, aCommitRef))
	suite.Error(err)
	suite.True(ds.HeadValue().Equals(d))

	// Add a commit to a different datasetId
	_, err = suite.db.CommitValue(suite.db.GetDataset("otherDS"), a)
	suite.NoError(err)

	// Get a fresh database, and verify that both datasets are present
	newDB := suite.makeDb(suite.storage.NewView())
	defer newDB.Close()
	datasets2 := newDB.Datasets()
	suite.Equal(uint64(2), datasets2.Len())
}

// TestDatasetsMapType runs assertMapOfStringToRefOfCommit against the
// Datasets map after two commits and a delete, expecting no panic each time.
func (suite *DatabaseSuite) TestDatasetsMapType() {
	dsID1, dsID2 := "ds1", "ds2"

	datasets := suite.db.Datasets()
	ds, err := suite.db.CommitValue(suite.db.GetDataset(dsID1), types.String("a"))
	suite.NoError(err)
	suite.NotPanics(func() { assertMapOfStringToRefOfCommit(suite.db.Datasets(), datasets, suite.db) })

	datasets = suite.db.Datasets()
	_, err = suite.db.CommitValue(suite.db.GetDataset(dsID2), types.Number(42))
	suite.NoError(err)
	suite.NotPanics(func() { assertMapOfStringToRefOfCommit(suite.db.Datasets(), datasets, suite.db) })

	datasets = suite.db.Datasets()
	_, err = suite.db.Delete(ds)
	suite.NoError(err)
	suite.NotPanics(func() { assertMapOfStringToRefOfCommit(suite.db.Datasets(), datasets, suite.db) })
}

// newOpts builds CommitOptions whose Parents set contains the given values.
func newOpts(vrw types.ValueReadWriter, parents ...types.Value) CommitOptions {
	return CommitOptions{Parents: types.NewSet(vrw, parents...)}
}

// TestDatabaseDuplicateCommit commits the same value twice from the same
// (stale) Dataset and expects the second attempt to yield ErrMergeNeeded.
func (suite *DatabaseSuite) TestDatabaseDuplicateCommit() {
	datasetID := "ds1"
	ds := suite.db.GetDataset(datasetID)
	datasets := suite.db.Datasets()
	suite.Zero(datasets.Len())

	v := types.String("Hello")
	_, err := suite.db.CommitValue(ds, v)
	suite.NoError(err)

	_, err = suite.db.CommitValue(ds, v)
	suite.IsType(ErrMergeNeeded, err)
}

// TestDatabaseCommitMerge exercises Commit with explicit parents and the
// None, Theirs and Ours resolve functions.
func (suite *DatabaseSuite) TestDatabaseCommitMerge() {
	datasetID1, datasetID2 := "ds1", "ds2"
	ds1, ds2 := suite.db.GetDataset(datasetID1), suite.db.GetDataset(datasetID2)

	var err error
	v := types.NewMap(suite.db, types.String("Hello"), types.Number(42))
	ds1, err = suite.db.CommitValue(ds1, v)
	ds1First := ds1
	suite.NoError(err)
	ds1, err = suite.db.CommitValue(ds1, v.Edit().Set(types.String("Friends"), types.Bool(true)).Map())
	suite.NoError(err)

	ds2, err = suite.db.CommitValue(ds2, types.String("Goodbye"))
	suite.NoError(err)

	// No common ancestor
	_, err = suite.db.Commit(ds1, types.Number(47), newOpts(suite.db, ds2.HeadRef()))
	suite.IsType(ErrMergeNeeded, err, "%s", err)

	// Unmergeable
	_, err = suite.db.Commit(ds1, types.Number(47), newOptsWithMerge(suite.db, merge.None, ds1First.HeadRef()))
	suite.IsType(&merge.ErrMergeConflict{}, err, "%s", err)

	// Merge policies
	newV := v.Edit().Set(types.String("Friends"), types.Bool(false)).Map()
	_, err = suite.db.Commit(ds1, newV, newOptsWithMerge(suite.db, merge.None, ds1First.HeadRef()))
	suite.IsType(&merge.ErrMergeConflict{}, err, "%s", err)

	theirs, err := suite.db.Commit(ds1, newV, newOptsWithMerge(suite.db, merge.Theirs, ds1First.HeadRef()))
	suite.NoError(err)
	suite.True(types.Bool(true).Equals(theirs.HeadValue().(types.Map).Get(types.String("Friends"))))

	newV = v.Edit().Set(types.String("Friends"), types.Number(47)).Map()
	ours, err := suite.db.Commit(ds1First, newV, newOptsWithMerge(suite.db, merge.Ours, ds1First.HeadRef()))
	suite.NoError(err)
	suite.True(types.Number(47).Equals(ours.HeadValue().(types.Map).Get(types.String("Friends"))))
}

// newOptsWithMerge builds CommitOptions with the given parents and a
// three-way merge Policy using the supplied resolve function.
func newOptsWithMerge(vrw types.ValueReadWriter, policy merge.ResolveFunc, parents ...types.Value) CommitOptions {
	return CommitOptions{Parents: types.NewSet(vrw, parents...), Policy: merge.NewThreeWay(policy)}
}

// TestDatabaseDelete deletes one of two datasets and checks that only the
// other remains, both in the current Database and in a freshly opened one.
func (suite *DatabaseSuite) TestDatabaseDelete() {
	datasetID1, datasetID2 := "ds1", "ds2"
	ds1, ds2 := suite.db.GetDataset(datasetID1), suite.db.GetDataset(datasetID2)
	datasets := suite.db.Datasets()
	suite.Zero(datasets.Len())

	// ds1: |a|
	var err error
	a := types.String("a")
	ds1, err = suite.db.CommitValue(ds1, a)
	suite.NoError(err)
	suite.True(ds1.HeadValue().Equals(a))

	// ds1: |a|, ds2: |b|
	b := types.String("b")
	ds2, err = suite.db.CommitValue(ds2, b)
	suite.NoError(err)
	suite.True(ds2.HeadValue().Equals(b))

	ds1, err = suite.db.Delete(ds1)
	suite.NoError(err)
	suite.True(suite.db.GetDataset(datasetID2).HeadValue().Equals(b))
	_, present := suite.db.GetDataset(datasetID1).MaybeHead()
	suite.False(present, "Dataset %s should not be present", datasetID1)

	// Get a fresh database, and verify that only ds2 is present
	newDB := suite.makeDb(suite.storage.NewView())
	defer newDB.Close()
	datasets = newDB.Datasets()
	suite.Equal(uint64(1), datasets.Len())
	_, present = newDB.GetDataset(datasetID2).MaybeHeadRef()
	suite.True(present, "Dataset %s should be present", datasetID2)
}

// waitDuringUpdateRootChunkStore wraps a ChunkStore and, when
// preUpdateRootHook is set, calls it before every Commit.
type waitDuringUpdateRootChunkStore struct {
	chunks.ChunkStore
	preUpdateRootHook func()
}

func (w *waitDuringUpdateRootChunkStore) Commit(current, last hash.Hash) bool {
	if w.preUpdateRootHook != nil {
		w.preUpdateRootHook()
	}
	return w.ChunkStore.Commit(current, last)
}

// TestCommitWithConcurrentChunkStoreUse checks that a concurrent change to a
// different dataset does not block a commit, while a concurrent change to the
// same dataset makes the commit fail and surfaces the interloper's head.
func (suite *DatabaseSuite) TestCommitWithConcurrentChunkStoreUse() {
	datasetID := "ds1"
	ds1 := suite.db.GetDataset(datasetID)
	var err error

	// Setup:
	// ds1: |a| <- |b|
	ds1, err = suite.db.CommitValue(ds1, types.String("a"))
	b := types.String("b")
	ds1, err = suite.db.CommitValue(ds1, b)
	suite.NoError(err)
	suite.True(ds1.HeadValue().Equals(b))

	// Craft DB that will allow me to move the backing ChunkStore while suite.db isn't looking
	interloper := suite.makeDb(suite.storage.NewView())
	defer interloper.Close()

	// Change ds2 behind suite.db's back. This shouldn't block changes to ds1 via suite.db below.
	// ds1: |a| <- |b|
	// ds2: |stuff|
	stf := types.String("stuff")
	ds2, concErr := interloper.CommitValue(interloper.GetDataset("ds2"), stf)
	suite.NoError(concErr)
	suite.True(ds2.HeadValue().Equals(stf))

	// Change ds1 via suite.db, which should proceed without a problem
	c := types.String("c")
	ds1, err = suite.db.CommitValue(ds1, c)
	suite.NoError(err)
	suite.True(ds1.HeadValue().Equals(c))

	// Change ds1 behind suite.db's back. Will block changes to ds1 below.
	// ds1: |a| <- |b| <- |c| <- |e|
	e := types.String("e")
	interloper.Rebase()
	iDS, concErr := interloper.CommitValue(interloper.GetDataset("ds1"), e)
	suite.NoError(concErr)
	suite.True(iDS.HeadValue().Equals(e))

	// Attempted Concurrent change, which should fail due to the above
	nope := types.String("nope")
	ds1, err = suite.db.CommitValue(ds1, nope)
	suite.Error(err)
	v := ds1.HeadValue()
	suite.True(v.Equals(e), "%s", v.(types.String))
}

// TestDeleteWithConcurrentChunkStoreUse checks that Delete fails when the
// dataset's head was moved concurrently, and succeeds when only some other
// dataset changed.
func (suite *DatabaseSuite) TestDeleteWithConcurrentChunkStoreUse() {
	datasetID := "ds1"
	ds1 := suite.db.GetDataset(datasetID)
	var err error

	// Setup:
	// ds1: |a| <- |b|
	ds1, err = suite.db.CommitValue(ds1, types.String("a"))
	b := types.String("b")
	ds1, err = suite.db.CommitValue(ds1, b)
	suite.NoError(err)
	suite.True(ds1.HeadValue().Equals(b))

	// Craft DB that will allow me to move the backing ChunkStore while suite.db isn't looking
	interloper := suite.makeDb(suite.storage.NewView())
	defer interloper.Close()

	// Concurrent change, to move root out from under my feet:
	// ds1: |a| <- |b| <- |e|
	e := types.String("e")
	iDS, concErr := interloper.CommitValue(interloper.GetDataset(datasetID), e)
	suite.NoError(concErr)
	suite.True(iDS.HeadValue().Equals(e))

	// Attempt to delete ds1 via suite.db, which should fail due to the above
	ds1, err = suite.db.Delete(ds1)
	suite.Error(err)
	suite.True(ds1.HeadValue().Equals(e))

	// Concurrent change, but to some other dataset. This shouldn't stop changes to ds1.
	// ds1: |a| <- |b| <- |e|
	// ds2: |stuff|
	stf := types.String("stuff")
	iDS, concErr = interloper.CommitValue(suite.db.GetDataset("other"), stf)
	suite.NoError(concErr)
	suite.True(iDS.HeadValue().Equals(stf))

	// Attempted concurrent delete, which should proceed without a problem
	ds1, err = suite.db.Delete(ds1)
	suite.NoError(err)
	_, present := ds1.MaybeHeadRef()
	suite.False(present, "Dataset %s should not be present", datasetID)
}

// TestSetHead moves a dataset's head backwards and then forwards again with
// SetHead, expecting both moves to succeed.
func (suite *DatabaseSuite) TestSetHead() {
	var err error
	datasetID := "ds1"

	// |a| <- |b|
	ds := suite.db.GetDataset(datasetID)
	a := types.String("a")
	ds, err = suite.db.CommitValue(ds, a)
	suite.NoError(err)
	aCommitRef := ds.HeadRef() // To use in non-FF SetHead() below.

	b := types.String("b")
	ds, err = suite.db.CommitValue(ds, b)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(b))
	bCommitRef := ds.HeadRef() // To use in FF SetHead() below.

	ds, err = suite.db.SetHead(ds, aCommitRef)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(a))

	ds, err = suite.db.SetHead(ds, bCommitRef)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(b))
}

// TestFastForward checks that FastForward rejects a move to an ancestor and
// accepts a move to a descendant that is not a direct child.
func (suite *DatabaseSuite) TestFastForward() {
	var err error
	datasetID := "ds1"

	// |a| <- |b| <- |c|
	ds := suite.db.GetDataset(datasetID)
	a := types.String("a")
	ds, err = suite.db.CommitValue(ds, a)
	suite.NoError(err)
	aCommitRef := ds.HeadRef() // To use in non-FF cases below.

	b := types.String("b")
	ds, err = suite.db.CommitValue(ds, b)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(b))

	c := types.String("c")
	ds, err = suite.db.CommitValue(ds, c)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(c))
	cCommitRef := ds.HeadRef() // To use in FastForward() below.

	// FastForward should disallow this, as |a| is not a descendant of |c|
	ds, err = suite.db.FastForward(ds, aCommitRef)
	suite.Error(err)
	suite.True(ds.HeadValue().Equals(c))

	// Move Head back to something earlier in the lineage, so we can test FastForward
	ds, err = suite.db.SetHead(ds, aCommitRef)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(a))

	// This should succeed, because while |a| is not a direct parent of |c|, it is an ancestor.
	ds, err = suite.db.FastForward(ds, cCommitRef)
	suite.NoError(err)
	suite.True(ds.HeadValue().Equals(c))
}

// TestDatabaseHeightOfRefs checks that each level of ref-to-ref nesting
// raises the reported Height by one.
func (suite *DatabaseSuite) TestDatabaseHeightOfRefs() {
	r1 := suite.db.WriteValue(types.String("hello"))
	suite.Equal(uint64(1), r1.Height())

	r2 := suite.db.WriteValue(r1)
	suite.Equal(uint64(2), r2.Height())
	suite.Equal(uint64(3), suite.db.WriteValue(r2).Height())
}

// TestDatabaseHeightOfCollections checks the Height reported for refs to
// collections with and without nested refs.
func (suite *DatabaseSuite) TestDatabaseHeightOfCollections() {
	setOfStringType := types.MakeSetType(types.StringType)
	setOfRefOfStringType := types.MakeSetType(types.MakeRefType(types.StringType))

	// Set<String>
	v1 := types.String("hello")
	v2 := types.String("world")
	s1 := types.NewSet(suite.db, v1, v2)
	suite.Equal(uint64(1), suite.db.WriteValue(s1).Height())

	// Set<Ref<String>>
	s2 := types.NewSet(suite.db, suite.db.WriteValue(v1), suite.db.WriteValue(v2))
	suite.Equal(uint64(2), suite.db.WriteValue(s2).Height())

	// List<Set<String>>
	v3 := types.String("foo")
	v4 := types.String("bar")
	s3 := types.NewSet(suite.db, v3, v4)
	l1 := types.NewList(suite.db, s1, s3)
	suite.Equal(uint64(1), suite.db.WriteValue(l1).Height())

	// List<Ref<Set<String>>>
	l2 := types.NewList(suite.db, suite.db.WriteValue(s1), suite.db.WriteValue(s3))
	suite.Equal(uint64(2), suite.db.WriteValue(l2).Height())

	// List<Ref<Set<Ref<String>>>>
	s4 := types.NewSet(suite.db, suite.db.WriteValue(v3), suite.db.WriteValue(v4))
	l3 := types.NewList(suite.db, suite.db.WriteValue(s4))
	suite.Equal(uint64(3), suite.db.WriteValue(l3).Height())

	// List<Set<String> | RefValue<Set<String>>>
	l4 := types.NewList(suite.db, s1, suite.db.WriteValue(s3))
	suite.Equal(uint64(2), suite.db.WriteValue(l4).Height())
	l5 := types.NewList(suite.db, suite.db.WriteValue(s1), s3)
	suite.Equal(uint64(2), suite.db.WriteValue(l5).Height())

	// Familiar with the "New Jersey Turnpike" drink? Here's the noms version of that...
	everything := []types.Value{v1, v2, s1, s2, v3, v4, s3, l1, l2, s4, l3, l4, l5}
	andMore := make([]types.Value, 0, len(everything)*3+2)
	for _, v := range everything {
		andMore = append(andMore, v, types.TypeOf(v), suite.db.WriteValue(v))
	}
	andMore = append(andMore, setOfStringType, setOfRefOfStringType)

	suite.db.WriteValue(types.NewList(suite.db, andMore...))
}

// TestMetaOption checks that CommitOptions.Meta ends up in the commit's
// "meta" field.
func (suite *DatabaseSuite) TestMetaOption() {
	ds := suite.db.GetDataset("ds1")

	m := types.NewStruct("M", types.StructData{
		"author": types.String("arv"),
	})
	ds, err := suite.db.Commit(ds, types.String("a"), CommitOptions{Meta: m})
	suite.NoError(err)
	c := ds.Head()
	suite.Equal(types.String("arv"), c.Get("meta").(types.Struct).Get("author"))
}

================================================
FILE: go/datas/dataset.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package datas

import (
	"regexp"

	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/types"
)

// DatasetRe is a regexp that matches a legal Dataset name anywhere within the
// target string.
var DatasetRe = regexp.MustCompile(`[a-zA-Z0-9\-_/]+`)

// DatasetFullRe is a regexp that matches only a target string that is
// entirely a legal Dataset name.
var DatasetFullRe = regexp.MustCompile("^" + DatasetRe.String() + "$")

// Dataset is a named Commit within a Database.
type Dataset struct {
	db   Database
	id   string
	head types.Value
}

// newDataset builds a Dataset value. It panics unless head is nil or
// satisfies IsCommit.
func newDataset(db Database, id string, head types.Value) Dataset {
	d.PanicIfFalse(head == nil || IsCommit(head))
	return Dataset{db, id, head}
}

// Database returns the Database object in which this Dataset is stored.
// WARNING: This method is under consideration for deprecation. func (ds Dataset) Database() Database { return ds.db } // ID returns the name of this Dataset. func (ds Dataset) ID() string { return ds.id } // MaybeHead returns the current Head Commit of this Dataset, which contains // the current root of the Dataset's value tree, if available. If not, it // returns a new Commit and 'false'. func (ds Dataset) MaybeHead() (types.Struct, bool) { if ds.head == nil { return types.Struct{}, false } return ds.head.(types.Struct), true } // Head returns the current head Commit, which contains the current root of // the Dataset's value tree. func (ds Dataset) Head() types.Struct { c, ok := ds.MaybeHead() if !ok { d.Panic("Dataset \"%s\" does not exist", ds.id) } return c } // MaybeHeadRef returns the Ref of the current Head Commit of this Dataset, // which contains the current root of the Dataset's value tree, if available. // If not, it returns an empty Ref and 'false'. func (ds Dataset) MaybeHeadRef() (types.Ref, bool) { if ds.head == nil { return types.Ref{}, false } return types.NewRef(ds.head), true } // HasHead() returns 'true' if this dataset has a Head Commit, false otherwise. func (ds Dataset) HasHead() bool { return ds.head != nil } // HeadRef returns the Ref of the current head Commit, which contains the // current root of the Dataset's value tree. func (ds Dataset) HeadRef() types.Ref { r, ok := ds.MaybeHeadRef() if !ok { d.Panic("Dataset \"%s\" does not exist", ds.id) } return r } // MaybeHeadValue returns the Value field of the current head Commit, if // available. If not it returns nil and 'false'. func (ds Dataset) MaybeHeadValue() (types.Value, bool) { if c, ok := ds.MaybeHead(); ok { return c.Get(ValueField), true } return nil, false } // HeadValue returns the Value field of the current head Commit. 
func (ds Dataset) HeadValue() types.Value { c := ds.Head() return c.Get(ValueField) } func IsValidDatasetName(name string) bool { return DatasetFullRe.MatchString(name) } ================================================ FILE: go/datas/dataset_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datas import ( "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestExplicitBranchUsingDatasets(t *testing.T) { assert := assert.New(t) id1 := "testdataset" id2 := "othertestdataset" stg := &chunks.MemoryStorage{} store := NewDatabase(stg.NewView()) defer store.Close() ds1 := store.GetDataset(id1) // ds1: |a| a := types.String("a") ds1, err := store.CommitValue(ds1, a) assert.NoError(err) assert.True(ds1.Head().Get(ValueField).Equals(a)) // ds1: |a| // \ds2 ds2 := store.GetDataset(id2) ds2, err = store.Commit(ds2, ds1.HeadValue(), CommitOptions{Parents: types.NewSet(store, ds1.HeadRef())}) assert.NoError(err) assert.True(ds2.Head().Get(ValueField).Equals(a)) // ds1: |a| <- |b| b := types.String("b") ds1, err = store.CommitValue(ds1, b) assert.NoError(err) assert.True(ds1.Head().Get(ValueField).Equals(b)) // ds1: |a| <- |b| // \ds2 <- |c| c := types.String("c") ds2, err = store.CommitValue(ds2, c) assert.NoError(err) assert.True(ds2.Head().Get(ValueField).Equals(c)) // ds1: |a| <- |b| <--|d| // \ds2 <- |c| <--/ mergeParents := types.NewSet(store, types.NewRef(ds1.Head()), types.NewRef(ds2.Head())) d := types.String("d") ds2, err = store.Commit(ds2, d, CommitOptions{Parents: mergeParents}) assert.NoError(err) assert.True(ds2.Head().Get(ValueField).Equals(d)) ds1, err = store.Commit(ds1, d, CommitOptions{Parents: mergeParents}) assert.NoError(err) assert.True(ds1.Head().Get(ValueField).Equals(d)) } func TestTwoClientsWithEmptyDataset(t 
*testing.T) { assert := assert.New(t) id1 := "testdataset" stg := &chunks.MemoryStorage{} store := NewDatabase(stg.NewView()) defer store.Close() dsx := store.GetDataset(id1) dsy := store.GetDataset(id1) // dsx: || -> |a| a := types.String("a") dsx, err := store.CommitValue(dsx, a) assert.NoError(err) assert.True(dsx.Head().Get(ValueField).Equals(a)) // dsy: || -> |b| _, ok := dsy.MaybeHead() assert.False(ok) b := types.String("b") dsy, err = store.CommitValue(dsy, b) assert.Error(err) // Commit failed, but dsy now has latest head, so we should be able to just try again. // dsy: |a| -> |b| dsy, err = store.CommitValue(dsy, b) assert.NoError(err) assert.True(dsy.Head().Get(ValueField).Equals(b)) } func TestTwoClientsWithNonEmptyDataset(t *testing.T) { assert := assert.New(t) id1 := "testdataset" stg := &chunks.MemoryStorage{} store := NewDatabase(stg.NewView()) defer store.Close() a := types.String("a") { // ds1: || -> |a| ds1 := store.GetDataset(id1) ds1, err := store.CommitValue(ds1, a) assert.NoError(err) assert.True(ds1.Head().Get(ValueField).Equals(a)) } dsx := store.GetDataset(id1) dsy := store.GetDataset(id1) // dsx: |a| -> |b| assert.True(dsx.Head().Get(ValueField).Equals(a)) b := types.String("b") dsx, err := store.CommitValue(dsx, b) assert.NoError(err) assert.True(dsx.Head().Get(ValueField).Equals(b)) // dsy: |a| -> |c| assert.True(dsy.Head().Get(ValueField).Equals(a)) c := types.String("c") dsy, err = store.CommitValue(dsy, c) assert.Error(err) assert.True(dsy.Head().Get(ValueField).Equals(b)) // Commit failed, but dsy now has latest head, so we should be able to just try again. 
// dsy: |b| -> |c| dsy, err = store.CommitValue(dsy, c) assert.NoError(err) assert.True(dsy.Head().Get(ValueField).Equals(c)) } func TestIdValidation(t *testing.T) { assert := assert.New(t) stg := &chunks.MemoryStorage{} store := NewDatabase(stg.NewView()) invalidDatasetNames := []string{" ", "", "a ", " a", "$", "#", ":", "\n", "💩"} for _, id := range invalidDatasetNames { assert.Panics(func() { store.GetDataset(id) }) } } func TestHeadValueFunctions(t *testing.T) { assert := assert.New(t) id1 := "testdataset" id2 := "otherdataset" stg := &chunks.MemoryStorage{} store := NewDatabase(stg.NewView()) defer store.Close() ds1 := store.GetDataset(id1) assert.False(ds1.HasHead()) // ds1: |a| a := types.String("a") ds1, err := store.CommitValue(ds1, a) assert.NoError(err) assert.True(ds1.HasHead()) hv := ds1.Head().Get(ValueField) assert.Equal(a, hv) assert.Equal(a, ds1.HeadValue()) hv, ok := ds1.MaybeHeadValue() assert.True(ok) assert.Equal(a, hv) ds2 := store.GetDataset(id2) assert.Panics(func() { ds2.HeadValue() }) _, ok = ds2.MaybeHeadValue() assert.False(ok) } func TestIsValidDatasetName(t *testing.T) { assert := assert.New(t) cases := []struct { name string valid bool }{ {"foo", true}, {"foo/bar", true}, {"f1", true}, {"1f", true}, {"", false}, {"f!!", false}, } for _, c := range cases { assert.Equal(c.valid, IsValidDatasetName(c.name), "Expected %s validity to be %t", c.name, c.valid) } } ================================================ FILE: go/datas/http_chunk_store.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
const (
	// httpChunkStoreConcurrency is the capacity of the store's rateLimit
	// channel, i.e. the number of HTTP requests allowed in flight at once. It
	// is also used as MaxIdleConnsPerHost on the custom transport.
	httpChunkStoreConcurrency = 6
	// readThreshold caps how many distinct hashes sendReadRequests gathers
	// into a single read batch before issuing the request.
	readThreshold = 1 << 12 // 4K
)
// httpChunkStore is a chunks.ChunkStore client that talks to a remote noms
// server over HTTP. Reads are funnelled through getQueue/hasQueue and batched
// by worker goroutines; writes are buffered locally in unwrittenPuts until
// Commit sends them.
type httpChunkStore struct {
	// host is the parsed base URL; each request copies it and overrides Path.
	host *url.URL
	// httpClient executes every request (an *http.Client in production).
	httpClient httpDoer
	// auth, when non-empty, is sent as the Authorization header by newRequest.
	auth string
	// getQueue and hasQueue are unbuffered queues feeding the batch workers.
	getQueue chan chunks.ReadRequest
	hasQueue chan chunks.ReadRequest
	// finishedChan is closed by Close to signal that the store is shut down.
	finishedChan chan struct{}
	// rateLimit holds one token per in-flight request; its capacity is
	// httpChunkStoreConcurrency.
	rateLimit chan struct{}
	// workerWg tracks the goroutines started by batchReadRequests.
	workerWg *sync.WaitGroup
	// cacheMu guards the unwrittenPuts field: users of the cache (including
	// Put) take the read lock, while Commit and Close, which replace or
	// destroy the cache, take the write lock.
	cacheMu *sync.RWMutex
	// unwrittenPuts buffers chunks that were Put but not yet sent to the server.
	unwrittenPuts *nbs.NomsBlockCache

	// rootMu guards root in Root, Rebase, Commit and Close.
	// NOTE(review): getRefs reads root without taking rootMu — confirm that is
	// intentional before relying on rootMu for all accesses.
	rootMu *sync.RWMutex
	// root is the last root hash learned from the server.
	root hash.Hash
	// version is the NomsVersionHeader value returned with the initial root
	// request; later responses are compared against it by expectVersion.
	version string
}

// NewHTTPChunkStore returns a ChunkStore backed by the noms server at baseURL.
// auth, when non-empty, is sent as the Authorization header of every request.
// The initial root is fetched during construction, so this performs network
// I/O before returning.
func NewHTTPChunkStore(baseURL, auth string) chunks.ChunkStore {
	// Custom http.Client to give control of idle connections and timeouts
	return newHTTPChunkStoreWithClient(baseURL, auth, &http.Client{Transport: &customHTTPTransport})
}

// newHTTPChunkStoreWithClient builds a store that issues its requests through
// client. baseURL must parse and use the http or https scheme; anything else
// is reported through d.PanicIfError / d.Panic.
func newHTTPChunkStoreWithClient(baseURL, auth string, client httpDoer) *httpChunkStore {
	u, err := url.Parse(baseURL)
	d.PanicIfError(err)
	if u.Scheme != "http" && u.Scheme != "https" {
		d.Panic("Unrecognized scheme: %s", u.Scheme)
	}
	hcs := &httpChunkStore{
		host:          u,
		httpClient:    client,
		auth:          auth,
		getQueue:      make(chan chunks.ReadRequest),
		hasQueue:      make(chan chunks.ReadRequest),
		finishedChan:  make(chan struct{}),
		rateLimit:     make(chan struct{}, httpChunkStoreConcurrency),
		workerWg:      &sync.WaitGroup{},
		cacheMu:       &sync.RWMutex{},
		unwrittenPuts: nbs.NewCache(),
		rootMu:        &sync.RWMutex{},
	}
	// Learn the server's root and version first; the version is not checked
	// here (checkVers=false) because there is nothing to compare it with yet.
	hcs.root, hcs.version = hcs.getRoot(false)
	// Start one batching worker per read queue.
	hcs.batchGetRequests()
	hcs.batchHasRequests()
	return hcs
}

// httpDoer is the subset of *http.Client the store needs; tests substitute an
// in-process implementation.
type httpDoer interface {
	Do(req *http.Request) (resp *http.Response, err error)
}

// Version returns the noms version string reported by the server when the
// store was created.
func (hcs *httpChunkStore) Version() string {
	return hcs.version
}

// Close shuts the store down and always returns nil (the named result is
// never assigned). The order matters: finishedChan is closed first so the
// batch workers stop, they are waited for, and only then are the queues and
// the rate limiter closed. Finally the buffered, unsent puts are destroyed
// under the cache write lock. rootMu is held for the whole call. Close is not
// written to be called twice: a second call would close finishedChan again.
func (hcs *httpChunkStore) Close() (e error) {
	hcs.rootMu.Lock()
	defer hcs.rootMu.Unlock()

	close(hcs.finishedChan)
	hcs.workerWg.Wait()

	close(hcs.getQueue)
	close(hcs.hasQueue)
	close(hcs.rateLimit)

	hcs.cacheMu.Lock()
	defer hcs.cacheMu.Unlock()
	hcs.unwrittenPuts.Destroy()
	return
}

// Stats returns nil; this store keeps no structured statistics. See
// StatsSummary for the server-provided summary string.
func (hcs *httpChunkStore) Stats() interface{} {
	return nil
}
// StatsSummary fetches the server's textual statistics summary and returns it
// verbatim. Transport errors and non-200 responses are reported through
// d.PanicIfError / checkStatus.
func (hcs *httpChunkStore) StatsSummary() string {
	// GET http://{host}/stats. Response will be string containing a summary of database stats.
	u := *hcs.host
	u.Path = httprouter.CleanPath(hcs.host.Path + constants.StatsPath)

	res, err := hcs.httpClient.Do(newRequest("GET", hcs.auth, u.String(), nil, nil))
	d.PanicIfError(err)
	defer closeResponse(res.Body)

	checkStatus(http.StatusOK, res, res.Body)
	data, err := ioutil.ReadAll(res.Body)
	d.PanicIfError(err)
	return string(data)
}

// Get returns the chunk with hash h. Chunks that were Put but not yet
// committed are served from the local buffer; otherwise a request is queued
// for the batching worker and Get blocks until its result arrives. Calling
// Get on a closed store is reported through d.Panic.
func (hcs *httpChunkStore) Get(h hash.Hash) chunks.Chunk {
	// The read lock only protects the unwrittenPuts field against being
	// swapped or destroyed by Commit/Close while it is being consulted.
	checkCache := func(h hash.Hash) chunks.Chunk {
		hcs.cacheMu.RLock()
		defer hcs.cacheMu.RUnlock()
		return hcs.unwrittenPuts.Get(h)
	}
	if pending := checkCache(h); !pending.IsEmpty() {
		return pending
	}

	// ch carries the single result for this request and is closed on return.
	ch := make(chan *chunks.Chunk)
	defer close(ch)

	select {
	case <-hcs.finishedChan:
		d.Panic("Tried to Get %s from closed ChunkStore", h)
	case hcs.getQueue <- chunks.NewGetRequest(h, ch):
	}

	return *(<-ch)
}

// GetMany sends every chunk it can find for hashes on foundChunks: first the
// ones still buffered locally, then the rest via one queued batch request. It
// returns once the request's WaitGroup is done and never closes foundChunks;
// that is left to the caller. Calling it on a closed store is reported
// through d.Panic.
func (hcs *httpChunkStore) GetMany(hashes hash.HashSet, foundChunks chan *chunks.Chunk) {
	// Look up locally buffered chunks in the background; the goroutine closes
	// cachedChunks when the cache lookup has finished.
	cachedChunks := make(chan *chunks.Chunk)
	go func() {
		hcs.cacheMu.RLock()
		defer hcs.cacheMu.RUnlock()
		defer close(cachedChunks)
		hcs.unwrittenPuts.GetMany(hashes, cachedChunks)
	}()
	// Work on a private copy so the caller's set is not modified here.
	remaining := hash.HashSet{}
	for h := range hashes {
		remaining.Insert(h)
	}
	for c := range cachedChunks {
		remaining.Remove(c.Hash())
		foundChunks <- c
	}

	if len(remaining) == 0 {
		return
	}

	// One WaitGroup count per hash still outstanding; the read request is
	// handed the group so it can be released as hashes are resolved.
	wg := &sync.WaitGroup{}
	wg.Add(len(remaining))
	select {
	case <-hcs.finishedChan:
		d.Panic("Tried to GetMany from closed ChunkStore")
	case hcs.getQueue <- chunks.NewGetManyRequest(remaining, wg, foundChunks):
	}
	wg.Wait()
}

// batchGetRequests starts the worker that batches requests from getQueue and
// resolves them with getRefs.
func (hcs *httpChunkStore) batchGetRequests() {
	hcs.batchReadRequests(hcs.getQueue, hcs.getRefs)
}
defer close(ch) select { case <-hcs.finishedChan: d.Panic("Tried to Has %s on closed ChunkStore", h) case hcs.hasQueue <- chunks.NewAbsentRequest(h, ch): } return <-ch } func (hcs *httpChunkStore) HasMany(hashes hash.HashSet) (absent hash.HashSet) { var remaining hash.HashSet func() { hcs.cacheMu.RLock() defer hcs.cacheMu.RUnlock() remaining = hcs.unwrittenPuts.HasMany(hashes) }() if len(remaining) == 0 { return remaining } notFoundChunks := make(chan hash.Hash) wg := &sync.WaitGroup{} wg.Add(len(remaining)) select { case <-hcs.finishedChan: d.Panic("Tried to HasMany on closed ChunkStore") case hcs.hasQueue <- chunks.NewAbsentManyRequest(remaining, wg, notFoundChunks): } go func() { defer close(notFoundChunks); wg.Wait() }() absent = hash.HashSet{} for notFound := range notFoundChunks { absent.Insert(notFound) } return absent } func (hcs *httpChunkStore) batchHasRequests() { hcs.batchReadRequests(hcs.hasQueue, hcs.hasRefs) } type batchGetter func(batch chunks.ReadBatch) func (hcs *httpChunkStore) batchReadRequests(queue <-chan chunks.ReadRequest, getter batchGetter) { hcs.workerWg.Add(1) go func() { defer hcs.workerWg.Done() for done := false; !done; { select { case req := <-queue: hcs.sendReadRequests(req, queue, getter) case <-hcs.finishedChan: done = true } } }() } func (hcs *httpChunkStore) sendReadRequests(req chunks.ReadRequest, queue <-chan chunks.ReadRequest, getter batchGetter) { batch := chunks.ReadBatch{} addReq := func(req chunks.ReadRequest) { for h := range req.Hashes() { batch[h] = append(batch[h], req.Outstanding()) } } addReq(req) for drained := false; !drained && len(batch) < readThreshold; { select { case req := <-queue: addReq(req) default: drained = true } } hcs.rateLimit <- struct{}{} go func() { defer batch.Close() defer func() { <-hcs.rateLimit }() getter(batch) }() } func (hcs *httpChunkStore) getRefs(batch chunks.ReadBatch) { // POST http:///getRefs/. Post body: ref=hash0&ref=hash1& Response will be chunk data if present, 404 if absent. 
u := *hcs.host u.Path = httprouter.CleanPath(hcs.host.Path + constants.GetRefsPath) // Indicate to the server that we're OK reading chunks from any store that knows about our root q := "root=" + hcs.root.String() if u.RawQuery != "" { q = u.RawQuery + "&" + q } u.RawQuery = q req := newRequest("POST", hcs.auth, u.String(), buildHashesRequest(batch), http.Header{ "Accept-Encoding": {"x-snappy-framed"}, "Content-Type": {"application/octet-stream"}, }) req.ContentLength = int64(serializedLength(batch)) res, err := hcs.httpClient.Do(req) d.Chk.NoError(err) expectVersion(hcs.version, res) reader := resBodyReader(res) defer closeResponse(reader) checkStatus(http.StatusOK, res, reader) chunkChan := make(chan *chunks.Chunk, 16) go func() { defer close(chunkChan); chunks.Deserialize(reader, chunkChan) }() for c := range chunkChan { h := c.Hash() for _, or := range batch[h] { go or.Satisfy(h, c) } delete(batch, c.Hash()) } } func (hcs *httpChunkStore) hasRefs(batch chunks.ReadBatch) { // POST http:///hasRefs/. Post body: ref=sha1---&ref=sha1---& Response will be text of lines containing "|ref| |bool|". 
u := *hcs.host u.Path = httprouter.CleanPath(hcs.host.Path + constants.HasRefsPath) req := newRequest("POST", hcs.auth, u.String(), buildHashesRequest(batch), http.Header{ "Accept-Encoding": {"x-snappy-framed"}, "Content-Type": {"application/octet-stream"}, }) req.ContentLength = int64(serializedLength(batch)) res, err := hcs.httpClient.Do(req) d.Chk.NoError(err) expectVersion(hcs.version, res) reader := resBodyReader(res) defer closeResponse(reader) checkStatus(http.StatusOK, res, reader) scanner := bufio.NewScanner(reader) scanner.Split(bufio.ScanWords) for scanner.Scan() { h := hash.Parse(scanner.Text()) for _, outstanding := range batch[h] { outstanding.Satisfy(h, &chunks.EmptyChunk) } delete(batch, h) } } func resBodyReader(res *http.Response) (reader io.ReadCloser) { reader = res.Body if strings.Contains(res.Header.Get("Content-Encoding"), "gzip") { gr, err := gzip.NewReader(reader) d.Chk.NoError(err) reader = gr } else if strings.Contains(res.Header.Get("Content-Encoding"), "x-snappy-framed") { sr := snappy.NewReader(reader) reader = ioutil.NopCloser(sr) } return } func (hcs *httpChunkStore) Put(c chunks.Chunk) { hcs.cacheMu.RLock() defer hcs.cacheMu.RUnlock() select { case <-hcs.finishedChan: d.Panic("Tried to Put %s into closed ChunkStore", c.Hash()) default: } hcs.unwrittenPuts.Insert(c) } func sendWriteRequest(u url.URL, auth, vers string, p *nbs.NomsBlockCache, cli httpDoer) { chunkChan := make(chan *chunks.Chunk, 1024) go func() { p.ExtractChunks(chunkChan) close(chunkChan) }() body := buildWriteValueRequest(chunkChan) n := int64(0) // Sad that we have to buffer this, but required for servers that need a content-length. 
// See: https://spectrum.chat/zeit/now/are-streaming-request-bodies-supported~8f085d13-2e35-4613-9cc0-818abcd04dfe nb := &bytes.Buffer{} var err error n, err = io.Copy(nb, body) d.PanicIfError(err) body = ioutil.NopCloser(nb) req := newRequest("POST", auth, u.String(), body, http.Header{ "Content-Encoding": {"x-snappy-framed"}, "Content-Type": {"application/octet-stream"}, }) req.ContentLength = n res, err := cli.Do(req) d.PanicIfError(err) expectVersion(vers, res) defer closeResponse(res.Body) checkStatus(http.StatusCreated, res, res.Body) } func (hcs *httpChunkStore) Root() hash.Hash { hcs.rootMu.RLock() defer hcs.rootMu.RUnlock() return hcs.root } func (hcs *httpChunkStore) Rebase() { root, _ := hcs.getRoot(true) hcs.rootMu.Lock() defer hcs.rootMu.Unlock() hcs.root = root } func (hcs *httpChunkStore) getRoot(checkVers bool) (root hash.Hash, vers string) { // GET http:///root. Response will be ref of root. res := hcs.requestRoot("GET", hash.Hash{}, hash.Hash{}) if checkVers { expectVersion(hcs.version, res) } defer closeResponse(res.Body) checkStatus(http.StatusOK, res, res.Body) data, err := ioutil.ReadAll(res.Body) d.PanicIfError(err) return hash.Parse(string(data)), res.Header.Get(NomsVersionHeader) } func (hcs *httpChunkStore) Commit(current, last hash.Hash) bool { hcs.rootMu.Lock() defer hcs.rootMu.Unlock() hcs.cacheMu.Lock() defer hcs.cacheMu.Unlock() select { case <-hcs.finishedChan: d.Panic("Tried to Commit %s to closed ChunkStore", current) case hcs.rateLimit <- struct{}{}: defer func() { <-hcs.rateLimit }() } if count := hcs.unwrittenPuts.Count(); count > 0 { url := *hcs.host url.Path = httprouter.CleanPath(hcs.host.Path + constants.WriteValuePath) verbose.Log("Sending %d chunks", count) sendWriteRequest(url, hcs.auth, hcs.version, hcs.unwrittenPuts, hcs.httpClient) verbose.Log("Finished sending %d hashes", count) hcs.unwrittenPuts.Destroy() hcs.unwrittenPuts = nbs.NewCache() } // POST http:///root?current=&last=. 
Response will be 200 on success, 409 if current is outdated. Regardless, the server returns its current root for this store res := hcs.requestRoot("POST", current, last) expectVersion(hcs.version, res) defer closeResponse(res.Body) var success bool switch res.StatusCode { case http.StatusOK: success = true case http.StatusConflict: success = false default: buf := bytes.Buffer{} buf.ReadFrom(res.Body) body := buf.String() d.Chk.Fail( fmt.Sprintf("Unexpected response: %s: %s", http.StatusText(res.StatusCode), body)) return false } data, err := ioutil.ReadAll(res.Body) d.PanicIfError(err) hcs.root = hash.Parse(string(data)) return success } func (hcs *httpChunkStore) requestRoot(method string, current, last hash.Hash) *http.Response { u := *hcs.host u.Path = httprouter.CleanPath(hcs.host.Path + constants.RootPath) if method == "POST" { params := u.Query() params.Add("last", last.String()) params.Add("current", current.String()) u.RawQuery = params.Encode() } req := newRequest(method, hcs.auth, u.String(), nil, nil) res, err := hcs.httpClient.Do(req) d.PanicIfError(err) return res } func newRequest(method, auth, url string, body io.Reader, header http.Header) *http.Request { req, err := http.NewRequest(method, url, body) d.Chk.NoError(err) req.Header.Set(NomsVersionHeader, constants.NomsVersion) for k, vals := range header { for _, v := range vals { req.Header.Add(k, v) } } if auth != "" { req.Header.Set("Authorization", auth) } return req } func expectVersion(expected string, res *http.Response) { dataVersion := res.Header.Get(NomsVersionHeader) if expected != dataVersion { b, _ := ioutil.ReadAll(res.Body) res.Body.Close() d.Panic( "Version skew\n\r"+ "\tServer data version changed from '%s' to '%s'\n\r"+ "\tHTTP Response: %d (%s): %s\n", expected, dataVersion, res.StatusCode, res.Status, string(b)) } } // In order for keep alive to work we must read to EOF on every response. 
// closeResponse drains whatever is left of rc and then closes it, returning
// the error from Close.
//
// In order for keep alive to work we must read to EOF on every response. We
// may want to add a timeout so that a server that left its connection open
// can't cause all of ports to be eaten up.
func closeResponse(rc io.ReadCloser) error {
	// Stream the remainder straight to ioutil.Discard instead of buffering it
	// all in memory just to throw it away.
	//
	// Bug #2069. It's not clear what the behavior should be when draining
	// fails or finds unread data. Those checks are deliberately not enabled
	// because they would shadow information about a failure which occurred
	// earlier, so the result of the copy is ignored.
	_, _ = io.Copy(ioutil.Discard, rc)
	return rc.Close()
}
handler call below. That'd break a bunch of tests in pull_test.go that want to pass in a single TestStoreView and then inspect it after doing a bunch of work. The cs.Rebase() calls here are a good compromise for now, but BUG 3415 tracks Making This Right. serv := inlineServer{httprouter.New()} serv.POST( constants.WriteValuePath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleWriteValue(w, req, ps, cs) }, ) serv.POST( constants.GetRefsPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleGetRefs(w, req, ps, cs) }, ) serv.POST( constants.HasRefsPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleHasRefs(w, req, ps, cs) }, ) serv.POST( constants.RootPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleRootPost(w, req, ps, cs) }, ) serv.GET( constants.RootPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleRootGet(w, req, ps, cs) }, ) serv.GET( constants.StatsPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleStats(w, req, ps, cs) }, ) return newHTTPChunkStoreWithClient("http://localhost:9000", "", serv) } func newAuthenticatingHTTPChunkStoreForTest(assert *assert.Assertions, cs chunks.ChunkStore, hostUrl string) *httpChunkStore { authenticate := func(req *http.Request) { assert.Equal(testAuthToken, req.URL.Query().Get("access_token")) } serv := inlineServer{httprouter.New()} serv.POST( constants.RootPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() authenticate(req) HandleRootPost(w, req, ps, cs) }, ) serv.GET( constants.RootPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleRootGet(w, req, ps, cs) }, ) return newHTTPChunkStoreWithClient(hostUrl, "", serv) } func newBadVersionHTTPChunkStoreForTest(cs chunks.ChunkStore) *httpChunkStore 
{ serv := inlineServer{httprouter.New()} serv.POST( constants.RootPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleRootPost(w, req, ps, cs) w.Header().Set(NomsVersionHeader, "BAD") }, ) serv.GET( constants.RootPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { cs.Rebase() HandleRootGet(w, req, ps, cs) }, ) return newHTTPChunkStoreWithClient("http://localhost", "", serv) } func (suite *HTTPChunkStoreSuite) TearDownTest() { suite.http.Close() suite.serverCS.Close() } func (suite *HTTPChunkStoreSuite) TestPutChunk() { c := types.EncodeValue(types.String("abc")) suite.http.Put(c) suite.True(suite.http.Has(c.Hash())) suite.True(suite.http.Commit(hash.Hash{}, hash.Hash{})) suite.Equal(1, suite.serverCS.Writes) } func (suite *HTTPChunkStoreSuite) TestPutChunksInOrder() { vals := []types.Value{ types.String("abc"), types.String("def"), } vs := types.NewValueStore(suite.serverCS) defer vs.Close() le := types.NewList(vs).Edit() for _, val := range vals { suite.http.Put(types.EncodeValue(val)) le.Append(types.NewRef(val)) } suite.http.Put(types.EncodeValue(le.List())) suite.True(suite.http.Commit(hash.Hash{}, hash.Hash{})) suite.Equal(3, suite.serverCS.Writes) } func (suite *HTTPChunkStoreSuite) TestStats() { suite.http.Put(types.EncodeValue(types.String("abc"))) suite.http.Put(types.EncodeValue(types.String("def"))) suite.True(suite.http.Commit(hash.Hash{}, hash.Hash{})) suite.NotEmpty(suite.http.StatsSummary()) } func (suite *HTTPChunkStoreSuite) TestRebase() { suite.Equal(hash.Hash{}, suite.http.Root()) db := NewDatabase(suite.serverCS) defer db.Close() c := types.EncodeValue(types.NewMap(db)) suite.serverCS.Put(c) suite.True(suite.serverCS.Commit(c.Hash(), hash.Hash{})) // change happens behind our backs suite.Equal(hash.Hash{}, suite.http.Root()) // shouldn't be visible yet suite.http.Rebase() suite.Equal(c.Hash(), suite.serverCS.Root()) } func (suite *HTTPChunkStoreSuite) TestRoot() { db := 
NewDatabase(suite.serverCS) defer db.Close() c := types.EncodeValue(types.NewMap(db)) suite.serverCS.Put(c) suite.True(suite.http.Commit(c.Hash(), hash.Hash{})) suite.Equal(c.Hash(), suite.serverCS.Root()) } func (suite *HTTPChunkStoreSuite) TestVersionMismatch() { store := newBadVersionHTTPChunkStoreForTest(suite.serverCS) vs := types.NewValueStore(store) defer vs.Close() c := types.EncodeValue(types.NewMap(vs)) suite.serverCS.Put(c) suite.Panics(func() { store.Commit(c.Hash(), hash.Hash{}) }) } func (suite *HTTPChunkStoreSuite) TestCommit() { db := NewDatabase(suite.serverCS) defer db.Close() c := types.EncodeValue(types.NewMap(db)) suite.serverCS.Put(c) suite.True(suite.http.Commit(c.Hash(), hash.Hash{})) suite.Equal(c.Hash(), suite.serverCS.Root()) } func (suite *HTTPChunkStoreSuite) TestEmptyHashCommit() { suite.True(suite.http.Commit(hash.Hash{}, hash.Hash{})) suite.Equal(hash.Hash{}, suite.serverCS.Root()) } func (suite *HTTPChunkStoreSuite) TestCommitWithParams() { u := fmt.Sprintf("http://localhost:9000?access_token=%s&other=19", testAuthToken) store := newAuthenticatingHTTPChunkStoreForTest(suite.Assert(), suite.serverCS, u) vs := types.NewValueStore(store) defer vs.Close() c := types.EncodeValue(types.NewMap(vs)) suite.serverCS.Put(c) suite.True(store.Commit(c.Hash(), hash.Hash{})) suite.Equal(c.Hash(), suite.serverCS.Root()) } func (suite *HTTPChunkStoreSuite) TestGet() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } for _, c := range chnx { suite.serverCS.Put(c) } got := suite.http.Get(chnx[0].Hash()) suite.Equal(chnx[0].Hash(), got.Hash()) got = suite.http.Get(chnx[1].Hash()) suite.Equal(chnx[1].Hash(), got.Hash()) } func (suite *HTTPChunkStoreSuite) TestGetMany() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } notPresent := chunks.NewChunk([]byte("ghi")).Hash() for _, c := range chnx { suite.serverCS.Put(c) } persistChunks(suite.serverCS) hashes := 
hash.NewHashSet(chnx[0].Hash(), chnx[1].Hash(), notPresent) foundChunks := make(chan *chunks.Chunk) go func() { suite.http.GetMany(hashes, foundChunks); close(foundChunks) }() for c := range foundChunks { hashes.Remove(c.Hash()) } suite.Len(hashes, 1) suite.True(hashes.Has(notPresent)) } func (suite *HTTPChunkStoreSuite) TestOverGetThreshold_Issue3589() { if testing.Short() { suite.T().Skip("Skipping test in short mode.") } // BUG 3589 happened because we requested enough hashes that the body was over 10MB. The new way of encoding getRefs request bodies means that 10MB will no longer be a limitation. This test will generate a request larger than 10MB. count := ((10 * (1 << 20)) / hash.ByteLen) + 1 hashes := make(hash.HashSet, count) for i := 0; i < count-1; i++ { h := hash.Hash{} binary.BigEndian.PutUint64(h[hash.ByteLen-8:], uint64(i)) hashes.Insert(h) } present := chunks.NewChunk([]byte("ghi")) suite.serverCS.Put(present) persistChunks(suite.serverCS) hashes.Insert(present.Hash()) foundChunks := make(chan *chunks.Chunk) go func() { suite.http.GetMany(hashes, foundChunks); close(foundChunks) }() found := hash.HashSet{} for c := range foundChunks { found.Insert(c.Hash()) } suite.Len(found, 1) suite.True(found.Has(present.Hash())) } func (suite *HTTPChunkStoreSuite) TestGetManyAllCached() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } for _, c := range chnx { suite.http.Put(c) } hashes := hash.NewHashSet(chnx[0].Hash(), chnx[1].Hash()) foundChunks := make(chan *chunks.Chunk) go func() { suite.http.GetMany(hashes, foundChunks); close(foundChunks) }() for c := range foundChunks { hashes.Remove(c.Hash()) } suite.Len(hashes, 0) } func (suite *HTTPChunkStoreSuite) TestGetManySomeCached() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } cached := chunks.NewChunk([]byte("ghi")) for _, c := range chnx { suite.serverCS.Put(c) } persistChunks(suite.serverCS) suite.http.Put(cached) hashes 
:= hash.NewHashSet(chnx[0].Hash(), chnx[1].Hash(), cached.Hash()) foundChunks := make(chan *chunks.Chunk) go func() { suite.http.GetMany(hashes, foundChunks); close(foundChunks) }() for c := range foundChunks { hashes.Remove(c.Hash()) } suite.Len(hashes, 0) } func (suite *HTTPChunkStoreSuite) TestGetSame() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("def")), chunks.NewChunk([]byte("def")), } for _, c := range chnx { suite.serverCS.Put(c) } got := suite.http.Get(chnx[0].Hash()) suite.Equal(chnx[0].Hash(), got.Hash()) got = suite.http.Get(chnx[1].Hash()) suite.Equal(chnx[1].Hash(), got.Hash()) } func (suite *HTTPChunkStoreSuite) TestGetWithRoot() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } for _, c := range chnx { suite.serverCS.Put(c) } suite.serverCS.Commit(chnx[0].Hash(), hash.Hash{}) serv := inlineServer{httprouter.New()} serv.GET( constants.RootPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { suite.serverCS.Rebase() HandleRootGet(w, req, ps, suite.serverCS) }, ) serv.POST( constants.GetRefsPath, func(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { r := req.URL.Query().Get("root") suite.Equal(chnx[0].Hash().String(), r) suite.serverCS.Rebase() HandleGetRefs(w, req, ps, suite.serverCS) }, ) store := newHTTPChunkStoreWithClient("http://localhost:9000", "", serv) got := store.Get(chnx[1].Hash()) suite.Equal(chnx[1].Hash(), got.Hash()) } func (suite *HTTPChunkStoreSuite) TestHas() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } for _, c := range chnx { suite.serverCS.Put(c) } suite.True(suite.http.Has(chnx[0].Hash())) suite.True(suite.http.Has(chnx[1].Hash())) } func (suite *HTTPChunkStoreSuite) TestHasMany() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } for _, c := range chnx { suite.serverCS.Put(c) } persistChunks(suite.serverCS) notPresent := 
chunks.NewChunk([]byte("ghi")).Hash() hashes := hash.NewHashSet(chnx[0].Hash(), chnx[1].Hash(), notPresent) absent := suite.http.HasMany(hashes) suite.Len(absent, 1) for _, c := range chnx { suite.False(absent.Has(c.Hash()), "%s present in %v", c.Hash(), absent) } suite.True(absent.Has(notPresent)) } func (suite *HTTPChunkStoreSuite) TestHasManyAllCached() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } for _, c := range chnx { suite.http.Put(c) } persistChunks(suite.serverCS) hashes := hash.NewHashSet(chnx[0].Hash(), chnx[1].Hash()) absent := suite.http.HasMany(hashes) suite.Len(absent, 0) for _, c := range chnx { suite.False(absent.Has(c.Hash()), "%s present in %v", c.Hash(), absent) } } func (suite *HTTPChunkStoreSuite) TestHasManySomeCached() { chnx := []chunks.Chunk{ chunks.NewChunk([]byte("abc")), chunks.NewChunk([]byte("def")), } cached := chunks.NewChunk([]byte("ghi")) for _, c := range chnx { suite.serverCS.Put(c) } persistChunks(suite.serverCS) suite.http.Put(cached) hashes := hash.NewHashSet(chnx[0].Hash(), chnx[1].Hash(), cached.Hash()) absent := suite.http.HasMany(hashes) suite.Len(absent, 0) for _, c := range chnx { suite.False(absent.Has(c.Hash()), "%s present in %v", c.Hash(), absent) } suite.False(absent.Has(cached.Hash()), "%s present in %v", cached.Hash(), absent) } ================================================ FILE: go/datas/pull.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datas import ( "math" "math/rand" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/attic-labs/noms/go/types" "github.com/golang/snappy" ) type PullProgress struct { DoneCount, KnownCount, ApproxWrittenBytes uint64 } const ( bytesWrittenSampleRate = .10 batchSize = 1 << 12 // 4096 chunks ) // Pull objects that descend from sourceRef from srcDB to sinkDB. func Pull(srcDB, sinkDB Database, sourceRef types.Ref, progressCh chan PullProgress) { // Sanity Check d.PanicIfFalse(srcDB.chunkStore().Has(sourceRef.TargetHash())) if sinkDB.chunkStore().Has(sourceRef.TargetHash()) { return // already up to date } var doneCount, knownCount, approxBytesWritten uint64 updateProgress := func(moreDone, moreKnown, moreApproxBytesWritten uint64) { if progressCh == nil { return } doneCount, knownCount, approxBytesWritten = doneCount+moreDone, knownCount+moreKnown, approxBytesWritten+moreApproxBytesWritten progressCh <- PullProgress{doneCount, knownCount, approxBytesWritten} } var sampleSize, sampleCount uint64 // TODO: This batches based on limiting the _number_ of chunks processed at the same time. We really want to batch based on the _amount_ of chunk data being processed simultaneously. We also want to consider the chunks in a particular order, however, and the current GetMany() interface doesn't provide any ordering guarantees. Once BUG 3750 is fixed, we should be able to revisit this and do a better job. 
absent := hash.HashSlice{sourceRef.TargetHash()} for absentCount := len(absent); absentCount != 0; absentCount = len(absent) { updateProgress(0, uint64(absentCount), 0) // For gathering up the hashes in the next level of the tree nextLevel := hash.HashSet{} uniqueOrdered := hash.HashSlice{} // Process all absent chunks in this level of the tree in quanta of at most |batchSize| for start, end := 0, batchSize; start < absentCount; start, end = end, end+batchSize { if end > absentCount { end = absentCount } batch := absent[start:end] // Concurrently pull all chunks from this batch that the sink is missing out of the source neededChunks := map[hash.Hash]*chunks.Chunk{} found := make(chan *chunks.Chunk) go func() { defer close(found); srcDB.chunkStore().GetMany(batch.HashSet(), found) }() for c := range found { neededChunks[c.Hash()] = c // Randomly sample amount of data written if rand.Float64() < bytesWrittenSampleRate { sampleSize += uint64(len(snappy.Encode(nil, c.Data()))) sampleCount++ } updateProgress(1, 0, sampleSize/uint64(math.Max(1, float64(sampleCount)))) } // Now, put the absent chunks into the sink IN ORDER. // At the same time, gather up an ordered, uniquified list of all the children of the chunks in |batch| and add them to those in previous batches. This list is what we'll use to descend to the next level of the tree. for _, h := range batch { c := neededChunks[h] sinkDB.chunkStore().Put(*c) types.WalkRefs(*c, func(r types.Ref) { if !nextLevel.Has(r.TargetHash()) { uniqueOrdered = append(uniqueOrdered, r.TargetHash()) nextLevel.Insert(r.TargetHash()) } }) } } // Ask sinkDB which of the next level's hashes it doesn't have. 
absentSet := sinkDB.chunkStore().HasMany(nextLevel) absent = absent[:0] for _, h := range uniqueOrdered { if absentSet.Has(h) { absent = append(absent, h) } } } persistChunks(sinkDB.chunkStore()) } ================================================ FILE: go/datas/pull_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datas import ( "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/suite" ) const datasetID = "ds1" func TestLocalToLocalPulls(t *testing.T) { suite.Run(t, &LocalToLocalSuite{}) } func TestRemoteToLocalPulls(t *testing.T) { suite.Run(t, &RemoteToLocalSuite{}) } func TestLocalToRemotePulls(t *testing.T) { suite.Run(t, &LocalToRemoteSuite{}) } func TestRemoteToRemotePulls(t *testing.T) { suite.Run(t, &RemoteToRemoteSuite{}) } type PullSuite struct { suite.Suite sinkCS *chunks.TestStoreView sourceCS *chunks.TestStoreView sink Database source Database commitReads int // The number of reads triggered by commit differs across chunk store impls } func makeTestStoreViews() (ts1, ts2 *chunks.TestStoreView) { st1, st2 := &chunks.TestStorage{}, &chunks.TestStorage{} return st1.NewView(), st2.NewView() } type LocalToLocalSuite struct { PullSuite } func (suite *LocalToLocalSuite) SetupTest() { suite.sinkCS, suite.sourceCS = makeTestStoreViews() suite.sink = NewDatabase(suite.sinkCS) suite.source = NewDatabase(suite.sourceCS) } type RemoteToLocalSuite struct { PullSuite } func (suite *RemoteToLocalSuite) SetupTest() { suite.sinkCS, suite.sourceCS = makeTestStoreViews() suite.sink = NewDatabase(suite.sinkCS) suite.source = makeRemoteDb(suite.sourceCS) } type LocalToRemoteSuite struct { PullSuite } func (suite *LocalToRemoteSuite) SetupTest() { suite.sinkCS, suite.sourceCS = makeTestStoreViews() suite.sink = makeRemoteDb(suite.sinkCS) 
suite.source = NewDatabase(suite.sourceCS)
	suite.commitReads = 1
}

type RemoteToRemoteSuite struct {
	PullSuite
}

func (suite *RemoteToRemoteSuite) SetupTest() {
	suite.sinkCS, suite.sourceCS = makeTestStoreViews()
	suite.sink = makeRemoteDb(suite.sinkCS)
	suite.source = makeRemoteDb(suite.sourceCS)
	suite.commitReads = 1
}

// makeRemoteDb wraps cs in the test HTTP chunk store and returns a Database
// backed by it.
func makeRemoteDb(cs chunks.ChunkStore) Database {
	return NewDatabase(newHTTPChunkStoreForTest(cs))
}

// TearDownTest closes both databases and both underlying store views.
func (suite *PullSuite) TearDownTest() {
	suite.sink.Close()
	suite.source.Close()
	suite.sinkCS.Close()
	suite.sourceCS.Close()
}

// progressTracker records every PullProgress sent on Ch. The collected slice
// is delivered on doneCh once Ch has been closed.
type progressTracker struct {
	Ch     chan PullProgress
	doneCh chan []PullProgress
}

// startProgressTracker starts a goroutine that drains Ch until it is closed
// and then hands the accumulated reports to doneCh.
func startProgressTracker() *progressTracker {
	pt := &progressTracker{make(chan PullProgress), make(chan []PullProgress)}
	go func() {
		progress := []PullProgress{}
		for info := range pt.Ch {
			progress = append(progress, info)
		}
		pt.doneCh <- progress
	}()
	return pt
}

// Validate closes Ch, waits for the collected reports and checks that they
// form a plausible sequence: a non-empty list whose first entry has nothing
// done yet, whose last entry has everything known done, and whose counters
// never decrease or let DoneCount overtake KnownCount.
func (pt *progressTracker) Validate(suite *PullSuite) {
	close(pt.Ch)
	progress := <-pt.doneCh

	// Expecting exact progress would be unreliable and not necessary meaningful. Instead, just validate that it's useful and consistent.
	suite.NotEmpty(progress)

	first := progress[0]
	suite.Zero(first.DoneCount)
	suite.True(first.KnownCount > 0)
	suite.Zero(first.ApproxWrittenBytes)

	last := progress[len(progress)-1]
	suite.True(last.DoneCount > 0)
	suite.Equal(last.DoneCount, last.KnownCount)

	for i, prog := range progress {
		suite.True(prog.KnownCount >= prog.DoneCount)
		if i > 0 {
			prev := progress[i-1]
			suite.True(prog.DoneCount >= prev.DoneCount)
			suite.True(prog.ApproxWrittenBytes >= prev.ApproxWrittenBytes)
		}
	}
}

// Source: -3-> C(L2) -1-> N
// \ -2-> L1 -1-> N
// \ -1-> L0
//
// Sink: Nada
func (suite *PullSuite) TestPullEverything() {
	expectedReads := suite.sinkCS.Reads

	l := buildListOfHeight(2, suite.source)
	sourceRef := suite.commitToSource(l, types.NewSet(suite.source))
	pt := startProgressTracker()

	Pull(suite.source, suite.sink, sourceRef, pt.Ch)
	// NOTE(review): if Reads only ever grows, expectedReads-Reads is <= 0 and
	// this check can never fail; the operands may be reversed. Confirm the
	// intent before relying on it. The same applies to the other tests below.
	suite.True(expectedReads-suite.sinkCS.Reads <= suite.commitReads)
	pt.Validate(suite)

	v := suite.sink.ReadValue(sourceRef.TargetHash()).(types.Struct)
	suite.NotNil(v)
	suite.True(l.Equals(v.Get(ValueField)))
}

// Source: -6-> C3(L5) -1-> N
// . \ -5-> L4 -1-> N
// . \ -4-> L3 -1-> N
// . \ -3-> L2 -1-> N
// 5 \ -2-> L1 -1-> N
// . \ -1-> L0
// C2(L4) -1-> N
// . \ -4-> L3 -1-> N
// . \ -3-> L2 -1-> N
// . \ -2-> L1 -1-> N
// 3 \ -1-> L0
// .
// C1(L2) -1-> N
// \ -2-> L1 -1-> N
// \ -1-> L0
//
// Sink: -3-> C1(L2) -1-> N
// \ -2-> L1 -1-> N
// \ -1-> L0
func (suite *PullSuite) TestPullMultiGeneration() {
	sinkL := buildListOfHeight(2, suite.sink)
	suite.commitToSink(sinkL, types.NewSet(suite.sink))
	expectedReads := suite.sinkCS.Reads

	srcL := buildListOfHeight(2, suite.source)
	sourceRef := suite.commitToSource(srcL, types.NewSet(suite.source))
	srcL = buildListOfHeight(4, suite.source)
	sourceRef = suite.commitToSource(srcL, types.NewSet(suite.source, sourceRef))
	srcL = buildListOfHeight(5, suite.source)
	sourceRef = suite.commitToSource(srcL, types.NewSet(suite.source, sourceRef))

	pt := startProgressTracker()

	Pull(suite.source, suite.sink, sourceRef, pt.Ch)

	suite.True(expectedReads-suite.sinkCS.Reads <= suite.commitReads)
	pt.Validate(suite)

	v := suite.sink.ReadValue(sourceRef.TargetHash()).(types.Struct)
	suite.NotNil(v)
	suite.True(srcL.Equals(v.Get(ValueField)))
}

// Source: -6-> C2(L5) -1-> N
// . \ -5-> L4 -1-> N
// . \ -4-> L3 -1-> N
// . \ -3-> L2 -1-> N
// 4 \ -2-> L1 -1-> N
// . \ -1-> L0
// C1(L3) -1-> N
// \ -3-> L2 -1-> N
// \ -2-> L1 -1-> N
// \ -1-> L0
//
// Sink: -5-> C3(L3') -1-> N
// . \ -3-> L2 -1-> N
// . \ \ -2-> L1 -1-> N
// . \ \ -1-> L0
// . \ - "oy!"
// 4
// .
// C1(L3) -1-> N
// \ -3-> L2 -1-> N
// \ -2-> L1 -1-> N
// \ -1-> L0
func (suite *PullSuite) TestPullDivergentHistory() {
	sinkL := buildListOfHeight(3, suite.sink)
	sinkRef := suite.commitToSink(sinkL, types.NewSet(suite.sink))
	srcL := buildListOfHeight(3, suite.source)
	sourceRef := suite.commitToSource(srcL, types.NewSet(suite.source))

	sinkL = sinkL.Edit().Append(types.String("oy!")).List()
	sinkRef = suite.commitToSink(sinkL, types.NewSet(suite.sink, sinkRef))
	srcL = srcL.Edit().Set(1, buildListOfHeight(5, suite.source)).List()
	sourceRef = suite.commitToSource(srcL, types.NewSet(suite.source, sourceRef))
	preReads := suite.sinkCS.Reads

	pt := startProgressTracker()

	Pull(suite.source, suite.sink, sourceRef, pt.Ch)

	suite.True(preReads-suite.sinkCS.Reads <= suite.commitReads)
	pt.Validate(suite)

	v := suite.sink.ReadValue(sourceRef.TargetHash()).(types.Struct)
	suite.NotNil(v)
	suite.True(srcL.Equals(v.Get(ValueField)))
}

// Source: -6-> C2(L4) -1-> N
// . \ -4-> L3 -1-> N
// . \ -3-> L2 -1-> N
// . \ - "oy!"
// 5 \ -2-> L1 -1-> N
// . \ -1-> L0
// C1(L4) -1-> N
// \ -4-> L3 -1-> N
// \ -3-> L2 -1-> N
// \ -2-> L1 -1-> N
// \ -1-> L0
// Sink: -5-> C1(L4) -1-> N
// \ -4-> L3 -1-> N
// \ -3-> L2 -1-> N
// \ -2-> L1 -1-> N
// \ -1-> L0
func (suite *PullSuite) TestPullUpdates() {
	sinkL := buildListOfHeight(4, suite.sink)
	suite.commitToSink(sinkL, types.NewSet(suite.sink))
	expectedReads := suite.sinkCS.Reads

	srcL := buildListOfHeight(4, suite.source)
	sourceRef := suite.commitToSource(srcL, types.NewSet(suite.source))
	// Append "oy!" to the list two levels down, then rewrite each ancestor so
	// that it points at its updated child.
	L3 := srcL.Get(1).(types.Ref).TargetValue(suite.source).(types.List)
	L2 := L3.Get(1).(types.Ref).TargetValue(suite.source).(types.List)
	L2 = L2.Edit().Append(suite.source.WriteValue(types.String("oy!"))).List()
	L3 = L3.Edit().Set(1, suite.source.WriteValue(L2)).List()
	srcL = srcL.Edit().Set(1, suite.source.WriteValue(L3)).List()
	sourceRef = suite.commitToSource(srcL, types.NewSet(suite.source, sourceRef))

	pt := startProgressTracker()

	Pull(suite.source, suite.sink, sourceRef, pt.Ch)

	suite.True(expectedReads-suite.sinkCS.Reads <= suite.commitReads)
	pt.Validate(suite)

	v := suite.sink.ReadValue(sourceRef.TargetHash()).(types.Struct)
	suite.NotNil(v)
	suite.True(srcL.Equals(v.Get(ValueField)))
}

// commitToSource commits v with parents p to the test dataset in the source
// database and returns the ref of the new head.
func (suite *PullSuite) commitToSource(v types.Value, p types.Set) types.Ref {
	ds := suite.source.GetDataset(datasetID)
	ds, err := suite.source.Commit(ds, v, CommitOptions{Parents: p})
	suite.NoError(err)
	return ds.HeadRef()
}

// commitToSink commits v with parents p to the test dataset in the sink
// database and returns the ref of the new head.
func (suite *PullSuite) commitToSink(v types.Value, p types.Set) types.Ref {
	ds := suite.sink.GetDataset(datasetID)
	ds, err := suite.sink.Commit(ds, v, CommitOptions{Parents: p})
	suite.NoError(err)
	return ds.HeadRef()
}

// buildListOfHeight starts from a two-number list and wraps it |height|
// times; each wrapper is a list holding a ref to a fresh number and a ref to
// the previous list, both written through vrw.
func buildListOfHeight(height int, vrw types.ValueReadWriter) types.List {
	unique := 0
	l := types.NewList(vrw, types.Number(unique), types.Number(unique+1))
	unique += 2

	for i := 0; i < height; i++ {
		r1, r2 := vrw.WriteValue(types.Number(unique)), vrw.WriteValue(l)
		unique++
		l = types.NewList(vrw, r1, r2)
	}
	return l
}

================================================
FILE: go/datas/pulling.md
================================================
# Dataset pulling algorithm

The approach is to explore the chunk graph of both sink and source in order of decreasing ref-height. As the code walks, it uses the knowledge gained about which chunks are present in the sink to both prune the source-graph-walk and build up a set of `hints` that can be sent to a remote Database to aid in chunk validation.

## Basic algorithm

- let `sink` be the *sink* database
- let `source` be the *source* database
- let `snkQ` and `srcQ` be priority queues of `Ref` prioritized by highest `Ref.height`
- let `hints` be a map of `hash => hash`
- let `reachableChunks` be a set of hashes
- let `snkHdRef` be the ref (of `Commit`) of the head of the *sink* dataset
- let `srcHdRef` be the ref of the *source* `Commit`, which must descend from the `Commit` indicated by `snkHdRef`
- let `traverseSource(srcRef, srcQ, sink, source, reachableChunks)` be
  - pop `srcRef` from `srcQ`
  - if `!sink.has(srcRef)`
    - let `c` = `source.batchStore().Get(srcRef.targetHash)`
    - let `v` = `types.DecodeValue(c, source)`
    - insert all child refs, `cr`, from `v` into `srcQ` and into `reachableChunks`
    - `sink.batchStore().Put(c, srcRef.height, no hints)`
      - (hints will all be gathered and handed to `sink.batchStore` at the end)
- let `traverseSink(snkRef, snkQ, sink, hints)` be
  - pop `snkRef` from `snkQ`
  - if `snkRef.height` > 1
    - let `v` = `sink.readValue(snkRef.targetHash)`
    - insert all child refs, `cr`, from `v` into `snkQ` and set `hints[cr] = snkRef`
- let `traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink, hints)` be
  - pop `comRef` from both `snkQ` and `srcQ`
  - if `comRef.height` > 1
    - if `comRef` is a `Ref` of `Commit`
      - let `v` = `sink.readValue(comRef.targetHash)`
      - if `comRef` == `snkHdRef`
        - *ignore all parent refs*
        - insert each other child ref `cr` from `v` into `snkQ` *only*, set `hints[cr] = comRef`
      - else
        - insert each child ref `cr` from `v` into both `snkQ` and `srcQ`, set `hints[cr] =
comRef`
- let `pull(source, sink, srcHdRef, snkHdRef)` be
  - insert `snkHdRef` into `snkQ` and `srcHdRef` into `srcQ`
  - create empty `hints` and `reachableChunks`
  - while `srcQ` is non-empty
    - let `srcHt` and `snkHt` be the respective heights of the *top* `Ref` in each of `srcQ` and `snkQ`
    - if `srcHt` > `snkHt`, for every `srcHdRef` in `srcQ` which is of greater height than `snkHt`
      - `traverseSource(srcHdRef, srcQ, sink, source, reachableChunks)`
    - else if `snkHt` > `srcHt`, for every `snkHdRef` in `snkQ` which is of greater height than `srcHt`
      - `traverseSink(snkHdRef, snkQ, sink, hints)`
    - else
      - for every `comRef` which is common to `snkQ` and `srcQ` and is of height `srcHt` (and `snkHt`)
        - `traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink, hints)`
      - for every `ref` in `srcQ` which is of height `srcHt`
        - `traverseSource(ref, srcQ, sink, source, reachableChunks)`
      - for every `ref` in `snkQ` which is of height `snkHt`
        - `traverseSink(ref, snkQ, sink, hints)`
  - for all `hash` in `reachableChunks`
    - `sink.batchStore().addHint(hints[hash])`

## Isomorphic, but less clear, algorithm

- let all identifiers be as above
- let `traverseSource`, `traverseSink`, and `traverseCommon` be as above
- let `higherThan(refA, refB)` be
  - if `refA.height == refB.height`
    - return `refA.targetHash < refB.targetHash`
  - return `refA.height > refB.height`
- let `pull(source, sink, srcHdRef, snkHdRef)` be
  - insert `snkHdRef` into `snkQ` and `srcHdRef` into `srcQ`
  - create empty `hints` and `reachableChunks`
  - while `srcQ` is non-empty
    - if `snkQ` is empty
      - pop `ref` from `srcQ`
      - `traverseSource(ref, srcQ, sink, source, reachableChunks)`
    - else if `higherThan(head of srcQ, head of snkQ)`
      - pop `ref` from `srcQ`
      - `traverseSource(ref, srcQ, sink, source, reachableChunks)`
    - else if `higherThan(head of snkQ, head of srcQ)`
      - pop `ref` from `snkQ`
      - `traverseSink(ref, snkQ, sink, hints)`
    - else, heads of both queues are the same
      - pop `comRef` from `snkQ` and `srcQ`
      - `traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink,
hints)`
  - for all `hash` in `reachableChunks`
    - sink.batchStore().addHint(hints[hash])

================================================
FILE: go/datas/remote_database_handlers.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package datas

import (
	"compress/gzip"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"runtime"
	"strings"
	"time"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/constants"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
	"github.com/attic-labs/noms/go/ngql"
	"github.com/attic-labs/noms/go/types"
	"github.com/attic-labs/noms/go/util/verbose"
	"github.com/golang/snappy"
)

// URLParams gives handlers access to the parameters a router extracted from
// the request path. Presumably ByName returns the value bound to the given
// parameter name; verify against the router in use.
type URLParams interface {
	ByName(string) string
}

// Handler is the signature shared by all Noms HTTP endpoint handlers: a
// standard response writer and request, the routed URL parameters, and the
// ChunkStore the request operates on.
type Handler func(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore)

const (
	// NomsVersionHeader is the name of the header that Noms clients and
	// servers must set in every request/response.
	NomsVersionHeader = "x-noms-vers"
	nomsBaseHTML = "

Hi. This is a Noms HTTP server.

To learn more, visit our GitHub project.

"
	maxGetBatchSize = 1 << 14 // Limit GetMany() to ~16k chunks, or ~64MB of data
)

var (
	// HandleWriteValue is meant to handle HTTP POST requests to the
	// writeValue/ server endpoint. The payload should be an appropriately-
	// ordered sequence of Chunks to be validated and stored on the server.
	// TODO: Nice comment about what headers it expects/honors, payload
	// format, and error responses.
	HandleWriteValue = createHandler(handleWriteValue, true)

	// HandleGetRefs is meant to handle HTTP POST requests to the getRefs/
	// server endpoint. Given a sequence of Chunk hashes, the server will
	// fetch and return them.
	// TODO: Nice comment about what headers it
	// expects/honors, payload format, and responses.
	HandleGetRefs = createHandler(handleGetRefs, true)

	// HandleGetBlob is a custom endpoint whose sole purpose is to directly
	// fetch the *bytes* contained in a Blob value. It expects a single query
	// param of `h` to be the ref of the Blob.
	// TODO: Support retrieving blob contents via a path.
	HandleGetBlob = createHandler(handleGetBlob, false)

	// HandleHasRefs is meant to handle HTTP POST requests to the hasRefs/
	// server endpoint. Given a sequence of Chunk hashes, the server checks
	// for their presence and returns the hashes that are absent, one per
	// line.
	// TODO: Nice comment about what headers it expects/honors, payload
	// format, and responses.
	HandleHasRefs = createHandler(handleHasRefs, true)

	// HandleRootGet is meant to handle HTTP GET requests to the root/ server
	// endpoint. The server returns the hash of the Root as a string.
	// TODO: Nice comment about what headers it expects/honors, payload
	// format, and responses.
	HandleRootGet = createHandler(handleRootGet, true)

	// HandleRootPost is meant to handle HTTP POST requests to the root/
	// server endpoint. This is used to update the Root to point to a new
	// Chunk.
	// TODO: Nice comment about what headers it expects/honors, payload
	// format, and error responses.
	HandleRootPost = createHandler(handleRootPost, true)

	// HandleBaseGet is meant to handle HTTP GET requests to the / server
	// endpoint. This is used to give a friendly message to users.
	// TODO: Nice comment about what headers it expects/honors, payload
	// format, and error responses.
	HandleBaseGet = handleBaseGet

	// HandleGraphQL serves GraphQL queries (GET or POST) against a dataset
	// head or a value hash. No version header is required.
	HandleGraphQL = createHandler(handleGraphQL, false)

	// HandleStats serves the chunk store's stats summary as plain text. No
	// version header is required.
	HandleStats = createHandler(handleStats, false)

	// writeValueConcurrency bounds the channel buffers used while decoding
	// incoming chunks in handleWriteValue.
	writeValueConcurrency = runtime.NumCPU()
)

// createHandler wraps hndlr with the behavior common to all endpoints. The
// returned Handler always sets the Noms version header on the response. When
// versionCheck is true, a request whose version header differs from
// constants.NomsVersion is rejected with 400 Bad Request. hndlr is then run
// under d.Try, and any error it raises is unwrapped, logged and reported to
// the client as 400 Bad Request.
func createHandler(hndlr Handler, versionCheck bool) Handler {
	return func(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) {
		w.Header().Set(NomsVersionHeader, constants.NomsVersion)

		if versionCheck && req.Header.Get(NomsVersionHeader) != constants.NomsVersion {
			log.Printf("returning version mismatch error")
			http.Error(
				w,
				fmt.Sprintf("Error: SDK version %s is incompatible with data of version %s", req.Header.Get(NomsVersionHeader), constants.NomsVersion),
				http.StatusBadRequest,
			)
			return
		}

		err := d.Try(func() { hndlr(w, req, ps, cs) })
		if err != nil {
			err = d.Unwrap(err)
			log.Printf("returning bad request error: %v", err)
			http.Error(w, fmt.Sprintf("Error: %v", err), http.StatusBadRequest)
			return
		}
	}
}

// handleWriteValue reads a stream of serialized chunks from the request body,
// validates each one, and stores it in cs.
//
// The work is pipelined: one goroutine deserializes chunks from the body, a
// second starts a decode goroutine per chunk and queues one result channel
// per chunk on |decoded| in arrival order, and the handler goroutine consumes
// those results in that same order. After all chunks are in, the refs they
// contain are checked with types.PanicIfDangling and the store is persisted.
// Responds 201 Created on success; panics (turned into a 400 by
// createHandler) on a non-POST request or a deserialization failure.
func handleWriteValue(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) {
	if req.Method != "POST" {
		d.Panic("Expected post method.")
	}

	t1 := time.Now()
	totalDataWritten := 0
	chunkCount := 0

	verbose.Log("Handling WriteValue from " + req.RemoteAddr)
	defer func() {
		verbose.Log("Wrote %d Kb as %d chunks from %s in %s", totalDataWritten/1024, chunkCount, req.RemoteAddr, time.Since(t1))
	}()

	reader := bodyReader(req)
	defer func() {
		// Ensure all data on reader is consumed
		io.Copy(ioutil.Discard, reader)
		reader.Close()
	}()
	vdc := types.NewValidatingDecoder(cs)

	// Deserialize chunks from reader in background, recovering from errors
	errChan := make(chan error)
	chunkChan := make(chan *chunks.Chunk, writeValueConcurrency)

	go func() {
		var err error
		// Deferred calls run last-in first-out: chunkChan is closed first so
		// the decode stage can finish, then the result is sent on errChan.
		defer func() { errChan <- err; close(errChan) }()
		defer close(chunkChan)
		err = chunks.Deserialize(reader, chunkChan)
	}()

	decoded := make(chan chan types.DecodedChunk, writeValueConcurrency)

	go func() {
		defer close(decoded)
		for c := range chunkChan {
			// Queue the result channel before decoding starts so that results
			// are consumed in arrival order even if decodes finish out of order.
			ch := make(chan types.DecodedChunk)
			decoded <- ch

			go func(ch chan types.DecodedChunk, c *chunks.Chunk) {
				ch <- vdc.Decode(c)
			}(ch, c)
		}
	}()

	unresolvedRefs := hash.HashSet{}
	for ch := range decoded {
		dc := <-ch
		if dc.Chunk != nil && dc.Value != nil {
			(*dc.Value).WalkRefs(func(r types.Ref) {
				unresolvedRefs.Insert(r.TargetHash())
			})

			totalDataWritten += len(dc.Chunk.Data())
			cs.Put(*dc.Chunk)
			chunkCount++
			if chunkCount%100 == 0 {
				verbose.Log("Enqueued %d chunks", chunkCount)
			}
		}
	}

	// If there was an error during chunk deserialization, raise so it can be logged and responded to.
	if err := <-errChan; err != nil {
		d.Panic("Deserialization failure: %v", err)
	}

	if chunkCount > 0 {
		types.PanicIfDangling(unresolvedRefs, cs)
		persistChunks(cs)
	}

	w.WriteHeader(http.StatusCreated)
}

// Contents of the returned io.ReadCloser are snappy-compressed.
func buildWriteValueRequest(chunkChan chan *chunks.Chunk) io.ReadCloser { body, pw := io.Pipe() go func() { sw := snappy.NewBufferedWriter(pw) defer checkClose(pw) defer checkClose(sw) for c := range chunkChan { chunks.Serialize(*c, sw) } }() return body } func checkClose(c io.Closer) { d.PanicIfError(c.Close()) } func bodyReader(req *http.Request) (reader io.ReadCloser) { reader = req.Body if strings.Contains(req.Header.Get("Content-Encoding"), "gzip") { gr, err := gzip.NewReader(reader) d.PanicIfError(err) reader = gr } else if strings.Contains(req.Header.Get("Content-Encoding"), "x-snappy-framed") { sr := snappy.NewReader(reader) reader = ioutil.NopCloser(sr) } return } func respWriter(req *http.Request, w http.ResponseWriter) (writer io.WriteCloser) { writer = wc{w.(io.Writer)} if strings.Contains(req.Header.Get("Accept-Encoding"), "gzip") { w.Header().Add("Content-Encoding", "gzip") gw := gzip.NewWriter(w) writer = gw } else if strings.Contains(req.Header.Get("Accept-Encoding"), "x-snappy-framed") { w.Header().Add("Content-Encoding", "x-snappy-framed") sw := snappy.NewBufferedWriter(w) writer = sw } return } type wc struct { io.Writer } func (wc wc) Close() error { return nil } func persistChunks(cs chunks.ChunkStore) { for !cs.Commit(cs.Root(), cs.Root()) { } } func handleGetRefs(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) { if req.Method != "POST" { d.Panic("Expected post method.") } hashes := extractHashes(req) verbose.Log("Handling getRefs request for: %v\n", hashes) w.Header().Add("Content-Type", "application/octet-stream") writer := respWriter(req, w) defer writer.Close() for len(hashes) > 0 { batch := hashes // Limit RAM consumption by streaming chunks in ~8MB batches if len(batch) > maxGetBatchSize { batch = batch[:maxGetBatchSize] } chunkChan := make(chan *chunks.Chunk, maxGetBatchSize) absent := batch.HashSet() go func() { cs.GetMany(batch.HashSet(), chunkChan) close(chunkChan) }() for c := range chunkChan { 
chunks.Serialize(*c, writer) absent.Remove(c.Hash()) } if len(absent) > 0 { fmt.Fprintf(os.Stderr, "ERROR: Could not get chunks: %v\n", absent) } hashes = hashes[len(batch):] } } func handleGetBlob(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) { refStr := req.URL.Query().Get("h") if refStr == "" { d.Panic("Expected h param") } h := hash.Parse(refStr) if (h == hash.Hash{}) { d.Panic("h failed to parse") } vs := types.NewValueStore(cs) v := vs.ReadValue(h) b, ok := v.(types.Blob) if !ok { d.Panic("h is not a Blob") } w.Header().Add("Content-Type", "application/octet-stream") w.Header().Add("Content-Length", fmt.Sprintf("%d", b.Len())) w.Header().Add("Cache-Control", fmt.Sprintf("max-age=%d", 60*60*24*365)) b.Copy(w) } func extractHashes(req *http.Request) hash.HashSlice { reader := bodyReader(req) defer reader.Close() defer io.Copy(ioutil.Discard, reader) // Ensure all data on reader is consumed return deserializeHashes(reader) } func BuildHashesRequestForTest(hashes hash.HashSet) io.ReadCloser { batch := chunks.ReadBatch{} for h := range hashes { batch[h] = nil } return buildHashesRequest(batch) } func buildHashesRequest(batch chunks.ReadBatch) io.ReadCloser { body, pw := io.Pipe() go func() { defer checkClose(pw) serializeHashes(pw, batch) }() return body } func handleHasRefs(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) { if req.Method != "POST" { d.Panic("Expected post method.") } hashes := extractHashes(req) w.Header().Add("Content-Type", "text/plain") writer := respWriter(req, w) defer writer.Close() absent := cs.HasMany(hashes.HashSet()) for h := range absent { fmt.Fprintln(writer, h.String()) } } func handleRootGet(w http.ResponseWriter, req *http.Request, ps URLParams, rt chunks.ChunkStore) { if req.Method != "GET" { d.Panic("Expected get method.") } fmt.Fprintf(w, "%v", rt.Root().String()) w.Header().Add("content-type", "text/plain") } func handleStats(w http.ResponseWriter, req *http.Request, 
ps URLParams, cs chunks.ChunkStore) { if req.Method != "GET" { d.Panic("Expected get method.") } fmt.Fprint(w, cs.StatsSummary()) w.Header().Add("content-type", "text/plain") } func handleRootPost(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) { if req.Method != "POST" { d.Panic("Expected post method.") } params := req.URL.Query() tokens := params["last"] if len(tokens) != 1 { d.Panic(`Expected "last" query param value`) } last := hash.Parse(tokens[0]) // "current" should really, really be called "proposed" or something in the wire API tokens = params["current"] if len(tokens) != 1 { d.Panic(`Expected "current" query param value`) } proposed := hash.Parse(tokens[0]) vs := types.NewValueStore(cs) // Even though the Root is actually a Map>, its Noms Type is Map> in order to prevent the root chunk from getting bloated with type info. That means that the Value of the proposed new Root needs to be manually type-checked. The simplest way to do that would be to iterate over the whole thing and pull the target of each Ref from |cs|. That's a lot of reads, though, and it's more efficient to just read the Value indicated by |last|, diff the proposed new root against it, and validate whatever new entries appear. lastMap := validateLast(last, vs) proposedMap := validateProposed(proposed, last, vs) if !proposedMap.Empty() { assertMapOfStringToRefOfCommit(proposedMap, lastMap, vs) } // If some other client has committed to |vs| since it had |from| at the // root, this call to vs.Commit() will fail. Used to be that we'd always // propagate that failure back to the client and let them try again. This // made one very common operation annoyingly expensive, though, as clients // simultaneously committing to different Datasets would cause conflicts // with this vs.Commit() right here. In this common case, the server // already knows everything it needs to try again, so now we cut out the // round trip to the client and just retry inline. 
for to, from := proposed, last; !vs.Commit(to, from); { // If committing failed, we go read out the map of Datasets at the root of the store, which is a Map[string]Ref rootMap := types.NewMap(vs) root := vs.Root() if v := vs.ReadValue(root); v != nil { rootMap = v.(types.Map) } // Since we know that lastMap is an ancestor of both proposedMap and // rootMap, we can try to do a three-way merge here. We don't want to // traverse the Refs stored in the maps, though, just // basically merge the maps together as long the changes to rootMap // and proposedMap were in different Datasets. merged, err := mergeDatasetMaps(proposedMap, rootMap, lastMap, vs) if err != nil { verbose.Log("Attempted root map auto-merge failed: %s", err) w.WriteHeader(http.StatusConflict) break } to, from = vs.WriteValue(merged).TargetHash(), root } // If committing succeeded, the root of the store might be |proposed|...or // it might be some result of the merge performed above. So, we need to // tell the client what the new root is. If the commit failed, obviously // we need to inform the client of the actual current root. w.Header().Add("content-type", "text/plain") fmt.Fprintf(w, "%v", vs.Root().String()) } func validateLast(last hash.Hash, vrw types.ValueReadWriter) types.Map { if last.IsEmpty() { return types.NewMap(vrw) } lastVal := vrw.ReadValue(last) if lastVal == nil { d.Panic("Can't Commit from a non-present Chunk") } return lastVal.(types.Map) } func validateProposed(proposed, last hash.Hash, vrw types.ValueReadWriter) types.Map { // Only allowed to skip this check if both last and proposed are empty, because that represents the special case of someone flushing chunks into an empty store. 
if last.IsEmpty() && proposed.IsEmpty() { return types.NewMap(vrw) } // Ensure that proposed new Root is present in vr, is a Map and, if it has anything in it, that it's > proposedVal := vrw.ReadValue(proposed) if proposedVal == nil { d.Panic("Can't set Root to a non-present Chunk") } proposedMap, ok := proposedVal.(types.Map) if !ok { d.Panic("Root of a Database must be a Map") } return proposedMap } func assertMapOfStringToRefOfCommit(proposed, datasets types.Map, vr types.ValueReader) { stopChan := make(chan struct{}) defer close(stopChan) changes := make(chan types.ValueChanged) go func() { defer close(changes) proposed.Diff(datasets, changes, stopChan) }() for change := range changes { switch change.ChangeType { case types.DiffChangeAdded, types.DiffChangeModified: // Since this is a Map Diff, change.V is the key at which a change was detected. // Go get the Value there, which should be a Ref, deref it, and then ensure the target is a Commit. val := change.NewValue ref, ok := val.(types.Ref) if !ok { d.Panic("Root of a Database must be a Map>, but key %s maps to a %s", change.Key.(types.String), types.TypeOf(val).Describe()) } if targetValue := ref.TargetValue(vr); !IsCommit(targetValue) { d.Panic("Root of a Database must be a Map>, but the ref at key %s points to a %s", change.Key.(types.String), types.TypeOf(targetValue).Describe()) } } } } func mergeDatasetMaps(a, b, parent types.Map, vrw types.ValueReadWriter) (types.Map, error) { aChangeChan, bChangeChan := make(chan types.ValueChanged), make(chan types.ValueChanged) stopChan := make(chan struct{}) go func() { defer close(aChangeChan) a.Diff(parent, aChangeChan, stopChan) }() go func() { defer close(bChangeChan) b.Diff(parent, bChangeChan, stopChan) }() defer func() { close(stopChan) for range aChangeChan { } for range bChangeChan { } }() apply := func(target *types.MapEditor, change types.ValueChanged, newVal types.Value) *types.MapEditor { switch change.ChangeType { case types.DiffChangeAdded, 
types.DiffChangeModified: return target.Set(change.Key, newVal) case types.DiffChangeRemoved: return target.Remove(change.Key) default: panic("Not Reached") } } merged := parent.Edit() aChange, bChange := types.ValueChanged{}, types.ValueChanged{} for { if aChange.Key == nil { aChange = <-aChangeChan } if bChange.Key == nil { bChange = <-bChangeChan } // Both channels are producing zero values, so we're done. if aChange.Key == nil && bChange.Key == nil { break } if aChange.Key != nil && (bChange.Key == nil || aChange.Key.Less(bChange.Key)) { merged = apply(merged, aChange, a.Get(aChange.Key)) aChange = types.ValueChanged{} continue } else if bChange.Key != nil && (aChange.Key == nil || bChange.Key.Less(aChange.Key)) { merged = apply(merged, bChange, b.Get(bChange.Key)) bChange = types.ValueChanged{} continue } d.PanicIfFalse(aChange.Key.Equals(bChange.Key)) // If the two diffs generate different kinds of changes at the same key, conflict. if aChange.ChangeType != bChange.ChangeType { return parent, errors.New("Incompatible changes at " + types.EncodedValue(aChange.Key)) } // Otherwise, we're OK IFF the two diffs made exactly the same change aValue := a.Get(aChange.Key) if aChange.ChangeType != types.DiffChangeRemoved && !aValue.Equals(b.Get(bChange.Key)) { return parent, errors.New("Incompatible changes at " + types.EncodedValue(aChange.Key)) } merged = apply(merged, aChange, aValue) aChange, bChange = types.ValueChanged{}, types.ValueChanged{} } return merged.Map(), nil } func handleGraphQL(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) { if req.Method != http.MethodGet && req.Method != http.MethodPost { d.Panic("Unexpected method") } ds := req.FormValue("ds") h := req.FormValue("h") if (ds == "") == (h == "") { d.Panic("Must specify one (and only one) of ds (dataset) or h (hash)") } var query string if req.Header.Get("Content-Type") == "application/json" { var body struct { Query string } err := 
json.NewDecoder(req.Body).Decode(&body) if err != nil { d.Panic("invalid query: %s", err) } query = body.Query } else { query = req.FormValue("query") if query == "" { d.Panic("Expected query") } } // Note: we don't close this becaues |cs| will be closed by the generic endpoint handler db := NewDatabase(cs) var rootValue types.Value var err error if ds != "" { dataset := db.GetDataset(ds) var ok bool rootValue, ok = dataset.MaybeHead() if !ok { err = fmt.Errorf("Dataset %s not found", ds) } } else { rootValue = db.ReadValue(hash.Parse(h)) if rootValue == nil { err = errors.New("Root value not found") } } w.Header().Add("Content-Type", "application/json") writer := respWriter(req, w) defer writer.Close() if err != nil { ngql.Error(err, writer) } else { ngql.Query(rootValue, query, db, writer) } } func handleBaseGet(w http.ResponseWriter, req *http.Request, ps URLParams, rt chunks.ChunkStore) { if req.Method != "GET" { d.Panic("Expected get method.") } w.Header().Add("Content-Type", "text/html") fmt.Fprintf(w, nomsBaseHTML) } ================================================ FILE: go/datas/remote_database_handlers_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package datas

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"net/url"
	"strings"
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/hash"
	"github.com/attic-labs/noms/go/types"
	"github.com/golang/snappy"
	"github.com/stretchr/testify/assert"
)

// TestHandleWriteValue posts the serialized chunks of a new blob and of a list
// that references it, expects 201 Created, and then checks that the list can
// be read back through a fresh view of the same storage.
func TestHandleWriteValue(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.TestStorage{}
	db := NewDatabase(storage.NewView())

	l := types.NewList(
		db,
		db.WriteValue(types.Bool(true)),
		db.WriteValue(types.Bool(false)),
	)
	r := db.WriteValue(l)
	_, err := db.CommitValue(db.GetDataset("datasetID"), r)
	assert.NoError(err)

	newItem := types.NewEmptyBlob(db)
	itemChunk := types.EncodeValue(newItem)
	l2 := l.Edit().Insert(1, types.NewRef(newItem)).List()
	listChunk := types.EncodeValue(l2)

	// The referenced blob chunk is serialized ahead of the list that refers to it.
	body := &bytes.Buffer{}
	chunks.Serialize(itemChunk, body)
	chunks.Serialize(listChunk, body)

	w := httptest.NewRecorder()
	HandleWriteValue(w, newRequest("POST", "", "", body, nil), params{}, storage.NewView())

	if assert.Equal(http.StatusCreated, w.Code, "Handler error:\n%s", string(w.Body.Bytes())) {
		db2 := NewDatabase(storage.NewView())
		v := db2.ReadValue(l2.Hash())
		if assert.NotNil(v) {
			assert.True(v.Equals(l2), "%+v != %+v", v, l2)
		}
	}
}

// TestHandleWriteValuePanic posts a body that is not serialized chunk data and
// expects the handler to answer 400 Bad Request.
func TestHandleWriteValuePanic(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}

	body := &bytes.Buffer{}
	body.WriteString("Bogus")

	w := httptest.NewRecorder()
	HandleWriteValue(w, newRequest("POST", "", "", body, nil), params{}, storage.NewView())

	assert.Equal(http.StatusBadRequest, w.Code, "Handler error:\n%s", string(w.Body.Bytes()))
}

// TestHandleWriteValueDupChunks posts the same chunk writeValueConcurrency+1
// times in one request and expects 201 Created with the value readable
// afterwards.
func TestHandleWriteValueDupChunks(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}
	db := NewDatabase(storage.NewView())
	defer db.Close()

	newItem := types.NewEmptyBlob(db)
	itemChunk := types.EncodeValue(newItem)

	body := &bytes.Buffer{}
	// Write the same chunk to body enough times to be certain that at least one of the concurrent deserialize/decode passes completes before the last one can continue.
	for i := 0; i <= writeValueConcurrency; i++ {
		chunks.Serialize(itemChunk, body)
	}

	w := httptest.NewRecorder()
	HandleWriteValue(w, newRequest("POST", "", "", body, nil), params{}, storage.NewView())

	if assert.Equal(http.StatusCreated, w.Code, "Handler error:\n%s", string(w.Body.Bytes())) {
		// NOTE(review): this db shadows the outer one and is not closed here.
		db := NewDatabase(storage.NewView())
		v := db.ReadValue(newItem.Hash())
		if assert.NotNil(v) {
			assert.True(v.Equals(newItem), "%+v != %+v", v, newItem)
		}
	}
}

// TestBuildWriteValueRequest checks that the stream produced by
// buildWriteValueRequest, once read through a snappy reader and deserialized,
// yields the input chunks in their original order.
func TestBuildWriteValueRequest(t *testing.T) {
	assert := assert.New(t)
	input1, input2 := "abc", "def"
	chnx := []chunks.Chunk{
		chunks.NewChunk([]byte(input1)),
		chunks.NewChunk([]byte(input2)),
	}

	inChunkChan := make(chan *chunks.Chunk, 2)
	inChunkChan <- &chnx[0]
	inChunkChan <- &chnx[1]
	close(inChunkChan)

	compressed := buildWriteValueRequest(inChunkChan)
	gr := snappy.NewReader(compressed)

	outChunkChan := make(chan *chunks.Chunk, len(chnx))
	chunks.Deserialize(gr, outChunkChan)
	close(outChunkChan)

	// Consume expectations from the front so that order is verified too.
	for c := range outChunkChan {
		assert.Equal(chnx[0].Hash(), c.Hash())
		chnx = chnx[1:]
	}
	assert.Empty(chnx)
}

// serializeChunks writes every chunk in chnx through a snappy buffered writer
// into an in-memory buffer and returns that buffer as a reader.
func serializeChunks(chnx []chunks.Chunk, assert *assert.Assertions) io.Reader {
	body := &bytes.Buffer{}
	sw := snappy.NewBufferedWriter(body)
	for _, chunk := range chnx {
		chunks.Serialize(chunk, sw)
	}
	assert.NoError(sw.Close())
	return body
}

// TestBuildHashesRequest checks that every hash decoded from the request body
// built by buildHashesRequest is a key of the originating batch.
func TestBuildHashesRequest(t *testing.T) {
	assert := assert.New(t)
	batch := chunks.ReadBatch{
		hash.Parse("00000000000000000000000000000002"): nil,
		hash.Parse("00000000000000000000000000000003"): nil,
	}
	r := buildHashesRequest(batch)
	defer r.Close()

	requested := deserializeHashes(r)

	for _, h := range requested {
		_, present := batch[h]
		assert.True(present, "Query contains %s, which is not in initial refs", h)
	}
}

// TestHandleGetRefs stores two chunks, requests both by hash and expects the
// 200 response body to deserialize into exactly those two chunks.
func TestHandleGetRefs(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}
	cs := storage.NewView()
	input1, input2 := "abc", "def"
	chnx := []chunks.Chunk{
		chunks.NewChunk([]byte(input1)),
		chunks.NewChunk([]byte(input2)),
	}
	for _, c := range chnx {
		cs.Put(c)
	}
	persistChunks(cs)

	body := buildHashesRequest(chunks.ReadBatch{chnx[0].Hash(): nil, chnx[1].Hash(): nil})

	w := httptest.NewRecorder()
	HandleGetRefs(
		w,
		newRequest("POST", "", "", body, http.Header{
			"Content-Type": {"application/octet-stream"},
		}),
		params{},
		storage.NewView(),
	)

	if assert.Equal(http.StatusOK, w.Code, "Handler error:\n%s", string(w.Body.Bytes())) {
		chunkChan := make(chan *chunks.Chunk, len(chnx))
		chunks.Deserialize(w.Body, chunkChan)
		close(chunkChan)

		// Collect into a set: only membership is asserted, not order.
		foundHashes := hash.HashSet{}
		for c := range chunkChan {
			foundHashes[c.Hash()] = struct{}{}
		}

		assert.True(len(foundHashes) == 2)
		_, hasC1 := foundHashes[chnx[0].Hash()]
		assert.True(hasC1)
		_, hasC2 := foundHashes[chnx[1].Hash()]
		assert.True(hasC2)
	}
}

// TestHandleGetBlob exercises HandleGetBlob for four cases: no "h" parameter,
// a hash that has not been written, a committed blob (200 with the blob's
// contents), and a committed non-blob value. All but the valid case expect
// 400 Bad Request.
func TestHandleGetBlob(t *testing.T) {
	assert := assert.New(t)

	blobContents := "I am a blob"
	storage := &chunks.MemoryStorage{}
	db := NewDatabase(storage.NewView())
	ds := db.GetDataset("foo")

	// Test missing h
	w := httptest.NewRecorder()
	HandleGetBlob(
		w,
		newRequest("GET", "", "/getBlob/", strings.NewReader(""), http.Header{}),
		params{},
		storage.NewView(),
	)
	assert.Equal(http.StatusBadRequest, w.Code, "Handler error:\n%s", string(w.Body.Bytes()))

	b := types.NewBlob(db, bytes.NewBuffer([]byte(blobContents)))

	// Test non-present hash
	w = httptest.NewRecorder()
	HandleGetBlob(
		w,
		newRequest("GET", "", fmt.Sprintf("/getBlob/?h=%s", b.Hash().String()), strings.NewReader(""), http.Header{}),
		params{},
		storage.NewView(),
	)
	assert.Equal(http.StatusBadRequest, w.Code, "Handler error:\n%s", string(w.Body.Bytes()))

	r := db.WriteValue(b)
	ds, err := db.CommitValue(ds, r)
	assert.NoError(err)

	// Valid
	w = httptest.NewRecorder()
	HandleGetBlob(
		w,
		newRequest("GET", "", fmt.Sprintf("/getBlob/?h=%s", r.TargetHash().String()), strings.NewReader(""), http.Header{}),
		params{},
		storage.NewView(),
	)

	if assert.Equal(http.StatusOK, w.Code, "Handler error:\n%s", string(w.Body.Bytes())) {
		out, _ := ioutil.ReadAll(w.Body)
		// NOTE(review): testify's Equal takes (expected, actual); the arguments
		// here are in the opposite order, which only affects failure messages.
		assert.Equal(string(out), blobContents)
	}

	// Test non-blob
	r2 := db.WriteValue(types.Number(1))
	_, err = db.CommitValue(ds, r2)
	assert.NoError(err)

	w = httptest.NewRecorder()
	HandleGetBlob(
		w,
		newRequest("GET", "", fmt.Sprintf("/getBlob/?h=%s", r2.TargetHash().String()), strings.NewReader(""), http.Header{}),
		params{},
		storage.NewView(),
	)
	assert.Equal(http.StatusBadRequest, w.Code, "Handler error:\n%s", string(w.Body.Bytes()))
}

// TestHandleHasRefs asks about three hashes of which only one is stored. The
// hashes parsed from the whitespace-separated 200 response are expected to be
// exactly the two that were never stored.
func TestHandleHasRefs(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}
	input1, input2, input3 := "abc", "def", "ghi"
	chnx := []chunks.Chunk{
		chunks.NewChunk([]byte(input1)),
		chunks.NewChunk([]byte(input2)),
	}
	present := chunks.NewChunk([]byte(input3))
	cs := storage.NewView()
	cs.Put(present)
	persistChunks(cs)

	body := buildHashesRequest(chunks.ReadBatch{
		chnx[0].Hash():  nil,
		chnx[1].Hash():  nil,
		present.Hash(): nil,
	})

	w := httptest.NewRecorder()
	HandleHasRefs(
		w,
		newRequest("POST", "", "", body, http.Header{
			"Content-Type": {"application/octet-stream"},
		}),
		params{},
		storage.NewView(),
	)

	absent := hash.HashSet{}
	if assert.Equal(http.StatusOK, w.Code, "Handler error:\n%s", string(w.Body.Bytes())) {
		scanner := bufio.NewScanner(w.Body)
		scanner.Split(bufio.ScanWords)
		for scanner.Scan() {
			absent.Insert(hash.Parse(scanner.Text()))
		}
	}

	if assert.Len(absent, len(chnx)) {
		for _, c := range chnx {
			assert.True(absent.Has(c.Hash()))
		}
		assert.False(absent.Has(present.Hash()))
	}
}

// TestHandleGetRoot commits a chunk hash as the store's root and expects
// HandleRootGet to return that hash in the response body.
func TestHandleGetRoot(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}
	cs := storage.NewView()
	c := chunks.NewChunk([]byte("abc"))
	cs.Put(c)
	assert.True(cs.Commit(c.Hash(), hash.Hash{}))

	w := httptest.NewRecorder()
	HandleRootGet(w, newRequest("GET", "", "", nil, nil), params{}, storage.NewView())

	if assert.Equal(http.StatusOK, w.Code, "Handler error:\n%s", string(w.Body.Bytes())) {
		root := hash.Parse(string(w.Body.Bytes()))
		assert.Equal(c.Hash(), root)
	}
}

// TestHandleGetBase expects HandleBaseGet to answer 200 with a body that is
// byte-for-byte equal to nomsBaseHTML.
func TestHandleGetBase(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}

	w := httptest.NewRecorder()
	HandleBaseGet(w, newRequest("GET", "", "", nil, nil), params{}, storage.NewView())

	if assert.Equal(http.StatusOK, w.Code, "Handler error:\n%s", string(w.Body.Bytes())) {
		assert.Equal([]byte(nomsBaseHTML), w.Body.Bytes())
	}
}

// TestHandlePostRoot checks the compare-and-set behaviour of HandleRootPost:
// empty -> empty succeeds, a stale 'last' yields 409 Conflict with the empty
// hash in the body, and a matching 'last' yields 200 with the new root.
func TestHandlePostRoot(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}
	cs := storage.NewView()
	vs := types.NewValueStore(cs)

	// validate asserts both the status code and the root hash echoed in the body.
	validate := func(code int, root hash.Hash, w *httptest.ResponseRecorder) {
		assert.Equal(code, w.Code, "Handler error:\n%s", string(w.Body.Bytes()))
		assert.Equal(root, hash.Parse(string(w.Body.Bytes())))
	}

	// Empty -> Empty should be OK.
	url := buildPostRootURL(hash.Hash{}, hash.Hash{})
	w := httptest.NewRecorder()
	HandleRootPost(w, newRequest("POST", "", url, nil, nil), params{}, storage.NewView())
	validate(http.StatusOK, hash.Hash{}, w)

	commit := buildTestCommit(vs, types.String("head"))
	commitRef := vs.WriteValue(commit)
	firstHead := types.NewMap(vs, types.String("dataset1"), types.ToRefOfValue(commitRef))
	firstHeadRef := vs.WriteValue(firstHead)
	vs.Commit(vs.Root(), vs.Root())

	commit = buildTestCommit(vs, types.String("second"), commitRef)
	newHead := types.NewMap(vs, types.String("dataset1"), types.ToRefOfValue(vs.WriteValue(commit)))
	newHeadRef := vs.WriteValue(newHead)
	vs.Commit(vs.Root(), vs.Root())

	// First attempt should fail, as 'last' won't match.
	url = buildPostRootURL(newHeadRef.TargetHash(), firstHeadRef.TargetHash())
	w = httptest.NewRecorder()
	HandleRootPost(w, newRequest("POST", "", url, nil, nil), params{}, storage.NewView())
	validate(http.StatusConflict, hash.Hash{}, w)

	// Now, update the root manually to 'last' and try again.
	assert.True(cs.Commit(firstHeadRef.TargetHash(), hash.Hash{}))
	w = httptest.NewRecorder()
	HandleRootPost(w, newRequest("POST", "", url, nil, nil), params{}, storage.NewView())
	validate(http.StatusOK, newHeadRef.TargetHash(), w)
}

// buildPostRootURL returns a URL consisting only of a query string carrying
// the "last" and "current" hashes.
func buildPostRootURL(current, last hash.Hash) string {
	u := &url.URL{}
	queryParams := url.Values{}
	queryParams.Add("last", last.String())
	queryParams.Add("current", current.String())
	u.RawQuery = queryParams.Encode()
	return u.String()
}

// buildTestCommit builds a commit struct holding v, with the given parents
// collected into a set and an empty "Meta" struct.
func buildTestCommit(vrw types.ValueReadWriter, v types.Value, parents ...types.Value) types.Struct {
	return NewCommit(v, types.NewSet(vrw, parents...), types.NewStruct("Meta", types.StructData{}))
}

// TestRejectPostRoot covers three root updates that HandleRootPost is expected
// to refuse with 400 Bad Request: a new root map whose value is a plain string,
// an update to the empty hash, and an update whose 'last' is the hash of
// chunks.EmptyChunk.
func TestRejectPostRoot(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}
	cs := storage.NewView()
	vs := types.NewValueStore(cs)
	defer vs.Close()

	newHead := types.NewMap(vs, types.String("dataset1"), types.String("Not a Head"))
	chunk := types.EncodeValue(newHead)
	cs.Put(chunk)
	persistChunks(cs)

	// Attempt should fail, as newHead isn't the right type.
	url := buildPostRootURL(chunk.Hash(), hash.Hash{})
	w := httptest.NewRecorder()
	HandleRootPost(w, newRequest("POST", "", url, nil, nil), params{}, storage.NewView())
	assert.Equal(http.StatusBadRequest, w.Code, "Handler error:\n%s", string(w.Body.Bytes()))

	// Put in a legit commit
	commit := buildTestCommit(vs, types.String("commit"))
	head := types.NewMap(vs, types.String("dataset1"), types.ToRefOfValue(vs.WriteValue(commit)))
	headRef := vs.WriteValue(head)
	assert.True(vs.Commit(headRef.TargetHash(), vs.Root()))

	// Attempt to update head to empty hash should fail
	url = buildPostRootURL(hash.Hash{}, headRef.TargetHash())
	w = httptest.NewRecorder()
	HandleRootPost(w, newRequest("POST", "", url, nil, nil), params{}, storage.NewView())
	assert.Equal(http.StatusBadRequest, w.Code, "Handler error:\n%s", string(w.Body.Bytes()))

	// Attempt to update from a non-present chunks should fail
	url = buildPostRootURL(headRef.TargetHash(), chunks.EmptyChunk.Hash())
	w = httptest.NewRecorder()
	HandleRootPost(w, newRequest("POST", "", url, nil, nil), params{}, storage.NewView())
	assert.Equal(http.StatusBadRequest, w.Code, "Handler error:\n%s", string(w.Body.Bytes()))
}

// params is a map-backed set of named URL parameters passed to the handlers
// under test.
type params map[string]string

// ByName returns the value stored under k, or "" when k is absent.
func (p params) ByName(k string) string {
	return p[k]
}

================================================
FILE: go/datas/serialize_hashes.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package datas

import (
	"encoding/binary"
	"io"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
)

// serializedLength returns the number of bytes serializeHashes writes for
// batch: a uint32 count followed by hash.ByteLen bytes per hash.
func serializedLength(batch chunks.ReadBatch) uint32 {
	return uint32(len(batch)*hash.ByteLen + binary.Size(uint32(0)))
}

// serializeHashes writes the number of hashes in batch as a big-endian uint32,
// followed by the raw bytes of each hash. The hashes are emitted in map
// iteration order, so their order is unspecified. Write errors are passed to
// d.PanicIfError.
func serializeHashes(w io.Writer, batch chunks.ReadBatch) {
	err := binary.Write(w, binary.BigEndian, uint32(len(batch))) // 4 billion hashes is probably absurd. Maybe this should be smaller?
	d.PanicIfError(err)
	for h := range batch {
		serializeHash(w, h)
	}
}

// serializeHash writes the raw bytes of h to w.
func serializeHash(w io.Writer, h hash.Hash) {
	_, err := w.Write(h[:])
	d.PanicIfError(err)
}

// deserializeHashes reads a big-endian uint32 count from reader and then that
// many hashes, returning them in stream order.
// NOTE(review): the slice is allocated from the count before any hash is
// read, so the count is trusted as-is — confirm callers only feed trusted input.
func deserializeHashes(reader io.Reader) hash.HashSlice {
	count := uint32(0)
	err := binary.Read(reader, binary.BigEndian, &count)
	d.PanicIfError(err)

	hashes := make(hash.HashSlice, count)
	for i := range hashes {
		hashes[i] = deserializeHash(reader)
	}
	return hashes
}

// deserializeHash reads exactly hash.ByteLen bytes from reader into a Hash.
func deserializeHash(reader io.Reader) hash.Hash {
	h := hash.Hash{}
	n, err := io.ReadFull(reader, h[:])
	d.PanicIfError(err)
	d.PanicIfFalse(int(hash.ByteLen) == n)
	return h
}

================================================
FILE: go/datas/serialize_hashes_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package datas

import (
	"bytes"
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
)

// TestHashRoundTrip serializes a batch of four hashes, deserializes it again,
// and checks that the same set of hashes comes back and that the number of
// bytes written equals serializedLength.
func TestHashRoundTrip(t *testing.T) {
	b := &bytes.Buffer{}
	input := chunks.ReadBatch{
		hash.Parse("00000000000000000000000000000000"): nil,
		hash.Parse("00000000000000000000000000000001"): nil,
		hash.Parse("00000000000000000000000000000002"): nil,
		hash.Parse("00000000000000000000000000000003"): nil,
	}
	defer input.Close()

	serializeHashes(b, input)
	// Capture the length before deserializeHashes drains the buffer.
	serializedLen := b.Len()
	output := deserializeHashes(b)
	assert.Len(t, output, len(input), "Output has different number of elements than input: %v, %v", output, input)
	for _, h := range output {
		_, present := input[h]
		assert.True(t, present, "%s is in output but not in input", h)
	}
	assert.Equal(t, uint32(serializedLen), serializedLength(input))
}

================================================
FILE: go/diff/apply_patch.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package diff import ( "fmt" "sort" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) // Apply applies a Patch (list of diffs) to a graph. It fulfills the // following contract: // Given 2 Noms graphs: a1 and a2: // ApplyPatch(a1, Diff(a1, a2)) == a2 // This is useful for IncrementalUpdate() and possibly other problems. See // updater.go for more information. // // This function uses a patchStack to maintain state of the graph as it cycles // through the diffs in a patch, applying them to 'root' one by one. Because the // Difference objects in the patch can be sorted according to their path, each // one is applied in order. When done in combination with the stack, this enables // all Differences that change a particular node to be applied to that node // before it gets assigned back to it's parent. func Apply(root types.Value, patch Patch) types.Value { if len(patch) == 0 { return root } var lastPath types.Path stack := patchStack{} sort.Sort(patch) // Push the element on the stack that corresponds to the root // node. stack.push(nil, nil, types.DiffChangeModified, root, nil, nil) for _, dif := range patch { // get the path where this dif needs to be applied p := dif.Path // idx will hold the index of the last common element between p and // lastPath (p from the last iteration). var idx int // p can be identical to lastPath in certain cases. For example, when // one item gets removed from a list at the same place another item // is added to it. In this case, we need pop the last operation of the // stack early and set the idx to be the len(p) - 1. // Otherwise, if the paths are different we can call commonPrefixCount() if len(p) > 0 && p.Equals(lastPath) { stack.pop() idx = len(p) - 1 } else { idx = commonPrefixCount(lastPath, p) } lastPath = p // if the stack has elements on it leftover from the last iteration. 
Pop // those elements until the stack only has values in it that are // referenced by this p. Popping an element on the stack, folds that // value into it's parent. for idx < stack.Len()-1 { stack.pop() } // tail is the part of the current path that has not yet been pushed // onto the stack. Iterate over those pathParts and push those values // onto the stack. tail := p[idx:] for i, pp := range tail { top := stack.top() parent := top.newestValue() oldValue := pp.Resolve(parent, nil) var newValue types.Value if i == len(tail)-1 { // last pathPart in this path newValue = oldValue oldValue = dif.OldValue } // Any intermediate elements on the stack will have a changeType // of modified. Leaf elements will be updated below to reflect the // actual changeType. stack.push(p, pp, types.DiffChangeModified, oldValue, newValue, dif.NewKeyValue) } // Update the top element in the stack with changeType from the dif and // the NewValue from the diff se := stack.top() se.newValue = dif.NewValue se.changeType = dif.ChangeType } // We're done applying diffs to the graph. Pop any elements left on the // stack and return the new root. var newRoot stackElem for stack.Len() > 0 { newRoot = stack.pop() } return newRoot.newValue } // updateNode handles the actual update of a node. It uses 'pp' to get the // information that it needs to update 'parent' with 'newVal'. 'oldVal' is also // passed in so that Sets can be updated correctly. This function is used by // the patchStack Pop() function to merge values into a new graph. 
func (stack *patchStack) updateNode(top *stackElem, parent types.Value) types.Value { d.PanicIfTrue(parent == nil) switch part := top.pathPart.(type) { case types.FieldPath: switch top.changeType { case types.DiffChangeAdded: return parent.(types.Struct).Set(part.Name, top.newValue) case types.DiffChangeRemoved: return parent.(types.Struct).Delete(part.Name) case types.DiffChangeModified: return parent.(types.Struct).Set(part.Name, top.newValue) } case types.IndexPath: switch el := parent.(type) { case types.List: idx := uint64(part.Index.(types.Number)) offset := stack.adjustIndexOffset(top.path, top.changeType) realIdx := idx + uint64(offset) var nv types.Value switch top.changeType { case types.DiffChangeAdded: if realIdx > el.Len() { nv = el.Edit().Append(top.newValue).List() } else { nv = el.Edit().Insert(realIdx, top.newValue).List() } case types.DiffChangeRemoved: nv = el.Edit().RemoveAt(realIdx).List() case types.DiffChangeModified: nv = el.Edit().Set(realIdx, top.newValue).List() } return nv case types.Map: switch top.changeType { case types.DiffChangeAdded: return el.Edit().Set(part.Index, top.newValue).Map() case types.DiffChangeRemoved: return el.Edit().Remove(part.Index).Map() case types.DiffChangeModified: if part.IntoKey { newPart := types.IndexPath{Index: part.Index} ov := newPart.Resolve(parent, nil) return el.Edit().Remove(part.Index).Set(top.newValue, ov).Map() } return el.Edit().Set(part.Index, top.newValue).Map() } case types.Set: if top.oldValue != nil { el = el.Edit().Remove(top.oldValue).Set() } if top.newValue != nil { el = el.Edit().Insert(top.newValue).Set() } return el } case types.HashIndexPath: switch el := parent.(type) { case types.Set: switch top.changeType { case types.DiffChangeAdded: return el.Edit().Insert(top.newValue).Set() case types.DiffChangeRemoved: return el.Edit().Remove(top.oldValue).Set() case types.DiffChangeModified: return el.Edit().Remove(top.oldValue).Insert(top.newValue).Set() } case types.Map: keyPart := 
types.HashIndexPath{Hash: part.Hash, IntoKey: true} k := keyPart.Resolve(parent, nil) switch top.changeType { case types.DiffChangeAdded: k := top.newKeyValue return el.Edit().Set(k, top.newValue).Map() case types.DiffChangeRemoved: return el.Edit().Remove(k).Map() case types.DiffChangeModified: if part.IntoKey { v := el.Get(k) return el.Edit().Remove(k).Set(top.newValue, v).Map() } return el.Edit().Set(k, top.newValue).Map() } } } panic(fmt.Sprintf("unreachable, pp.(type): %T", top.pathPart)) } // Returns the count of the number of PathParts that two paths have in a common // prefix. The paths '.field1' and '.field2' have a 0 length common prefix. // Todo: move to types.Path? func commonPrefixCount(p1, p2 types.Path) int { cnt := 0 for i, pp1 := range p1 { var pp2 types.PathPart if i < len(p2) { pp2 = p2[i] } if pp1 != pp2 { return cnt } cnt += 1 } return cnt } type stackElem struct { path types.Path pathPart types.PathPart // from parent Value to this Value changeType types.DiffChangeType oldValue types.Value // can be nil if newValue is not nil newValue types.Value // can be nil if oldValue is not nil newKeyValue types.Value } // newestValue returns newValue if not nil, otherwise oldValue. This is useful // when merging. Elements on the stack were 'push'ed there with the oldValue. // newValue may have been set when a value was 'pop'ed above it. This method // returns the last value that has been set. 
func (se stackElem) newestValue() types.Value { if se.newValue != nil { return se.newValue } return se.oldValue } type patchStack struct { vals []stackElem lastPath types.Path addCnt int rmCnt int } func (stack *patchStack) push(p types.Path, pp types.PathPart, changeType types.DiffChangeType, oldValue, newValue, newKeyValue types.Value) { stack.vals = append(stack.vals, stackElem{path: p, pathPart: pp, changeType: changeType, oldValue: oldValue, newValue: newValue, newKeyValue: newKeyValue}) } func (stack *patchStack) top() *stackElem { return &stack.vals[len(stack.vals)-1] } // pop applies the change to the graph. When an element is 'pop'ed from the stack, // this function uses the pathPart to merge that value into it's parent. func (stack *patchStack) pop() stackElem { top := stack.top() stack.vals = stack.vals[:len(stack.vals)-1] if stack.Len() > 0 { newTop := stack.top() parent := newTop.newestValue() newTop.newValue = stack.updateNode(top, parent) } return *top } func (stack *patchStack) Len() int { return len(stack.vals) } // adjustIndexOffset returns an offset that needs to be added to list indexes // when applying diffs to lists. Diffs are applied to lists beginning at the 0th // element. Changes to the list mean that subsequent changes to the same list // have to be adjusted accordingly. The stack keeps state for each list as it's // processed so updateNode() can get the correct index. // Whenever a list is encountered, diffs consist of add & remove operations. The // offset is calculated by keeping a count of each add & remove. Due to the way // way diffs are calculated, no offset is ever needed for 'add' operations. 
The // offset for 'remove' operations are calculated as: // stack.addCnt - stack.rmCnt func (stack *patchStack) adjustIndexOffset(p types.Path, changeType types.DiffChangeType) (res int) { parentPath := p[:len(p)-1] // parentPath is different than the last parentPath so reset counters if stack.lastPath == nil || !stack.lastPath.Equals(parentPath) { stack.lastPath = parentPath stack.addCnt = 0 stack.rmCnt = 0 } // offset for 'Add' operations are always 0, 'Remove' ops offset are // calculated here if changeType == types.DiffChangeRemoved { res = stack.addCnt - stack.rmCnt } // Bump up the appropriate cnt for this operation. switch changeType { case types.DiffChangeAdded: stack.addCnt += 1 case types.DiffChangeRemoved: stack.rmCnt += 1 } return } ================================================ FILE: go/diff/apply_patch_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package diff import ( "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestCommonPrefixCount(t *testing.T) { assert := assert.New(t) testCases := [][]interface{}{ {".value[#94a2oa20oka0jdv5lha03vuvvumul1vb].sizes[#316j9oc39b09fbc2qf3klenm6p1o1d7h]", 0}, {".value[#94a2oa20oka0jdv5lha03vuvvumul1vb].sizes[#77eavttned7llu1pkvhaei9a9qgcagir]", 3}, {".value[#94a2oa20oka0jdv5lha03vuvvumul1vb].sizes[#hboaq9581drq4g9jf62d3s06al3us49s]", 3}, {".value[#94a2oa20oka0jdv5lha03vuvvumul1vb].sizes[#l0hpa7sbr7qutrcfn5173kar4j2847m1]", 3}, {".value[#9vj5m3049mav94bttcujhgfdfqcavsbn].sizes[#33f6tb4h8agh57s2bqlmi9vbhlkbtmct]", 1}, {".value[#9vj5m3049mav94bttcujhgfdfqcavsbn].sizes[#a43ne9a8kotcqph4up5pqqdmr1e1qcsl]", 3}, {".value[#9vj5m3049mav94bttcujhgfdfqcavsbn].sizes[#ppqg6pem2sb64h2i2ptnh8ckj8gogj9h]", 3}, 
{".value[#9vj5m3049mav94bttcujhgfdfqcavsbn].sizes[#s7r2vpnqlk20sd72mg8ijerg9cmauaqo]", 3}, {".value[#bpspmmlc41pk0r144a7682oah0tmge1e].sizes[#9vuc1gg3c3eude5v3j5deqopjsobe3no]", 1}, {".value[#bpspmmlc41pk0r144a7682oah0tmge1e].sizes[#qo3gfdsf14v3dh0oer82vn1bg4o8nlsc]", 3}, {".value[#bpspmmlc41pk0r144a7682oah0tmge1e].sizes[#rlidki5ipbjdofsm2rq3a66v908m5fpl]", 3}, {".value[#bpspmmlc41pk0r144a7682oah0tmge1e].sizes[#st1n96rh89c2vgo090dt9lknd5ip4kck]", 3}, {".value[#hjh5hpn55591k0gjvgckc14erli968ao].sizes[#267889uv3mtih6fij3fhio2jiqtl6nho]", 1}, {".value[#hjh5hpn55591k0gjvgckc14erli968ao].sizes[#7ncb7guoip9e400bm2lcvr0dda29o9jn]", 3}, {".value[#hjh5hpn55591k0gjvgckc14erli968ao].sizes[#afscb0on7rt8bq6eutup8juusmid7i96]", 3}, {".value[#hjh5hpn55591k0gjvgckc14erli968ao].sizes[#drqe4lr0vdfdtmvejsjun1l3mfv6ums5]", 3}, } var lastPath types.Path for i, tc := range testCases { path, expected := tc[0].(string), tc[1].(int) p, err := types.ParsePath(path) assert.NoError(err) assert.Equal(expected, commonPrefixCount(lastPath, p), "failed for paths[%d]: %s", i, path) lastPath = p } } type testFunc func(parent types.Value) types.Value type testKey struct { X, Y int } var ( vm map[string]types.Value ) func vfk(keys ...string) []types.Value { var values []types.Value for _, k := range keys { values = append(values, vm[k]) } return values } func testValues(vrw types.ValueReadWriter) map[string]types.Value { if vm == nil { vm = map[string]types.Value{ "k1": types.String("k1"), "k2": types.String("k2"), "k3": types.String("k3"), "s1": types.String("string1"), "s2": types.String("string2"), "s3": types.String("string3"), "s4": types.String("string4"), "n1": types.Number(1), "n2": types.Number(2), "n3": types.Number(3.3), "n4": types.Number(4.4), "b1": mustMarshal(true), "b2": mustMarshal(false), "l1": mustMarshal([]string{}), "l2": mustMarshal([]string{"one", "two", "three", "four"}), "l3": mustMarshal([]string{"two", "three", "four", "five"}), "l4": mustMarshal([]string{"two", "three", 
"four"}), "l5": mustMarshal([]string{"one", "two", "three", "four", "five"}), "l6": mustMarshal([]string{"one", "four"}), "struct1": types.NewStruct("test1", types.StructData{"f1": types.Number(1), "f2": types.Number(2)}), "struct2": types.NewStruct("test1", types.StructData{"f1": types.Number(11111), "f2": types.Number(2)}), "struct3": types.NewStruct("test1", types.StructData{"f1": types.Number(1), "f2": types.Number(2), "f3": types.Number(3)}), "struct4": types.NewStruct("test1", types.StructData{"f2": types.Number(2)}), "m1": mustMarshal(map[string]int{}), "m2": mustMarshal(map[string]int{"k1": 1, "k2": 2, "k3": 3}), "m3": mustMarshal(map[string]int{"k2": 2, "k3": 3, "k4": 4}), "m4": mustMarshal(map[string]int{"k1": 1, "k3": 3}), "m5": mustMarshal(map[string]int{"k1": 1, "k2": 2222, "k3": 3}), "ms1": mustMarshal(map[testKey]int{{1, 1}: 1, {2, 2}: 2, {3, 3}: 3}), "ms2": mustMarshal(map[testKey]int{{1, 1}: 1, {4, 4}: 4, {5, 5}: 5}), } vm["mh1"] = types.NewMap(vrw, vfk("k1", "struct1", "k2", "l1")...) vm["mh2"] = types.NewMap(vrw, vfk("k1", "n1", "k2", "l2", "k3", "l3")...) vm["set1"] = types.NewSet(vrw) vm["set2"] = types.NewSet(vrw, vfk("s1", "s2")...) vm["set3"] = types.NewSet(vrw, vfk("s1", "s2", "s3")...) vm["set1"] = types.NewSet(vrw, vfk("s2")...) vm["seth1"] = types.NewSet(vrw, vfk("struct1", "struct2", "struct3")...) vm["seth2"] = types.NewSet(vrw, vfk("struct2", "struct3")...) vm["setj3"] = types.NewSet(vrw, vfk("struct1")...) vm["mk1"] = types.NewMap(vrw, vfk("struct1", "s1", "struct2", "s2")...) vm["mk2"] = types.NewMap(vrw, vfk("struct1", "s3", "struct4", "s4")...) 
} return vm } func newTestValueStore() *types.ValueStore { st := &chunks.TestStorage{} return types.NewValueStore(st.NewView()) } func getPatch(g1, g2 types.Value) Patch { dChan := make(chan Difference) sChan := make(chan struct{}) go func() { Diff(g1, g2, dChan, sChan, true) close(dChan) }() patch := Patch{} for dif := range dChan { patch = append(patch, dif) } return patch } func checkApplyPatch(assert *assert.Assertions, g1, expectedG2 types.Value, k1, k2 string) { patch := getPatch(g1, expectedG2) g2 := Apply(g1, patch) assert.True(expectedG2.Equals(g2), "failed to apply diffs for k1: %s and k2: %s", k1, k2) } func TestPatches(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() cnt := 0 for k1, g1 := range testValues(vs) { for k2, expectedG2 := range testValues(vs) { if k1 != k2 { cnt++ checkApplyPatch(assert, g1, expectedG2, k1, k2) } } } } func TestNestedLists(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() ol1 := mustMarshal([]string{"one", "two", "three", "four"}) nl1 := mustMarshal([]string{"two", "three"}) ol2 := mustMarshal([]int{2, 3}) nl2 := mustMarshal([]int{1, 2, 3, 4}) nl3 := mustMarshal([]bool{true, false, true}) g1 := types.NewList(vs, ol1, ol2) g2 := types.NewList(vs, nl1, nl2, nl3) checkApplyPatch(assert, g1, g2, "g1", "g2") } func TestUpdateNode(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() doTest := func(pp types.PathPart, parent, ov, nv, exp types.Value, f testFunc) { stack := &patchStack{} se := &stackElem{path: []types.PathPart{pp}, pathPart: pp, changeType: types.DiffChangeModified, oldValue: ov, newValue: nv} updated := stack.updateNode(se, parent) testVal := f(updated) assert.True(exp.Equals(testVal), "%s != %s", nv, testVal) } var pp types.PathPart oldVal := types.String("Yo") newVal := types.String("YooHoo") s1 := types.NewStruct("TestStruct", types.StructData{"f1": types.Number(1), "f2": oldVal}) pp = types.FieldPath{Name: "f2"} 
doTest(pp, s1, oldVal, newVal, newVal, func(parent types.Value) types.Value { return parent.(types.Struct).Get("f2") }) l1 := types.NewList(vs, types.String("one"), oldVal, types.String("three")) pp = types.IndexPath{Index: types.Number(1)} doTest(pp, l1, oldVal, newVal, newVal, func(parent types.Value) types.Value { return parent.(types.List).Get(1) }) m1 := types.NewMap(vs, types.String("k1"), types.Number(1), types.String("k2"), oldVal) pp = types.IndexPath{Index: types.String("k2")} doTest(pp, m1, oldVal, newVal, newVal, func(parent types.Value) types.Value { return parent.(types.Map).Get(types.String("k2")) }) k1 := types.NewStruct("Sizes", types.StructData{"height": types.Number(200), "width": types.Number(300)}) vs.WriteValue(k1) m1 = types.NewMap(vs, k1, oldVal) pp = types.HashIndexPath{Hash: k1.Hash()} doTest(pp, m1, oldVal, newVal, newVal, func(parent types.Value) types.Value { return parent.(types.Map).Get(k1) }) set1 := types.NewSet(vs, oldVal, k1) pp = types.IndexPath{Index: oldVal} exp := types.NewSet(vs, newVal, k1) doTest(pp, set1, oldVal, newVal, exp, func(parent types.Value) types.Value { return parent }) k2 := types.NewStruct("Sizes", types.StructData{"height": types.Number(300), "width": types.Number(500)}) set1 = types.NewSet(vs, oldVal, k1) pp = types.HashIndexPath{Hash: k1.Hash()} exp = types.NewSet(vs, oldVal, k2) doTest(pp, set1, k1, k2, exp, func(parent types.Value) types.Value { return parent }) } func checkApplyDiffs(a *assert.Assertions, n1, n2 types.Value, leftRight bool) { dChan := make(chan Difference) sChan := make(chan struct{}) go func() { Diff(n1, n2, dChan, sChan, leftRight) close(dChan) }() difs := Patch{} for dif := range dChan { difs = append(difs, dif) } res := Apply(n1, difs) a.True(n2.Equals(res)) } func tryApplyDiff(a *assert.Assertions, a1, a2 interface{}) { n1 := mustMarshal(a1) n2 := mustMarshal(a2) checkApplyDiffs(a, n1, n2, true) checkApplyDiffs(a, n1, n2, false) checkApplyDiffs(a, n2, n1, true) checkApplyDiffs(a, 
// TestUpdateMap checks that map diffs survive a diff/apply round trip.
// tryApplyDiff exercises each pair in both directions (a1 -> a2 and a2 -> a1)
// and with leftRight both true and false.
func TestUpdateMap(t *testing.T) {
	a := assert.New(t)

	// insertions, deletions, and replacements
	// (a1 -> a2 adds keys and changes "twenty"; the reverse direction run by
	// tryApplyDiff turns those additions into deletions)
	a1 := map[string]int{"five": 5, "ten": 10, "fifteen": 15, "twenty": 20}
	a2 := map[string]int{"one": 1, "two": 2, "three": 3, "five": 5, "ten": 10, "fifteen": 15, "twenty": 2020}
	tryApplyDiff(a, a1, a2)

	// delete everything
	a1 = map[string]int{"five": 5, "ten": 10, "fifteen": 15, "twenty": 20}
	a2 = map[string]int{}
	tryApplyDiff(a, a1, a2)
}
// Function types that parameterize diffOrdered, so one loop can diff maps,
// sets and structs.
type (
	// diffFunc runs the underlying collection diff. diffOrdered calls it in
	// its own goroutine, reads the changes it sends on changeChan, and
	// signals on stopChan when diffing should end early.
	diffFunc func(changeChan chan<- types.ValueChanged, stopChan <-chan struct{})
	// pathPartFunc turns a changed key into the PathPart that diffOrdered
	// appends to the parent path.
	pathPartFunc func(v types.Value) types.PathPart
	// valueFunc maps a change key to a Value: diffOrdered uses it both to
	// derive the key itself and to look up the old and new values for a key.
	valueFunc func(k types.Value) types.Value
)
type Difference struct { // Path to the Value that has changed Path types.Path // ChangeType indicates the type of diff: modified, added, deleted ChangeType types.DiffChangeType // OldValue is Value before the change, can be nil if Value was added OldValue types.Value // NewValue is Value after the change, can be nil if Value was removed NewValue types.Value // NewKeyValue is used for when elements are added to diffs with a // non-primitive key. The new key must available when the map gets updated. NewKeyValue types.Value } func (dif Difference) IsEmpty() bool { return dif.Path == nil && dif.OldValue == nil && dif.NewValue == nil } // differ is used internally to hold information necessary for diffing two graphs. type differ struct { // Channel used to send Difference objects back to caller diffChan chan<- Difference // Channel that caller should close() to terminate Diff function. stopChan chan struct{} // Use LeftRight diff as opposed to TopDown leftRight bool } // Diff traverses two graphs simultaneously looking for differences. It returns // two channels: a DiffReceiveChan that the caller can use to iterate over the // diffs in the graph and a StopSendChanel that a caller can use to signal the // Diff function to stop processing. // Diff returns the Differences in depth-first first order. A 'diff' is defined // as one of the following conditions: // * a Value is Added or Removed from a node in the graph // * the type of a Value has changed in the graph // * a primitive (i.e. Bool, Number, String, Ref or Blob) Value has changed // // A Difference is not returned when a non-primitive value has been modified. For // example, a struct field has been changed from one Value of type Employee to // another. Those modifications are accounted for by the Differences described // above at a lower point in the graph. // // If leftRight is true then the left-right diff is used for ordered sequences // - see Diff vs DiffLeftRight in Set and Map. 
// // Note: the function sends messages on diffChan and checks whether stopChan has // been closed to know if it needs to terminate diffing early. To function // properly it needs to be executed concurrently with code that reads values from // diffChan. The following is a typical invocation of Diff(): // dChan := make(chan Difference) // sChan := make(chan struct{}) // go func() { // d.Diff(s3, s4, dChan, sChan, leftRight) // close(dChan) // }() // for dif := range dChan { // // } func Diff(v1, v2 types.Value, dChan chan<- Difference, stopChan chan struct{}, leftRight bool) { d := differ{diffChan: dChan, stopChan: stopChan, leftRight: leftRight} if !v1.Equals(v2) { if !shouldDescend(v1, v2) { d.sendDiff(Difference{Path: nil, ChangeType: types.DiffChangeModified, OldValue: v1, NewValue: v2}) } else { d.diff(nil, v1, v2) } } } func (d differ) diff(p types.Path, v1, v2 types.Value) bool { switch v1.Kind() { case types.ListKind: return d.diffLists(p, v1.(types.List), v2.(types.List)) case types.MapKind: return d.diffMaps(p, v1.(types.Map), v2.(types.Map)) case types.SetKind: return d.diffSets(p, v1.(types.Set), v2.(types.Set)) case types.StructKind: return d.diffStructs(p, v1.(types.Struct), v2.(types.Struct)) default: panic("Unrecognized type in diff function") } } func (d differ) diffLists(p types.Path, v1, v2 types.List) (stop bool) { spliceChan := make(chan types.Splice) stopChan := make(chan struct{}, 1) // buffer size of 1s, so this won't block if diff already finished go func() { v2.Diff(v1, spliceChan, stopChan) close(spliceChan) }() for splice := range spliceChan { if stop { break } if splice.SpRemoved == splice.SpAdded { // Heuristic: list only has modifications. 
for i := uint64(0); i < splice.SpRemoved; i++ { lastEl := v1.Get(splice.SpAt + i) newEl := v2.Get(splice.SpFrom + i) if shouldDescend(lastEl, newEl) { idx := types.Number(splice.SpAt + i) stop = d.diff(append(p, types.NewIndexPath(idx)), lastEl, newEl) } else { p1 := p.Append(types.NewIndexPath(types.Number(splice.SpAt + i))) dif := Difference{p1, types.DiffChangeModified, v1.Get(splice.SpAt + i), v2.Get(splice.SpFrom + i), nil} stop = !d.sendDiff(dif) } } continue } // Heuristic: list only has additions/removals. for i := uint64(0); i < splice.SpRemoved && !stop; i++ { p1 := p.Append(types.NewIndexPath(types.Number(splice.SpAt + i))) dif := Difference{Path: p1, ChangeType: types.DiffChangeRemoved, OldValue: v1.Get(splice.SpAt + i), NewValue: nil} stop = !d.sendDiff(dif) } for i := uint64(0); i < splice.SpAdded && !stop; i++ { p1 := p.Append(types.NewIndexPath(types.Number(splice.SpFrom + i))) dif := Difference{Path: p1, ChangeType: types.DiffChangeAdded, OldValue: nil, NewValue: v2.Get(splice.SpFrom + i)} stop = !d.sendDiff(dif) } } if stop { stopChan <- struct{}{} // Wait for diff to stop. 
for range spliceChan { } } return } func (d differ) diffMaps(p types.Path, v1, v2 types.Map) bool { return d.diffOrdered(p, func(v types.Value) types.PathPart { if types.ValueCanBePathIndex(v) { return types.NewIndexPath(v) } else { return types.NewHashIndexPath(v.Hash()) } }, func(cc chan<- types.ValueChanged, sc <-chan struct{}) { if d.leftRight { v2.DiffLeftRight(v1, cc, sc) } else { v2.DiffHybrid(v1, cc, sc) } }, func(k types.Value) types.Value { return k }, func(k types.Value) types.Value { return v1.Get(k) }, func(k types.Value) types.Value { return v2.Get(k) }, ) } func (d differ) diffStructs(p types.Path, v1, v2 types.Struct) bool { str := func(v types.Value) string { return string(v.(types.String)) } return d.diffOrdered(p, func(v types.Value) types.PathPart { return types.NewFieldPath(str(v)) }, func(cc chan<- types.ValueChanged, sc <-chan struct{}) { v2.Diff(v1, cc, sc) }, func(k types.Value) types.Value { return k }, func(k types.Value) types.Value { return v1.Get(str(k)) }, func(k types.Value) types.Value { return v2.Get(str(k)) }, ) } func (d differ) diffSets(p types.Path, v1, v2 types.Set) bool { return d.diffOrdered(p, func(v types.Value) types.PathPart { if types.ValueCanBePathIndex(v) { return types.NewIndexPath(v) } return types.NewHashIndexPath(v.Hash()) }, func(cc chan<- types.ValueChanged, sc <-chan struct{}) { if d.leftRight { v2.DiffLeftRight(v1, cc, sc) } else { v2.DiffHybrid(v1, cc, sc) } }, func(k types.Value) types.Value { return k }, func(k types.Value) types.Value { return k }, func(k types.Value) types.Value { return k }, ) } func (d differ) diffOrdered(p types.Path, ppf pathPartFunc, df diffFunc, kf, v1, v2 valueFunc) (stop bool) { changeChan := make(chan types.ValueChanged) stopChan := make(chan struct{}, 1) // buffer size of 1, so this won't block if diff already finished go func() { df(changeChan, stopChan) close(changeChan) }() for change := range changeChan { if stop { break } k := kf(change.Key) p1 := p.Append(ppf(k)) switch 
change.ChangeType { case types.DiffChangeAdded: dif := Difference{Path: p1, ChangeType: types.DiffChangeAdded, OldValue: nil, NewValue: v2(change.Key), NewKeyValue: k} stop = !d.sendDiff(dif) case types.DiffChangeRemoved: dif := Difference{Path: p1, ChangeType: types.DiffChangeRemoved, OldValue: v1(change.Key), NewValue: nil} stop = !d.sendDiff(dif) case types.DiffChangeModified: c1, c2 := v1(change.Key), v2(change.Key) if shouldDescend(c1, c2) { stop = d.diff(p1, c1, c2) } else { dif := Difference{Path: p1, ChangeType: types.DiffChangeModified, OldValue: c1, NewValue: c2} stop = !d.sendDiff(dif) } default: panic("unknown change type") } } if stop { stopChan <- struct{}{} for range changeChan { } } return } // shouldDescend returns true, if Value is not primitive or is a Ref. func shouldDescend(v1, v2 types.Value) bool { kind := v1.Kind() return !types.IsPrimitiveKind(kind) && kind == v2.Kind() && kind != types.RefKind } // stopSent returns true if a message has been sent to this StopChannel func (d differ) sendDiff(dif Difference) bool { select { case <-d.stopChan: return false case d.diffChan <- dif: return true } } ================================================ FILE: go/diff/diff_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package diff import ( "bytes" "fmt" "strings" "testing" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/test" "github.com/attic-labs/noms/go/util/writers" "github.com/stretchr/testify/assert" ) var ( aa1 = createMap("a1", "a-one", "a2", "a-two", "a3", "a-three", "a4", "a-four") aa1x = createMap("a1", "a-one-diff", "a2", "a-two", "a3", "a-three", "a4", "a-four") mm1 = createMap("k1", "k-one", "k2", "k-two", "k3", "k-three", "k4", aa1) mm2 = createMap("l1", "l-one", "l2", "l-two", "l3", "l-three", "l4", aa1) mm3 = createMap("m1", "m-one", "v2", "m-two", "m3", "m-three", "m4", aa1) mm3x = createMap("m1", "m-one", "v2", "m-two", "m3", "m-three-diff", "m4", aa1x) mm4 = createMap("n1", "n-one", "n2", "n-two", "n3", "n-three", "n4", aa1) ) func valToTypesValue(v interface{}) types.Value { var v1 types.Value switch t := v.(type) { case string: v1 = types.String(t) case int: v1 = types.Number(t) case types.Value: v1 = t } return v1 } func valsToTypesValues(kv ...interface{}) []types.Value { keyValues := []types.Value{} for _, e := range kv { v := valToTypesValue(e) keyValues = append(keyValues, v) } return keyValues } func createMap(kv ...interface{}) types.Map { vs := newTestValueStore() defer vs.Close() keyValues := valsToTypesValues(kv...) return types.NewMap(vs, keyValues...) } func createSet(kv ...interface{}) types.Set { vs := newTestValueStore() defer vs.Close() keyValues := valsToTypesValues(kv...) return types.NewSet(vs, keyValues...) } func createList(kv ...interface{}) types.List { vs := newTestValueStore() defer vs.Close() keyValues := valsToTypesValues(kv...) return types.NewList(vs, keyValues...) 
} func createStruct(name string, kv ...interface{}) types.Struct { fields := types.StructData{} for i := 0; i < len(kv); i += 2 { fields[kv[i].(string)] = valToTypesValue(kv[i+1]) } return types.NewStruct(name, fields) } func pathsFromDiff(v1, v2 types.Value, leftRight bool) []string { dChan := make(chan Difference) sChan := make(chan struct{}) go func() { Diff(v1, v2, dChan, sChan, leftRight) close(dChan) }() var paths []string for d := range dChan { paths = append(paths, d.Path.String()) } return paths } func mustParsePath(assert *assert.Assertions, s string) types.Path { if s == "" { return nil } p, err := types.ParsePath(s) assert.NoError(err) return p } func TestNomsDiffPrintMap(t *testing.T) { assert := assert.New(t) expected := `["map-3"] { - "m3": "m-three" + "m3": "m-three-diff" } ["map-3"]["m4"] { - "a1": "a-one" + "a1": "a-one-diff" } ` expectedPaths := []string{ `["map-3"]["m3"]`, `["map-3"]["m4"]["a1"]`, } tf := func(leftRight bool) { m1 := createMap("map-1", mm1, "map-2", mm2, "map-3", mm3, "map-4", mm4) m2 := createMap("map-1", mm1, "map-2", mm2, "map-3", mm3x, "map-4", mm4) buf := &bytes.Buffer{} PrintDiff(buf, m1, m2, leftRight) assert.Equal(expected, buf.String()) paths := pathsFromDiff(m1, m2, leftRight) assert.Equal(expectedPaths, paths) } tf(true) tf(false) } func TestNomsDiffPrintSet(t *testing.T) { assert := assert.New(t) expected1 := `(root) { - "five" + "five-diff" } ` expectedPaths1 := []string{ `["five"]`, `["five-diff"]`, } expected2 := `(root) { - map { // 4 items - "m1": "m-one", - "m3": "m-three", - "m4": map { // 4 items - "a1": "a-one", - "a2": "a-two", - "a3": "a-three", - "a4": "a-four", - }, - "v2": "m-two", - } + map { // 4 items + "m1": "m-one", + "m3": "m-three-diff", + "m4": map { // 4 items + "a1": "a-one-diff", + "a2": "a-two", + "a3": "a-three", + "a4": "a-four", + }, + "v2": "m-two", + } } ` expectedPaths2 := []string{ fmt.Sprintf("[#%s]", mm3.Hash()), fmt.Sprintf("[#%s]", mm3x.Hash()), } s1 := createSet("one", "three", 
"five", "seven", "nine") s2 := createSet("one", "three", "five-diff", "seven", "nine") s3 := createSet(mm1, mm2, mm3, mm4) s4 := createSet(mm1, mm2, mm3x, mm4) tf := func(leftRight bool) { buf := &bytes.Buffer{} PrintDiff(buf, s1, s2, leftRight) assert.Equal(expected1, buf.String()) paths := pathsFromDiff(s1, s2, leftRight) assert.Equal(expectedPaths1, paths) buf = &bytes.Buffer{} PrintDiff(buf, s3, s4, leftRight) assert.Equal(expected2, buf.String()) paths = pathsFromDiff(s3, s4, leftRight) assert.Equal(expectedPaths2, paths) } tf(true) tf(false) } // This function tests stop functionality in PrintDiff and Diff. func TestNomsDiffPrintStop(t *testing.T) { assert := assert.New(t) expected1 := `(root) { - "five" ` expected2 := `(root) { - map { // 4 items ` s1 := createSet("one", "three", "five", "seven", "nine") s2 := createSet("one", "three", "five-diff", "seven", "nine") s3 := createSet(mm1, mm2, mm3, mm4) s4 := createSet(mm1, mm2, mm3x, mm4) tf := func(leftRight bool) { buf := &bytes.Buffer{} mlw := &writers.MaxLineWriter{Dest: buf, MaxLines: 2} PrintDiff(mlw, s1, s2, leftRight) assert.Equal(expected1, buf.String()) buf = &bytes.Buffer{} mlw = &writers.MaxLineWriter{Dest: buf, MaxLines: 2} PrintDiff(mlw, s3, s4, leftRight) assert.Equal(expected2, buf.String()) } tf(true) tf(false) } func TestNomsDiffPrintStruct(t *testing.T) { assert := assert.New(t) expected1 := `(root) { - "four": "four" + "four": "four-diff" } ["three"] { - field1: "field1-data" - field3: "field3-data" + field3: "field3-data-diff" + field4: "field4-data" } ` expectedPaths1 := []string{ `["four"]`, `["three"].field1`, `["three"].field3`, `["three"].field4`, } expected2 := `(root) { - four: "four" + four: "four-diff" } .three { - field1: "field1-data" - field3: "field3-data" + field3: "field3-data-diff" + field4: "field4-data" } ` expectedPaths2 := []string{ `.four`, `.three.field1`, `.three.field3`, `.three.field4`, } s1 := createStruct("TestData", "field1", "field1-data", "field2", 
"field2-data", "field3", "field3-data", ) s2 := createStruct("TestData", "field2", "field2-data", "field3", "field3-data-diff", "field4", "field4-data", ) m1 := createMap("one", 1, "two", 2, "three", s1, "four", "four") m2 := createMap("one", 1, "two", 2, "three", s2, "four", "four-diff") s3 := createStruct("", "one", 1, "two", 2, "three", s1, "four", "four") s4 := createStruct("", "one", 1, "two", 2, "three", s2, "four", "four-diff") tf := func(leftRight bool) { buf := &bytes.Buffer{} PrintDiff(buf, m1, m2, leftRight) assert.Equal(expected1, buf.String()) paths := pathsFromDiff(m1, m2, leftRight) assert.Equal(expectedPaths1, paths) buf = &bytes.Buffer{} PrintDiff(buf, s3, s4, leftRight) assert.Equal(expected2, buf.String()) paths = pathsFromDiff(s3, s4, leftRight) assert.Equal(expectedPaths2, paths) } tf(true) tf(false) } func TestNomsDiffPrintMapWithStructKeys(t *testing.T) { a := assert.New(t) vs := newTestValueStore() defer vs.Close() k1 := createStruct("TestKey", "name", "n1", "label", "l1") expected1 := `(root) { - struct TestKey { - label: "l1", - name: "n1", - }: true + struct TestKey { + label: "l1", + name: "n1", + }: false } ` m1 := types.NewMap(vs, k1, types.Bool(true)) m2 := types.NewMap(vs, k1, types.Bool(false)) tf := func(leftRight bool) { buf := &bytes.Buffer{} PrintDiff(buf, m1, m2, leftRight) a.Equal(expected1, buf.String()) } tf(true) tf(false) } func TestNomsDiffPrintList(t *testing.T) { assert := assert.New(t) expected1 := `(root) { - 2 + 22 - 44 } ` expectedPaths1 := []string{ `[1]`, `[4]`, } l1 := createList(1, 2, 3, 4, 44, 5, 6) l2 := createList(1, 22, 3, 4, 5, 6) expected2 := `(root) { + "seven" } ` expectedPaths2 := []string{ `[6]`, } l3 := createList("one", "two", "three", "four", "five", "six") l4 := createList("one", "two", "three", "four", "five", "six", "seven") expected3 := `[2] { - "m3": "m-three" + "m3": "m-three-diff" } [2]["m4"] { - "a1": "a-one" + "a1": "a-one-diff" } ` expectedPaths3 := []string{ `[2]["m3"]`, 
`[2]["m4"]["a1"]`, } l5 := createList(mm1, mm2, mm3, mm4) l6 := createList(mm1, mm2, mm3x, mm4) tf := func(leftRight bool) { buf := &bytes.Buffer{} PrintDiff(buf, l1, l2, leftRight) assert.Equal(expected1, buf.String()) paths := pathsFromDiff(l1, l2, leftRight) assert.Equal(expectedPaths1, paths) buf = &bytes.Buffer{} PrintDiff(buf, l3, l4, leftRight) assert.Equal(expected2, buf.String()) paths = pathsFromDiff(l3, l4, leftRight) assert.Equal(expectedPaths2, paths) buf = &bytes.Buffer{} PrintDiff(buf, l5, l6, leftRight) assert.Equal(expected3, buf.String()) paths = pathsFromDiff(l5, l6, leftRight) assert.Equal(expectedPaths3, paths) } tf(true) tf(false) } func TestNomsDiffPrintBlob(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() expected := "- Blob (2.0 kB)\n+ Blob (11 B)\n" expectedPaths1 := []string{``} b1 := types.NewBlob(vs, strings.NewReader(strings.Repeat("x", 2*1024))) b2 := types.NewBlob(vs, strings.NewReader("Hello World")) tf := func(leftRight bool) { buf := &bytes.Buffer{} PrintDiff(buf, b1, b2, leftRight) assert.Equal(expected, buf.String()) paths := pathsFromDiff(b1, b2, leftRight) assert.Equal(expectedPaths1, paths) } tf(true) tf(false) } func TestNomsDiffPrintType(t *testing.T) { assert := assert.New(t) expected1 := "- List\n+ List\n" expectedPaths1 := []string{""} t1 := types.MakeListType(types.NumberType) t2 := types.MakeListType(types.StringType) expected2 := "- List\n+ Set\n" expectedPaths2 := []string{``} t3 := types.MakeListType(types.NumberType) t4 := types.MakeSetType(types.StringType) tf := func(leftRight bool) { buf := &bytes.Buffer{} PrintDiff(buf, t1, t2, leftRight) assert.Equal(expected1, buf.String()) paths := pathsFromDiff(t1, t2, leftRight) assert.Equal(expectedPaths1, paths) buf = &bytes.Buffer{} PrintDiff(buf, t3, t4, leftRight) assert.Equal(expected2, buf.String()) paths = pathsFromDiff(t3, t4, leftRight) assert.Equal(expectedPaths2, paths) } tf(true) tf(false) } func TestNomsDiffPrintRef(t 
*testing.T) { assert := assert.New(t) expected := "- #fckcbt7nk5jl4arco2dk7r9nj7abb6ci\n+ #i7d3u5gekm48ot419t2cot6cnl7ltcah\n" expectedPaths1 := []string{``} l1 := createList(1) l2 := createList(2) r1 := types.NewRef(l1) r2 := types.NewRef(l2) tf := func(leftRight bool) { buf := &bytes.Buffer{} PrintDiff(buf, r1, r2, leftRight) test.EqualsIgnoreHashes(t, expected, buf.String()) paths := pathsFromDiff(r1, r2, leftRight) assert.Equal(expectedPaths1, paths) } tf(true) tf(false) } ================================================ FILE: go/diff/patch.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package diff import ( "bytes" "github.com/attic-labs/noms/go/types" ) // Patch is a list of difference objects that can be applied to a graph // using ApplyPatch(). Patch implements a sort order that is useful for // applying the patch in an efficient way. type Patch []Difference func (r Patch) Swap(i, j int) { r[i], r[j] = r[j], r[i] } func (r Patch) Len() int { return len(r) } var vals = map[types.DiffChangeType]int{types.DiffChangeRemoved: 0, types.DiffChangeModified: 1, types.DiffChangeAdded: 2} func (r Patch) Less(i, j int) bool { if r[i].Path.Equals(r[j].Path) { return vals[r[i].ChangeType] < vals[r[j].ChangeType] } return pathIsLess(r[i].Path, r[j].Path) } // Utility methods on path // TODO: Should these be on types.Path & types.PathPart? 
func pathIsLess(p1, p2 types.Path) bool { for i, pp1 := range p1 { if len(p2) == i { return false // p1 > p2 } switch pathPartCompare(pp1, p2[i]) { case -1: return true // p1 < p2 case 1: return false // p1 > p2 } } return len(p2) > len(p1) // if true p1 < p2, else p1 == p2 } func fieldPathCompare(pp types.FieldPath, o types.PathPart) int { switch opp := o.(type) { case types.FieldPath: if pp.Name == opp.Name { return 0 } if pp.Name < opp.Name { return -1 } return 1 case types.IndexPath: return -1 case types.HashIndexPath: return -1 } panic("unreachable") } func indexPathCompare(pp types.IndexPath, o types.PathPart) int { switch opp := o.(type) { case types.FieldPath: return 1 case types.IndexPath: if pp.Index.Equals(opp.Index) { if pp.IntoKey == opp.IntoKey { return 0 } if pp.IntoKey { return -1 } return 1 } if pp.Index.Less(opp.Index) { return -1 } return 1 case types.HashIndexPath: return -1 } panic("unreachable") } func hashIndexPathCompare(pp types.HashIndexPath, o types.PathPart) int { switch opp := o.(type) { case types.FieldPath: return 1 case types.IndexPath: return 1 case types.HashIndexPath: switch bytes.Compare(pp.Hash[:], opp.Hash[:]) { case -1: return -1 case 0: if pp.IntoKey == opp.IntoKey { return 0 } if pp.IntoKey { return -1 } return 1 case 1: return 1 } } panic("unreachable") } func pathPartCompare(pp, pp2 types.PathPart) int { switch pp1 := pp.(type) { case types.FieldPath: return fieldPathCompare(pp1, pp2) case types.IndexPath: return indexPathCompare(pp1, pp2) case types.HashIndexPath: return hashIndexPathCompare(pp1, pp2) } panic("unreachable") } ================================================ FILE: go/diff/patch_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package diff import ( "math/rand" "sort" "testing" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestPatchPathPartCompare(t *testing.T) { assert := assert.New(t) fieldPath1 := mustParsePath(assert, `.field1`)[0] fieldPath2 := mustParsePath(assert, `.field2`)[0] indexPath1 := mustParsePath(assert, `["field1"]`)[0] indexPath2 := mustParsePath(assert, `["field2"]`)[0] indexPathKey1 := mustParsePath(assert, `["field1"]@key`)[0] indexPathKey2 := mustParsePath(assert, `["field2"]@key`)[0] hashIndexPath1 := mustParsePath(assert, `[#01234567890123456789012345678901]`)[0] hashIndexPath2 := mustParsePath(assert, `[#0123456789abcdef0123456789abcdef]`)[0] hashIndexPathKey1 := mustParsePath(assert, `[#01234567890123456789012345678901]`)[0] hashIndexPathKey2 := mustParsePath(assert, `[#0123456789abcdef0123456789abcdef]`)[0] testCases := [][]types.PathPart{ {fieldPath1, fieldPath2}, {indexPath1, indexPath2}, {indexPathKey1, indexPathKey2}, {hashIndexPath1, hashIndexPath2}, {hashIndexPathKey1, hashIndexPathKey2}, {fieldPath2, indexPath1}, {fieldPath2, indexPathKey1}, {fieldPath2, hashIndexPath1}, {fieldPath2, hashIndexPathKey1}, {indexPath2, hashIndexPath1}, {indexPath2, hashIndexPathKey1}, } for i, tc := range testCases { assert.Equal(-1, pathPartCompare(tc[0], tc[1]), "test case %d failed, pp0: %s, pp1: %s", i, tc[0], tc[1]) assert.Equal(0, pathPartCompare(tc[0], tc[0]), "test case %d failed, pp0: %s, pp1: %s", i, tc[0], tc[1]) assert.Equal(1, pathPartCompare(tc[1], tc[0]), "test case %d failed, pp0: %s, pp1: %s", i, tc[0], tc[1]) } } func TestPatchPathIsLess(t *testing.T) { assert := assert.New(t) testCases := [][]string{ {``, `["field1"]`}, {`["field1"]`, `["field1"].f1`}, {`["field1"].f1`, `["field1"]["f1"]`}, {`["field1"]["f1"]@key`, `["field1"]["f1"]`}, {`["field1"]["f1"]`, `["field1"][#01234567890123456789012345678901]`}, 
{`["field1"][#01234567890123456789012345678901]`, `["field1"][#0123456789abcdef0123456789abcdef]`}, } for i, tc := range testCases { p0 := mustParsePath(assert, tc[0]) p1 := mustParsePath(assert, tc[1]) assert.True(pathIsLess(p0, p1), "test case %d failed", i) assert.False(pathIsLess(p0, p0), "test case %d failed", i) assert.False(pathIsLess(p1, p0), "test case %d failed", i) } //p := mustParsePath(assert, `#0123456789abcdef0123456789abcdef.value`) //fmt.Printf("p[0]: %s, type: %T\n", p[0], p[0]) } func TestPatchSort(t *testing.T) { assert := assert.New(t) sortedPaths := Patch{ {Path: mustParsePath(assert, `["field1"]`)}, {Path: mustParsePath(assert, `["field1"].f1`)}, {Path: mustParsePath(assert, `["field1"]["f1"]`), ChangeType: types.DiffChangeRemoved}, {Path: mustParsePath(assert, `["field1"]["f1"]`), ChangeType: types.DiffChangeModified}, {Path: mustParsePath(assert, `["field1"]["f1"]`), ChangeType: types.DiffChangeAdded}, {Path: mustParsePath(assert, `["field1"][#01234567890123456789012345678901]`)}, {Path: mustParsePath(assert, `["field1"][#0123456789abcdef0123456789abcdef]`)}, } rand.Perm(len(sortedPaths)) shuffledPaths := Patch{} for _, idx := range rand.Perm(len(sortedPaths)) { shuffledPaths = append(shuffledPaths, sortedPaths[idx]) } sort.Sort(shuffledPaths) assert.Equal(sortedPaths, shuffledPaths) } ================================================ FILE: go/diff/print_diff.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package diff import ( "io" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/writers" "github.com/dustin/go-humanize" ) type prefixOp string const ( ADD = "+ " DEL = "- " ) type ( printFunc func(w io.Writer, op prefixOp, key, val types.Value) error ) // PrintDiff writes a textual reprensentation of the diff from |v1| to |v2| // to |w|. 
If |leftRight| is true then the left-right diff is used for ordered // sequences - see Diff vs DiffLeftRight in Set and Map. func PrintDiff(w io.Writer, v1, v2 types.Value, leftRight bool) (err error) { // In the case where the diff involves two simple values, just print out the // diff and return. This is needed because the code below assumes that the // values being compared have a parent. if !shouldDescend(v1, v2) { line(w, DEL, nil, v1) return line(w, ADD, nil, v2) } dChan := make(chan Difference, 16) stopChan := make(chan struct{}) stopDiff := func() { close(stopChan) for range dChan { } } // From here on, we can assume that every Difference will have at least one // element in the Path go func() { Diff(v1, v2, dChan, stopChan, leftRight) close(dChan) }() var lastParentPath types.Path wroteHdr := false firstTime := true for d := range dChan { parentPath := d.Path[:len(d.Path)-1] parentPathChanged := !parentPath.Equals(lastParentPath) lastParentPath = parentPath if parentPathChanged && wroteHdr { err = writeFooter(w, &wroteHdr) } if parentPathChanged || firstTime { firstTime = false err = writeHeader(w, parentPath, &wroteHdr) } lastPart := d.Path[len(d.Path)-1] parentEl := parentPath.Resolve(v1, nil) var key types.Value var pfunc printFunc = line switch parent := parentEl.(type) { case types.Map: if indexPath, ok := lastPart.(types.IndexPath); ok { key = indexPath.Index } else if hip, ok := lastPart.(types.HashIndexPath); ok { // In this case, the map has a non-primitive key so the value // is a ref to the key. We need the actual key, not a ref to it. 
hip1 := hip hip1.IntoKey = true key = hip1.Resolve(parent, nil) } else { panic("unexpected Path type") } case types.Set: // default values are ok case types.Struct: key = types.String(lastPart.(types.FieldPath).Name) pfunc = field case types.List: // default values are ok } if d.OldValue != nil { err = pfunc(w, DEL, key, d.OldValue) } if d.NewValue != nil { err = pfunc(w, ADD, key, d.NewValue) } if err != nil { stopDiff() break } } err = writeFooter(w, &wroteHdr) return } func writeHeader(w io.Writer, p types.Path, wroteHdr *bool) error { if *wroteHdr { return nil } *wroteHdr = true hdr := "(root)" if len(p) > 0 { hdr = p.String() } return write(w, []byte(hdr+" {\n")) } func writeFooter(w io.Writer, wroteHdr *bool) error { if !*wroteHdr { return nil } *wroteHdr = false return write(w, []byte(" }\n")) } func line(w io.Writer, op prefixOp, key, val types.Value) error { genPrefix := func(w *writers.PrefixWriter) []byte { return []byte(op) } pw := &writers.PrefixWriter{Dest: w, PrefixFunc: genPrefix, NeedsPrefix: true} if key != nil { writeEncodedValue(pw, key) write(w, []byte(": ")) } writeEncodedValue(pw, val) return write(w, []byte("\n")) } func field(w io.Writer, op prefixOp, name, val types.Value) error { genPrefix := func(w *writers.PrefixWriter) []byte { return []byte(op) } pw := &writers.PrefixWriter{Dest: w, PrefixFunc: genPrefix, NeedsPrefix: true} write(pw, []byte(name.(types.String))) write(w, []byte(": ")) writeEncodedValue(pw, val) return write(w, []byte("\n")) } func writeEncodedValue(w io.Writer, v types.Value) error { if v.Kind() != types.BlobKind { return types.WriteEncodedValue(w, v) } write(w, []byte("Blob (")) write(w, []byte(humanize.Bytes(v.(types.Blob).Len()))) return write(w, []byte(")")) } func write(w io.Writer, b []byte) error { _, err := w.Write(b) return err } ================================================ FILE: go/diff/summary.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package diff import ( "fmt" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/status" humanize "github.com/dustin/go-humanize" ) // Summary prints a summary of the diff between two values to stdout. func Summary(value1, value2 types.Value) { if datas.IsCommit(value1) && datas.IsCommit(value2) { fmt.Println("Comparing commit values") value1 = value1.(types.Struct).Get(datas.ValueField) value2 = value2.(types.Struct).Get(datas.ValueField) } var singular, plural string if value1.Kind() == value2.Kind() { switch value1.Kind() { case types.StructKind: singular = "field" plural = "fields" case types.MapKind: singular = "entry" plural = "entries" default: singular = "value" plural = "values" } } ch := make(chan diffSummaryProgress) go func() { diffSummary(ch, value1, value2) close(ch) }() acc := diffSummaryProgress{} for p := range ch { acc.Adds += p.Adds acc.Removes += p.Removes acc.Changes += p.Changes acc.NewSize += p.NewSize acc.OldSize += p.OldSize if status.WillPrint() { formatStatus(acc, singular, plural) } } formatStatus(acc, singular, plural) status.Done() } type diffSummaryProgress struct { Adds, Removes, Changes, NewSize, OldSize uint64 } func diffSummary(ch chan diffSummaryProgress, v1, v2 types.Value) { if !v1.Equals(v2) { if shouldDescend(v1, v2) { switch v1.Kind() { case types.ListKind: diffSummaryList(ch, v1.(types.List), v2.(types.List)) case types.MapKind: diffSummaryMap(ch, v1.(types.Map), v2.(types.Map)) case types.SetKind: diffSummarySet(ch, v1.(types.Set), v2.(types.Set)) case types.StructKind: diffSummaryStructs(ch, v1.(types.Struct), v2.(types.Struct)) default: panic("Unrecognized type in diff function: " + types.TypeOf(v1).Describe() + " and " + types.TypeOf(v2).Describe()) } } else { ch <- diffSummaryProgress{Adds: 1, Removes: 1, NewSize: 1, OldSize: 1} } } } func diffSummaryList(ch chan<- 
diffSummaryProgress, v1, v2 types.List) { ch <- diffSummaryProgress{OldSize: v1.Len(), NewSize: v2.Len()} spliceChan := make(chan types.Splice) stopChan := make(chan struct{}, 1) // buffer size of 1, so this won't block if diff already finished go func() { v2.Diff(v1, spliceChan, stopChan) close(spliceChan) }() for splice := range spliceChan { if splice.SpRemoved == splice.SpAdded { ch <- diffSummaryProgress{Changes: splice.SpRemoved} } else { ch <- diffSummaryProgress{Adds: splice.SpAdded, Removes: splice.SpRemoved} } } } func diffSummaryMap(ch chan<- diffSummaryProgress, v1, v2 types.Map) { diffSummaryValueChanged(ch, v1.Len(), v2.Len(), func(changeChan chan<- types.ValueChanged, stopChan <-chan struct{}) { v2.Diff(v1, changeChan, stopChan) }) } func diffSummarySet(ch chan<- diffSummaryProgress, v1, v2 types.Set) { diffSummaryValueChanged(ch, v1.Len(), v2.Len(), func(changeChan chan<- types.ValueChanged, stopChan <-chan struct{}) { v2.Diff(v1, changeChan, stopChan) }) } func diffSummaryStructs(ch chan<- diffSummaryProgress, v1, v2 types.Struct) { // TODO: Operate on values directly size1 := uint64(types.TypeOf(v1).Desc.(types.StructDesc).Len()) size2 := uint64(types.TypeOf(v2).Desc.(types.StructDesc).Len()) diffSummaryValueChanged(ch, size1, size2, func(changeChan chan<- types.ValueChanged, stopChan <-chan struct{}) { v2.Diff(v1, changeChan, stopChan) }) } func diffSummaryValueChanged(ch chan<- diffSummaryProgress, oldSize, newSize uint64, f diffFunc) { ch <- diffSummaryProgress{OldSize: oldSize, NewSize: newSize} changeChan := make(chan types.ValueChanged) stopChan := make(chan struct{}, 1) // buffer size of 1, so this won't block if diff already finished go func() { f(changeChan, stopChan) close(changeChan) }() reportChanges(ch, changeChan) } func reportChanges(ch chan<- diffSummaryProgress, changeChan chan types.ValueChanged) { for change := range changeChan { switch change.ChangeType { case types.DiffChangeAdded: ch <- diffSummaryProgress{Adds: 1} case 
types.DiffChangeRemoved: ch <- diffSummaryProgress{Removes: 1} case types.DiffChangeModified: ch <- diffSummaryProgress{Changes: 1} default: panic("unknown change type") } } } func formatStatus(acc diffSummaryProgress, singular, plural string) { pluralize := func(singular, plural string, n uint64) string { var noun string if n != 1 { noun = plural } else { noun = singular } return fmt.Sprintf("%s %s", humanize.Comma(int64(n)), noun) } insertions := pluralize("insertion", "insertions", acc.Adds) deletions := pluralize("deletion", "deletions", acc.Removes) changes := pluralize("change", "changes", acc.Changes) oldValues := pluralize(singular, plural, acc.OldSize) newValues := pluralize(singular, plural, acc.NewSize) status.Printf("%s (%.2f%%), %s (%.2f%%), %s (%.2f%%), (%s vs %s)", insertions, (float64(100*acc.Adds) / float64(acc.OldSize)), deletions, (float64(100*acc.Removes) / float64(acc.OldSize)), changes, (float64(100*acc.Changes) / float64(acc.OldSize)), oldValues, newValues) } ================================================ FILE: go/hash/base32.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package hash import "encoding/base32" var encoding = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv") func encode(data []byte) string { return encoding.EncodeToString(data) } func decode(s string) []byte { slice, _ := encoding.DecodeString(s) return slice } ================================================ FILE: go/hash/base32_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package hash import ( "testing" "github.com/stretchr/testify/assert" ) func TestBase32Encode(t *testing.T) { assert := assert.New(t) d := make([]byte, 20, 20) assert.Equal("00000000000000000000000000000000", encode(d)) d[19] = 1 assert.Equal("00000000000000000000000000000001", encode(d)) d[19] = 10 assert.Equal("0000000000000000000000000000000a", encode(d)) d[19] = 20 assert.Equal("0000000000000000000000000000000k", encode(d)) d[19] = 31 assert.Equal("0000000000000000000000000000000v", encode(d)) d[19] = 32 assert.Equal("00000000000000000000000000000010", encode(d)) d[19] = 63 assert.Equal("0000000000000000000000000000001v", encode(d)) d[19] = 64 assert.Equal("00000000000000000000000000000020", encode(d)) // Largest! for i := 0; i < 20; i++ { d[i] = 0xff } assert.Equal("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv", encode(d)) } func TestBase32Decode(t *testing.T) { assert := assert.New(t) d := make([]byte, 20, 20) assert.Equal(d, decode("00000000000000000000000000000000")) d[19] = 1 assert.Equal(d, decode("00000000000000000000000000000001")) d[19] = 10 assert.Equal(d, decode("0000000000000000000000000000000a")) d[19] = 20 assert.Equal(d, decode("0000000000000000000000000000000k")) d[19] = 31 assert.Equal(d, decode("0000000000000000000000000000000v")) d[19] = 32 assert.Equal(d, decode("00000000000000000000000000000010")) d[19] = 63 assert.Equal(d, decode("0000000000000000000000000000001v")) d[19] = 64 assert.Equal(d, decode("00000000000000000000000000000020")) // Largest! for i := 0; i < 20; i++ { d[i] = 0xff } assert.Equal(d, decode("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv")) } ================================================ FILE: go/hash/hash.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package hash implements the hash function used throughout Noms. // // Noms serialization from version 4-onward uses the first 20 bytes of sha-512 for hashes. // // sha-512 was chosen because: // // - sha-1 is no longer recommended. // - sha-3 is brand new, not a lot of platform support. // - blake is not commonly used, not a lot of platform support. // - within sha-2, sha-512 is faster than sha-256 on 64 bit. // // Our specific truncation scheme (first 20 bytes) was chosen because: // // - The "standard" truncation schemes are not widely supported. For example, at time of writing, there is no fast native implementation of sha512/256 on Node. // - The smallest standard truncation of sha512 is 28 bytes, but we don't need this many. And because we are a database, the size of the hashes matters. Bigger hashes mean less data in each chunk, which means less tree fan-out, which means slower iteration and searching. 20 bytes is a good balance between collision resistance and wide trees. // - 20 bytes leads to a nice round number of base32 digits: 32. // // The textual serialization of hashes uses big-endian base32 with the alphabet {0-9,a-v}. This scheme was chosen because: // // - It's easy to convert to and from base32 without bignum arithemetic. // - No special chars: you can double-click to select in GUIs. // - Sorted hashes will be sorted textually, making it easy to scan for humans. // // In Noms, the hash function is a component of the serialization version, which is constant over the entire lifetime of a single database. So clients do not need to worry about encountering multiple hash functions in the same database. package hash import ( "bytes" "crypto/sha512" "fmt" "regexp" "strconv" "github.com/attic-labs/noms/go/d" ) const ( // ByteLen is the number of bytes used to represent the Hash. 
ByteLen = 20 // StringLen is the number of characters need to represent the Hash using Base32. StringLen = 32 // 20 * 8 / log2(32) ) var ( pattern = regexp.MustCompile("^([0-9a-v]{" + strconv.Itoa(StringLen) + "})$") emptyHash = Hash{} ) // Hash is used to represent the hash of a Noms Value. type Hash [ByteLen]byte // IsEmpty determines if this Hash is equal to the empty hash (all zeroes). func (h Hash) IsEmpty() bool { return h == emptyHash } // String returns a string representation of the hash using Base32 encoding. func (h Hash) String() string { return encode(h[:]) } // Of computes a new Hash from data. func Of(data []byte) Hash { r := sha512.Sum512(data) h := Hash{} copy(h[:], r[:ByteLen]) return h } // New creates a new Hash backed by data, ensuring that data is an acceptable length. func New(data []byte) Hash { d.PanicIfFalse(len(data) == ByteLen) h := Hash{} copy(h[:], data) return h } // MaybeParse parses a string representing a hash as a Base32 encoded byte array. // If the string is not well formed then this returns (emptyHash, false). func MaybeParse(s string) (Hash, bool) { match := pattern.FindStringSubmatch(s) if match == nil { return emptyHash, false } return New(decode(s)), true } // Parse parses a string representing a hash as a Base32 encoded byte array. // If the string is not well formed then this panics. func Parse(s string) Hash { r, ok := MaybeParse(s) if !ok { d.PanicIfError(fmt.Errorf("Cound not parse Hash: %s", s)) } return r } // Less compares two hashes returning whether this Hash is less than other. func (h Hash) Less(other Hash) bool { return bytes.Compare(h[:], other[:]) < 0 } // Greater compares two hashes returning whether this Hash is greater than other. func (h Hash) Greater(other Hash) bool { // TODO: Remove this return bytes.Compare(h[:], other[:]) > 0 } // HashSet is a set of Hashes. 
type HashSet map[Hash]struct{} func NewHashSet(hashes ...Hash) HashSet { out := make(HashSet, len(hashes)) for _, h := range hashes { out.Insert(h) } return out } // Insert adds a Hash to the set. func (hs HashSet) Insert(hash Hash) { hs[hash] = struct{}{} } // Has returns true if the HashSet contains hash. func (hs HashSet) Has(hash Hash) (has bool) { _, has = hs[hash] return } // Remove removes hash from the HashSet. func (hs HashSet) Remove(hash Hash) { delete(hs, hash) } ================================================ FILE: go/hash/hash_slice.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package hash type HashSlice []Hash func (rs HashSlice) Len() int { return len(rs) } func (rs HashSlice) Less(i, j int) bool { return rs[i].Less(rs[j]) } func (rs HashSlice) Swap(i, j int) { rs[i], rs[j] = rs[j], rs[i] } func (rs HashSlice) Equals(other HashSlice) bool { if len(rs) != len(other) { return false } for i := 0; i < len(rs); i++ { if rs[i] != other[i] { return false } } return true } func (rs HashSlice) HashSet() HashSet { hs := make(HashSet, len(rs)) for _, h := range rs { hs[h] = struct{}{} } return hs } ================================================ FILE: go/hash/hash_slice_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package hash import ( "sort" "testing" "github.com/stretchr/testify/assert" ) func TestHashSliceSort(t *testing.T) { assert := assert.New(t) rs := HashSlice{} for i := 1; i <= 3; i++ { for j := 1; j <= 3; j++ { h := Hash{} for k := 1; k <= j; k++ { h[k-1] = byte(i) } rs = append(rs, h) } } rs2 := HashSlice(make([]Hash, len(rs))) copy(rs2, rs) sort.Sort(sort.Reverse(rs2)) assert.False(rs.Equals(rs2)) sort.Sort(rs2) assert.True(rs.Equals(rs2)) } ================================================ FILE: go/hash/hash_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package hash import ( "testing" "github.com/attic-labs/noms/go/d" "github.com/stretchr/testify/assert" ) func TestParseError(t *testing.T) { assert := assert.New(t) assertParseError := func(s string) { e := d.Try(func() { Parse(s) }) _, ok := e.(d.WrappedError) assert.True(ok) } assertParseError("foo") // too few digits assertParseError("0000000000000000000000000000000") // too many digits assertParseError("000000000000000000000000000000000") // 'w' not valid base32 assertParseError("00000000000000000000000000000000w") // no prefix assertParseError("sha1-00000000000000000000000000000000") assertParseError("sha2-00000000000000000000000000000000") r := Parse("00000000000000000000000000000000") assert.NotNil(r) } func TestMaybeParse(t *testing.T) { assert := assert.New(t) parse := func(s string, success bool) { r, ok := MaybeParse(s) assert.Equal(success, ok, "Expected success=%t for %s", success, s) if ok { assert.Equal(s, r.String()) } else { assert.Equal(emptyHash, r) } } parse("00000000000000000000000000000000", true) parse("00000000000000000000000000000001", true) parse("", false) parse("adsfasdf", false) parse("sha2-00000000000000000000000000000000", 
false) parse("0000000000000000000000000000000w", false) } func TestEquals(t *testing.T) { assert := assert.New(t) r0 := Parse("00000000000000000000000000000000") r01 := Parse("00000000000000000000000000000000") r1 := Parse("00000000000000000000000000000001") assert.Equal(r0, r01) assert.Equal(r01, r0) assert.NotEqual(r0, r1) assert.NotEqual(r1, r0) } func TestString(t *testing.T) { s := "0123456789abcdefghijklmnopqrstuv" r := Parse(s) assert.Equal(t, s, r.String()) } func TestOf(t *testing.T) { r := Of([]byte("abc")) assert.Equal(t, "rmnjb8cjc5tblj21ed4qs821649eduie", r.String()) } func TestIsEmpty(t *testing.T) { r1 := Hash{} assert.True(t, r1.IsEmpty()) r2 := Parse("00000000000000000000000000000000") assert.True(t, r2.IsEmpty()) r3 := Parse("rmnjb8cjc5tblj21ed4qs821649eduie") assert.False(t, r3.IsEmpty()) } func TestLess(t *testing.T) { assert := assert.New(t) r1 := Parse("00000000000000000000000000000001") r2 := Parse("00000000000000000000000000000002") assert.False(r1.Less(r1)) assert.True(r1.Less(r2)) assert.False(r2.Less(r1)) assert.False(r2.Less(r2)) r0 := Hash{} assert.False(r0.Less(r0)) assert.True(r0.Less(r2)) assert.False(r2.Less(r0)) } func TestGreater(t *testing.T) { assert := assert.New(t) r1 := Parse("00000000000000000000000000000001") r2 := Parse("00000000000000000000000000000002") assert.False(r1.Greater(r1)) assert.False(r1.Greater(r2)) assert.True(r2.Greater(r1)) assert.False(r2.Greater(r2)) r0 := Hash{} assert.False(r0.Greater(r0)) assert.False(r0.Greater(r2)) assert.True(r2.Greater(r0)) } ================================================ FILE: go/marshal/decode.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package marshal import ( "fmt" "reflect" "sync" "github.com/attic-labs/noms/go/types" ) // Unmarshal converts a Noms value into a Go value. 
It decodes v and stores the // result in the value pointed to by out. // // Unmarshal uses the inverse of the encodings that Marshal uses with the // following additional rules: // // To unmarshal a Noms struct into a Go struct, Unmarshal matches incoming // object fields to the fields used by Marshal (either the struct field name or // its tag). Unmarshal will only set exported fields of the struct. The name // of the Go struct must match (ignoring case) the name of the Noms struct. All // exported fields on the Go struct must be present in the Noms struct, unless // the field on the Go struct is marked with the "omitempty" tag. Go struct // fields also support the "original" tag which causes the Go field to receive // the entire original unmarshaled Noms struct. // // To unmarshal a Noms list or set into a slice, Unmarshal resets the slice // length to zero and then appends each element to the slice. If the Go slice // was nil a new slice is created when an element is added. // // To unmarshal a Noms list into a Go array, Unmarshal decodes Noms list // elements into corresponding Go array elements. // // To unmarshal a Noms map into a Go map, Unmarshal decodes Noms key and values // into corresponding Go array elements. If the Go map was nil a new map is // created if any value is set. // // To unmarshal a Noms set into a Go map, the field must be tagged with `noms:",set"`, // and it must have a type of map[]struct{}. Unmarshal decodes into // Go map keys corresponding to the set values and assigns each key a value of struct{}{}. // // When unmarshalling onto interface{} the following rules are used: // - types.Bool -> bool // - types.List -> []T, where T is determined recursively using the same rules. 
// - types.Set -> depends on `noms:",set"` annotation and field type: // - without the annotation, same as types.List // - with the annotation, same as types.Map for map[T]struct{} fields and same as types.List for slice fields // - types.Map -> map[T]V, where T and V is determined recursively using the // same rules. // - types.Number -> float64 // - types.String -> string // - *types.Type -> *types.Type // - types.Union -> interface // - Everything else an error // // Unmarshal returns an UnmarshalTypeMismatchError if: // - a Noms value is not appropriate for a given target type // - a Noms number overflows the target type // - a Noms list is decoded into a Go array of a different length func Unmarshal(v types.Value, out interface{}) (err error) { return UnmarshalOpt(v, Opt{}, out) } // UnmarshalOpt is like Unmarshal but provides additional options. func UnmarshalOpt(v types.Value, opt Opt, out interface{}) (err error) { defer func() { if r := recover(); r != nil { switch r := r.(type) { case *UnmarshalTypeMismatchError, *UnsupportedTypeError, *InvalidTagError, *InvalidUnmarshalError: err = r.(error) case *unmarshalNomsError: err = r.err default: panic(r) } } }() MustUnmarshalOpt(v, opt, out) return } // Unmarshals a Noms value into a Go value using the same rules as Unmarshal(). // Panics on failure. func MustUnmarshal(v types.Value, out interface{}) { MustUnmarshalOpt(v, Opt{}, out) } // MustUnmarshalOpt is like MustUnmarshal but with additional options. func MustUnmarshalOpt(v types.Value, opt Opt, out interface{}) { rv := reflect.ValueOf(out) if rv.Kind() != reflect.Ptr || rv.IsNil() { panic(&InvalidUnmarshalError{reflect.TypeOf(out)}) } rv = rv.Elem() nt := nomsTags{ set: opt.Set, } d := typeDecoder(rv.Type(), nt) d(v, rv) } // Unmarshaler is an interface types can implement to provide their own // decoding. // // You probably want to implement this on a pointer to a type, otherwise // calling UnmarshalNoms will effectively do nothing. 
For example, to unmarshal // a MyType you would define: // // func (t *MyType) UnmarshalNoms(v types.Value) error {} type Unmarshaler interface { // UnmarshalNoms decodes v, or returns an error. UnmarshalNoms(v types.Value) error } var unmarshalerInterface = reflect.TypeOf((*Unmarshaler)(nil)).Elem() // InvalidUnmarshalError describes an invalid argument passed to Unmarshal. (The // argument to Unmarshal must be a non-nil pointer.) type InvalidUnmarshalError struct { Type reflect.Type } func (e *InvalidUnmarshalError) Error() string { if e.Type == nil { return "Cannot unmarshal into Go nil value" } if e.Type.Kind() != reflect.Ptr { return "Cannot unmarshal into Go non pointer of type " + e.Type.String() } return "Cannot unmarshal into Go nil pointer of type " + e.Type.String() } // UnmarshalTypeMismatchError describes a Noms value that was not appropriate // for a value of a specific Go type. type UnmarshalTypeMismatchError struct { Value types.Value Type reflect.Type // type of Go value it could not be assigned to details string } func (e *UnmarshalTypeMismatchError) Error() string { var ts string if e.Type == nil { ts = "nil" } else { ts = e.Type.String() } return fmt.Sprintf("Cannot unmarshal %s into Go value of type %s%s", types.TypeOf(e.Value).Describe(), ts, e.details) } func overflowError(v types.Number, t reflect.Type) *UnmarshalTypeMismatchError { return &UnmarshalTypeMismatchError{v, t, fmt.Sprintf(" (%g does not fit in %s)", v, t)} } // unmarshalNomsError wraps errors from Marshaler.UnmarshalNoms. These should // be unwrapped and never leak to the caller of Unmarshal. 
type unmarshalNomsError struct { err error } func (e *unmarshalNomsError) Error() string { return e.err.Error() } type decoderFunc func(v types.Value, rv reflect.Value) func typeDecoder(t reflect.Type, tags nomsTags) decoderFunc { if reflect.PtrTo(t).Implements(unmarshalerInterface) { return marshalerDecoder(t) } switch t.Kind() { case reflect.Bool: return boolDecoder case reflect.Float32, reflect.Float64: return floatDecoder case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: return intDecoder case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: return uintDecoder case reflect.String: return stringDecoder case reflect.Struct: return structDecoder(t) case reflect.Interface: return interfaceDecoder(t) case reflect.Slice: return sliceDecoder(t) case reflect.Array: return arrayDecoder(t) case reflect.Map: if shouldMapDecodeFromSet(t, tags) { return mapFromSetDecoder(t) } return mapDecoder(t, tags) case reflect.Ptr: // Allow implementations of types.Value (like *types.Type) if t.Implements(nomsValueInterface) { return nomsValueDecoder } fallthrough default: panic(&UnsupportedTypeError{Type: t}) } } func boolDecoder(v types.Value, rv reflect.Value) { if b, ok := v.(types.Bool); ok { rv.SetBool(bool(b)) } else { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ""}) } } func stringDecoder(v types.Value, rv reflect.Value) { if s, ok := v.(types.String); ok { rv.SetString(string(s)) } else { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ""}) } } func floatDecoder(v types.Value, rv reflect.Value) { if n, ok := v.(types.Number); ok { rv.SetFloat(float64(n)) } else { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ""}) } } func intDecoder(v types.Value, rv reflect.Value) { if n, ok := v.(types.Number); ok { i := int64(n) if rv.OverflowInt(i) { panic(overflowError(n, rv.Type())) } rv.SetInt(i) } else { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ""}) } } func uintDecoder(v types.Value, rv reflect.Value) { if n, ok := 
v.(types.Number); ok { u := uint64(n) if rv.OverflowUint(u) { panic(overflowError(n, rv.Type())) } rv.SetUint(u) } else { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ""}) } } type decoderCacheT struct { sync.RWMutex m map[reflect.Type]decoderFunc } var decoderCache = &decoderCacheT{} // Separate Set decoder cache because the same type with and without the // `noms:",set"` tag decode differently (Set vs Map). var setDecoderCache = &decoderCacheT{} func (c *decoderCacheT) get(t reflect.Type) decoderFunc { c.RLock() defer c.RUnlock() return c.m[t] } func (c *decoderCacheT) set(t reflect.Type, d decoderFunc) { c.Lock() defer c.Unlock() if c.m == nil { c.m = map[reflect.Type]decoderFunc{} } c.m[t] = d } type decField struct { name string decoder decoderFunc index []int omitEmpty bool original bool } func structDecoderFields(t reflect.Type) []decField { fields := make([]decField, 0, t.NumField()) for i := 0; i < t.NumField(); i++ { index := make([]int, 1) index[0] = i f := t.Field(i) tags := getTags(f) if tags.skip { continue } if f.Anonymous && f.PkgPath == "" && !tags.hasName { embeddedFields := structDecoderFields(f.Type) for _, ef := range embeddedFields { ef.index = append(index, ef.index...) 
fields = append(fields, ef) } continue } validateField(f, t) fields = append(fields, decField{ name: tags.name, decoder: typeDecoder(f.Type, tags), index: index, omitEmpty: tags.omitEmpty, original: tags.original, }) } return fields } func structDecoder(t reflect.Type) decoderFunc { if t.Implements(nomsValueInterface) { return nomsValueDecoder } d := decoderCache.get(t) if d != nil { return d } fields := structDecoderFields(t) d = func(v types.Value, rv reflect.Value) { s, ok := v.(types.Struct) if !ok { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ", expected struct"}) } for _, f := range fields { sf := rv.FieldByIndex(f.index) if f.original { if sf.Type() != reflect.TypeOf(s) { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ", field with tag \"original\" must have type Struct"}) } sf.Set(reflect.ValueOf(s)) continue } fv, ok := s.MaybeGet(f.name) if ok { f.decoder(fv, sf) } else if !f.omitEmpty { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ", missing field \"" + f.name + "\""}) } } } decoderCache.set(t, d) return d } func nomsValueDecoder(v types.Value, rv reflect.Value) { if !reflect.TypeOf(v).AssignableTo(rv.Type()) { panic(&UnmarshalTypeMismatchError{v, rv.Type(), ""}) } rv.Set(reflect.ValueOf(v)) } func marshalerDecoder(t reflect.Type) decoderFunc { return func(v types.Value, rv reflect.Value) { ptr := reflect.New(t) err := ptr.Interface().(Unmarshaler).UnmarshalNoms(v) if err != nil { panic(&unmarshalNomsError{err}) } rv.Set(ptr.Elem()) } } func iterListOrSlice(v types.Value, t reflect.Type, f func(c types.Value, i uint64)) { switch v := v.(type) { case types.List: v.IterAll(f) case types.Set: i := uint64(0) v.IterAll(func(cv types.Value) { f(cv, i) i++ }) default: panic(&UnmarshalTypeMismatchError{v, t, ""}) } } func sliceDecoder(t reflect.Type) decoderFunc { d := decoderCache.get(t) if d != nil { return d } var decoder decoderFunc var init sync.RWMutex init.Lock() defer init.Unlock() d = func(v types.Value, rv reflect.Value) { var slice 
reflect.Value if rv.IsNil() { slice = rv } else { slice = rv.Slice(0, 0) } init.RLock() defer init.RUnlock() iterListOrSlice(v, t, func(v types.Value, _ uint64) { elemRv := reflect.New(t.Elem()).Elem() decoder(v, elemRv) slice = reflect.Append(slice, elemRv) }) rv.Set(slice) } decoderCache.set(t, d) decoder = typeDecoder(t.Elem(), nomsTags{}) return d } func arrayDecoder(t reflect.Type) decoderFunc { d := decoderCache.get(t) if d != nil { return d } var decoder decoderFunc var init sync.RWMutex init.Lock() defer init.Unlock() d = func(v types.Value, rv reflect.Value) { size := t.Len() list, ok := v.(types.Collection) if !ok { panic(&UnmarshalTypeMismatchError{v, t, ""}) } l := int(list.Len()) if l != size { panic(&UnmarshalTypeMismatchError{v, t, ", length does not match"}) } init.RLock() defer init.RUnlock() iterListOrSlice(list, t, func(v types.Value, i uint64) { decoder(v, rv.Index(int(i))) }) } decoderCache.set(t, d) decoder = typeDecoder(t.Elem(), nomsTags{}) return d } func mapFromSetDecoder(t reflect.Type) decoderFunc { d := setDecoderCache.get(t) if d != nil { return d } var decoder decoderFunc var init sync.RWMutex init.Lock() defer init.Unlock() d = func(v types.Value, rv reflect.Value) { m := rv nomsSet, ok := v.(types.Set) if !ok { panic(&UnmarshalTypeMismatchError{v, t, `, field has "set" tag`}) } init.RLock() defer init.RUnlock() nomsSet.IterAll(func(v types.Value) { keyRv := reflect.New(t.Key()).Elem() decoder(v, keyRv) if m.IsNil() { m = reflect.MakeMap(t) } m.SetMapIndex(keyRv, reflect.New(t.Elem()).Elem()) }) rv.Set(m) } setDecoderCache.set(t, d) decoder = typeDecoder(t.Key(), nomsTags{}) return d } func mapDecoder(t reflect.Type, tags nomsTags) decoderFunc { d := decoderCache.get(t) if d != nil { return d } var keyDecoder decoderFunc var valueDecoder decoderFunc var init sync.RWMutex init.Lock() defer init.Unlock() d = func(v types.Value, rv reflect.Value) { m := rv // Special case decoding failure if it looks like the "set" tag is missing, // 
because it's helpful. if _, ok := v.(types.Set); ok && !tags.set { panic(&UnmarshalTypeMismatchError{v, t, `, field missing "set" tag`}) } nomsMap, ok := v.(types.Map) if !ok { panic(&UnmarshalTypeMismatchError{v, t, ""}) } init.RLock() defer init.RUnlock() nomsMap.IterAll(func(k, v types.Value) { keyRv := reflect.New(t.Key()).Elem() keyDecoder(k, keyRv) valueRv := reflect.New(t.Elem()).Elem() valueDecoder(v, valueRv) if m.IsNil() { m = reflect.MakeMap(t) } m.SetMapIndex(keyRv, valueRv) }) rv.Set(m) } decoderCache.set(t, d) keyDecoder = typeDecoder(t.Key(), nomsTags{}) valueDecoder = typeDecoder(t.Elem(), nomsTags{}) return d } func interfaceDecoder(t reflect.Type) decoderFunc { if t.Implements(nomsValueInterface) { return nomsValueDecoder } if t != emptyInterface { panic(&UnsupportedTypeError{Type: t}) } return func(v types.Value, rv reflect.Value) { // TODO: Go directly from value to go type t := getGoTypeForNomsType(types.TypeOf(v), rv.Type(), v) i := reflect.New(t).Elem() typeDecoder(t, nomsTags{})(v, i) rv.Set(i) } } func getGoTypeForNomsType(nt *types.Type, rt reflect.Type, v types.Value) reflect.Type { switch nt.TargetKind() { case types.BoolKind: return reflect.TypeOf(false) case types.NumberKind: return reflect.TypeOf(float64(0)) case types.StringKind: return reflect.TypeOf("") case types.ListKind, types.SetKind: et := getGoTypeForNomsType(nt.Desc.(types.CompoundDesc).ElemTypes[0], rt, v) return reflect.SliceOf(et) case types.MapKind: kt := getGoTypeForNomsType(nt.Desc.(types.CompoundDesc).ElemTypes[0], rt, v) vt := getGoTypeForNomsType(nt.Desc.(types.CompoundDesc).ElemTypes[1], rt, v) return reflect.MapOf(kt, vt) case types.UnionKind: // Visit union types to raise potential errors for _, ut := range nt.Desc.(types.CompoundDesc).ElemTypes { getGoTypeForNomsType(ut, rt, v) } return emptyInterface // case types.StructKind: // reflect.StructOf was not added until Go 1.7 default: panic(&UnmarshalTypeMismatchError{Value: v, Type: rt}) } } func 
shouldMapDecodeFromSet(rt reflect.Type, tags nomsTags) bool { // map[T]struct{} `noms:,"set"` return tags.set && rt.Elem().Kind() == reflect.Struct && rt.Elem().NumField() == 0 } ================================================ FILE: go/marshal/decode_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package marshal import ( "bytes" "errors" "fmt" "math" "reflect" "regexp" "strings" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestDecode(tt *testing.T) { assert := assert.New(tt) vs := newTestValueStore() defer vs.Close() t := func(v types.Value, ptr interface{}, expected interface{}) { p := reflect.ValueOf(ptr) assert.Equal(reflect.Ptr, p.Type().Kind()) err := Unmarshal(v, p.Interface()) assert.NoError(err) if expectedValue, ok := expected.(types.Value); ok { assert.True(expectedValue.Equals(p.Elem().Interface().(types.Value))) } else { assert.Equal(expected, p.Elem().Interface()) } // Also test that types.Value is passed through var v2 types.Value err = Unmarshal(v, &v2) assert.NoError(err) assert.True(v.Equals(v2)) } for _, n := range []float32{0, 42, 3.14159265359, math.MaxFloat32} { var f32 float32 t(types.Number(n), &f32, float32(n)) } for _, n := range []float64{0, 42, 3.14159265359, math.MaxFloat64} { var f64 float64 t(types.Number(n), &f64, float64(n)) } for _, n := range []int8{0, 42, math.MaxInt8} { var i8 int8 t(types.Number(n), &i8, int8(n)) } for _, n := range []int16{0, 42, math.MaxInt16} { var i16 int16 t(types.Number(n), &i16, int16(n)) } for _, n := range []int32{0, 42, math.MaxInt32} { var i32 int32 t(types.Number(n), &i32, int32(n)) } // int is at least int32 for _, n := range []int{0, 42, math.MaxInt32} { var i int t(types.Number(n), &i, int(n)) } // There is precision 
loss for values above Math.pow(2, 53) - 1 for _, n := range []int64{0, 42, int64(math.Pow(2, 53) - 1)} { var i64 int64 t(types.Number(n), &i64, int64(n)) } for _, n := range []uint8{0, 42, math.MaxUint8} { var ui8 uint8 t(types.Number(n), &ui8, uint8(n)) } for _, n := range []uint16{0, 42, math.MaxUint16} { var ui16 uint16 t(types.Number(n), &ui16, uint16(n)) } for _, n := range []uint32{0, 42, math.MaxInt32} { var ui32 uint32 t(types.Number(n), &ui32, uint32(n)) } // uint is at least uint32 for _, n := range []uint{0, 42, math.MaxInt32} { var ui uint t(types.Number(n), &ui, uint(n)) } // There is precision loss for values above Math.pow(2, 53) - 1 for _, n := range []uint64{0, 42, uint64(math.Pow(2, 53) - 1)} { var ui64 uint64 t(types.Number(n), &ui64, uint64(n)) } var b bool t(types.Bool(true), &b, true) t(types.Bool(false), &b, false) for _, s := range []string{"", "s", "hello", "💩"} { var s2 string t(types.String(s), &s2, s) } var list types.List list2 := types.NewList(vs, types.Number(42)) t(list2, &list, list2) var m types.Map map2 := types.NewMap(vs, types.Number(42), types.String("Hi")) t(map2, &m, map2) var set types.Set set2 := types.NewSet(vs, types.String("Bye")) t(set2, &set, set2) var blob types.Blob blob2 := types.NewBlob(vs, bytes.NewBufferString("hello")) t(blob2, &blob, blob2) type TestStruct struct { B bool A float64 C string } var ts TestStruct t(types.NewStruct("TestStruct", types.StructData{ "b": types.Bool(true), "a": types.Number(42), "c": types.String("hi"), }), &ts, TestStruct{true, 42, "hi"}) // again to test the caching t(types.NewStruct("TestStruct", types.StructData{ "b": types.Bool(false), "a": types.Number(555), "c": types.String("hello"), }), &ts, TestStruct{false, 555, "hello"}) var as struct { X int32 Y bool } t(types.NewStruct("", types.StructData{ "y": types.Bool(true), "x": types.Number(42), }), &as, struct { X int32 Y bool }{ 42, true, }) // extra fields type T3 struct { B string } var t3 T3 t(types.NewStruct("T3", 
types.StructData{ "b": types.String("abc"), "a": types.Number(42), }), &t3, T3{"abc"}) // Case of struct name is not relevant when unmarshalling. type aBc struct { E bool } var t4 aBc t(types.NewStruct("abc", types.StructData{ "e": types.Bool(true), }), &t4, aBc{true}) t(types.NewStruct("Abc", types.StructData{ "e": types.Bool(false), }), &t4, aBc{false}) // Name of struct is irrelevant to unmarshalling structs. type SomeOtherName struct { A int } var t5 SomeOtherName t(types.NewStruct("aeiou", types.StructData{ "a": types.Number(42), }), &t5, SomeOtherName{42}) var t6 SomeOtherName t(types.NewStruct("SomeOtherName", types.StructData{ "a": types.Number(42), }), &t6, SomeOtherName{42}) var t7 struct { A int } t(types.NewStruct("SomeOtherName", types.StructData{ "a": types.Number(42), }), &t7, struct{ A int }{42}) } func TestDecodeStructWithNomsValue(t *testing.T) { // This is split out of TestDecode because we cannot use testify Equal // on a go struct with a field that is a Noms value. vs := newTestValueStore() defer vs.Close() type TestStruct struct { B bool A float64 C string } type T2 struct { Abc TestStruct Def types.List } v := types.NewStruct("T2", types.StructData{ "abc": types.NewStruct("TestStruct", types.StructData{ "a": types.Number(1), "b": types.Bool(false), "c": types.String("bye"), }), "def": types.NewList(vs, types.Number(42)), }) var t2 T2 MustUnmarshal(v, &t2) assert.IsType(t, T2{}, t2) assert.Equal(t, TestStruct{false, 1, "bye"}, t2.Abc) assert.True(t, t2.Def.Equals(types.NewList(vs, types.Number(42)))) } func TestDecodeNilPointer(t *testing.T) { var x *bool assertDecodeErrorMessage(t, types.Bool(true), x, "Cannot unmarshal into Go nil pointer of type *bool") } func TestDecodeNonPointer(t *testing.T) { b := true assertDecodeErrorMessage(t, types.Bool(true), b, "Cannot unmarshal into Go non pointer of type bool") } func TestDecodeNil(t *testing.T) { err := Unmarshal(types.Bool(true), nil) assert.Error(t, err) assert.Equal(t, "Cannot unmarshal into 
Go nil value", err.Error()) } func newTestValueStore() *types.ValueStore { st := &chunks.TestStorage{} return types.NewValueStore(st.NewView()) } func TestDecodeTypeMismatch(t *testing.T) { vs := newTestValueStore() defer vs.Close() var b bool assertDecodeErrorMessage(t, types.Number(42), &b, "Cannot unmarshal Number into Go value of type bool") var blob types.Blob assertDecodeErrorMessage(t, types.NewList(vs), &blob, "Cannot unmarshal List<> into Go value of type types.Blob") type S struct { X int } var s S assertDecodeErrorMessage(t, types.String("hi!"), &s, "Cannot unmarshal String into Go value of type marshal.S, expected struct") assertDecodeErrorMessage(t, types.NewStruct("S", types.StructData{ "x": types.String("hi"), }), &s, "Cannot unmarshal String into Go value of type int") } func assertDecodeErrorMessage(t *testing.T, v types.Value, ptr interface{}, msg string) { p := reflect.ValueOf(ptr) err := Unmarshal(v, p.Interface()) assert.Error(t, err) assert.Equal(t, msg, err.Error()) } func TestDecodeInvalidTypes(tt *testing.T) { t := func(p interface{}, ts string) { assertDecodeErrorMessage(tt, types.Number(42), p, "Type is not supported, type: "+ts) } var ptr *bool t(&ptr, "*bool") var c chan bool t(&c, "chan bool") type Nested struct { X *bool } var n Nested t(&n, "*bool") } func TestDecodeOverflows(tt *testing.T) { t := func(p interface{}, n float64, ts string) { assertDecodeErrorMessage(tt, types.Number(n), p, fmt.Sprintf("Cannot unmarshal Number into Go value of type %s (%g does not fit in %s)", ts, n, ts)) } var ui8 uint8 t(&ui8, 256, "uint8") t(&ui8, -1, "uint8") var ui16 uint16 t(&ui16, math.Pow(2, 16), "uint16") t(&ui16, -1, "uint16") var ui32 uint32 t(&ui32, math.Pow(2, 32), "uint32") t(&ui32, -1, "uint32") var i8 int8 t(&i8, 128, "int8") t(&i8, -128-1, "int8") var i16 int16 t(&i16, math.Pow(2, 15), "int16") t(&i16, -math.Pow(2, 15)-1, "int16") var i32 int32 t(&i32, math.Pow(2, 31), "int32") t(&i32, -math.Pow(2, 31)-1, "int32") } func 
TestDecodeMissingField(t *testing.T) { type S struct { A int32 B bool } var s S assertDecodeErrorMessage(t, types.NewStruct("S", types.StructData{ "a": types.Number(42), }), &s, "Cannot unmarshal Struct S {\n a: Number,\n} into Go value of type marshal.S, missing field \"b\"") } func TestDecodeEmbeddedStruct(tt *testing.T) { assert := assert.New(tt) type EmbeddedStruct struct { X int } type TestStruct struct { EmbeddedStruct } var ts TestStruct err := Unmarshal(types.NewStruct("S", types.StructData{ "x": types.Number(1), }), &ts) assert.NoError(err) assert.Equal(TestStruct{EmbeddedStruct{1}}, ts) type OuterTest struct { Y bool TestStruct } var ts2 OuterTest err = Unmarshal(types.NewStruct("S", types.StructData{ "x": types.Number(2), "y": types.Bool(true), }), &ts2) assert.NoError(err) assert.Equal(OuterTest{true, TestStruct{EmbeddedStruct{2}}}, ts2) } func TestDecodeEmbeddedStructSkip(tt *testing.T) { assert := assert.New(tt) type EmbeddedStruct struct { X int } type TestStruct struct { EmbeddedStruct `noms:"-"` Y int } ts := TestStruct{EmbeddedStruct: EmbeddedStruct{42}} err := Unmarshal(types.NewStruct("S", types.StructData{ "y": types.Number(2), }), &ts) assert.NoError(err) assert.Equal(TestStruct{EmbeddedStruct{42}, 2}, ts) } func TestDecodeEmbeddedStructNamed(tt *testing.T) { assert := assert.New(tt) type EmbeddedStruct struct { X int } type TestStruct struct { EmbeddedStruct `noms:"em"` Y int } ts := TestStruct{EmbeddedStruct: EmbeddedStruct{42}} err := Unmarshal(types.NewStruct("S", types.StructData{ "em": types.NewStruct("S", types.StructData{ "x": types.Number(1), }), "y": types.Number(2), }), &ts) assert.NoError(err) assert.Equal(TestStruct{EmbeddedStruct{1}, 2}, ts) } func TestDecodeEmbeddedStructOriginal(tt *testing.T) { assert := assert.New(tt) type EmbeddedStruct struct { X int O types.Struct `noms:",original"` } type TestStruct struct { EmbeddedStruct } var ts TestStruct nomsStruct := types.NewStruct("S", types.StructData{ "x": types.Number(1), }) 
err := Unmarshal(nomsStruct, &ts) assert.NoError(err) expected := TestStruct{ EmbeddedStruct: EmbeddedStruct{ X: 1, O: nomsStruct, }, } assert.Equal(expected, ts) } func TestDecodeNonExportedField(tt *testing.T) { type TestStruct struct { x int } var ts TestStruct assertDecodeErrorMessage(tt, types.String("hi"), &ts, "Non exported fields are not supported, type: marshal.TestStruct") } func TestDecodeTaggingSkip(t *testing.T) { assert := assert.New(t) type S struct { A int32 `noms:"-"` B bool } var s S err := Unmarshal(types.NewStruct("S", types.StructData{ "b": types.Bool(true), }), &s) assert.NoError(err) assert.Equal(S{0, true}, s) var s2 S Unmarshal(types.NewStruct("S", types.StructData{ "a": types.Number(42), "b": types.Bool(true), }), &s2) assert.Equal(S{0, true}, s2) s3 := S{555, true} err = Unmarshal(types.NewStruct("S", types.StructData{ "a": types.Number(42), "b": types.Bool(false), }), &s3) assert.NoError(err) assert.Equal(S{555, false}, s3) } func TestDecodeNamedFields(t *testing.T) { assert := assert.New(t) type S struct { Aaa int `noms:"a"` Bbb bool `noms:"B"` Ccc string } var s S err := Unmarshal(types.NewStruct("S", types.StructData{ "a": types.Number(42), "B": types.Bool(true), "ccc": types.String("Hi"), }), &s) assert.NoError(err) assert.Equal(S{42, true, "Hi"}, s) } func TestDecodeInvalidNamedFields(t *testing.T) { type S struct { A int `noms:"1a"` } var s S assertDecodeErrorMessage(t, types.NewStruct("S", types.StructData{ "a": types.Number(42), }), &s, "Invalid struct field name: 1a") } func TestDecodeInvalidNomsType(t *testing.T) { vs := newTestValueStore() defer vs.Close() type S struct { A types.List } var s S assertDecodeErrorMessage(t, types.NewStruct("S", types.StructData{ "a": types.NewMap(vs, types.String("A"), types.Number(1)), }), &s, "Cannot unmarshal Map into Go value of type types.List") } func TestDecodeNomsTypePtr(t *testing.T) { assert := assert.New(t) testUnmarshal := func(v types.Value, dest interface{}, expected interface{}) { 
err := Unmarshal(v, dest) assert.NoError(err) assert.Equal(expected, dest) } type S struct{ Type *types.Type } var s S primitive := types.StringType testUnmarshal(types.NewStruct("S", types.StructData{"type": primitive}), &s, &S{primitive}) complex := types.MakeStructType("Complex", types.StructField{ Name: "stuff", Type: types.StringType, }, ) testUnmarshal(types.NewStruct("S", types.StructData{"type": complex}), &s, &S{complex}) } func ExampleUnmarshal() { type Person struct { Given string Male bool } var rickon Person err := Unmarshal(types.NewStruct("Person", types.StructData{ "given": types.String("Rickon"), "male": types.Bool(true), }), &rickon) if err != nil { fmt.Println(err) return } fmt.Printf("Given: %s, Male: %t\n", rickon.Given, rickon.Male) // Output: Given: Rickon, Male: true } func TestDecodeSlice(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() var s []string err := Unmarshal(types.NewList(vs, types.String("a"), types.String("b"), types.String("c")), &s) assert.NoError(err) assert.Equal([]string{"a", "b", "c"}, s) err = Unmarshal(types.NewSet(vs, types.String("a"), types.String("b"), types.String("c")), &s) assert.NoError(err) assert.Equal([]string{"a", "b", "c"}, s) } func TestDecodeSliceEmpty(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() var s []string err := Unmarshal(types.NewList(vs), &s) assert.NoError(err) assert.Equal([]string(nil), s) err = Unmarshal(types.NewSet(vs), &s) assert.NoError(err) assert.Equal([]string(nil), s) s2 := []string{} err = Unmarshal(types.NewList(vs), &s2) assert.NoError(err) assert.Equal([]string{}, s2) err = Unmarshal(types.NewSet(vs), &s2) assert.NoError(err) assert.Equal([]string{}, s2) } func TestDecodeSliceReuse(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() s := []string{"A", "B", "C", "D"} s2 := s[1:3] err := Unmarshal(types.NewList(vs, types.String("a"), types.String("b")), &s) assert.NoError(err) 
assert.Equal([]string{"a", "b"}, s) assert.Equal([]string{"b", "C"}, s2) err = Unmarshal(types.NewSet(vs, types.String("a"), types.String("b")), &s) assert.NoError(err) assert.Equal([]string{"a", "b"}, s) assert.Equal([]string{"b", "C"}, s2) } func TestDecodeArray(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() s := [3]string{"", "", ""} err := Unmarshal(types.NewList(vs, types.String("a"), types.String("b"), types.String("c")), &s) assert.NoError(err) assert.Equal([3]string{"a", "b", "c"}, s) err = Unmarshal(types.NewSet(vs, types.String("a"), types.String("b"), types.String("c")), &s) assert.NoError(err) assert.Equal([3]string{"a", "b", "c"}, s) } func TestDecodeArrayEmpty(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() var s [0]string err := Unmarshal(types.NewList(vs), &s) assert.NoError(err) assert.Equal([0]string{}, s) err = Unmarshal(types.NewSet(vs), &s) assert.NoError(err) assert.Equal([0]string{}, s) } func TestDecodeStructWithSlice(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() type S struct { List []int } var s S err := Unmarshal(types.NewStruct("S", types.StructData{ "list": types.NewList(vs, types.Number(1), types.Number(2), types.Number(3)), }), &s) assert.NoError(err) assert.Equal(S{[]int{1, 2, 3}}, s) err = Unmarshal(types.NewStruct("S", types.StructData{ "list": types.NewSet(vs, types.Number(1), types.Number(2), types.Number(3)), }), &s) assert.NoError(err) assert.Equal(S{[]int{1, 2, 3}}, s) } func TestDecodeStructWithArrayOfNomsValue(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() type S struct { List [1]types.Set } var s S err := Unmarshal(types.NewStruct("S", types.StructData{ "list": types.NewList(vs, types.NewSet(vs, types.Bool(true))), }), &s) assert.NoError(err) assert.Equal(S{[1]types.Set{types.NewSet(vs, types.Bool(true))}}, s) } func TestDecodeWrongArrayLength(t *testing.T) { vs := 
newTestValueStore() defer vs.Close() var l [2]string assertDecodeErrorMessage(t, types.NewList(vs, types.String("hi")), &l, "Cannot unmarshal List into Go value of type [2]string, length does not match") } func TestDecodeWrongArrayType(t *testing.T) { vs := newTestValueStore() defer vs.Close() var l [1]string assertDecodeErrorMessage(t, types.NewList(vs, types.Number(1)), &l, "Cannot unmarshal Number into Go value of type string") } func TestDecodeWrongSliceType(t *testing.T) { vs := newTestValueStore() defer vs.Close() var l []string assertDecodeErrorMessage(t, types.NewList(vs, types.Number(1)), &l, "Cannot unmarshal Number into Go value of type string") } func TestDecodeSliceWrongNomsType(t *testing.T) { vs := newTestValueStore() defer vs.Close() var l []string assertDecodeErrorMessage(t, types.NewMap(vs, types.String("a"), types.Number(1)), &l, "Cannot unmarshal Map into Go value of type []string") } func TestDecodeArrayWrongNomsType(t *testing.T) { vs := newTestValueStore() defer vs.Close() var l [1]string assertDecodeErrorMessage(t, types.NewMap(vs, types.String("a"), types.Number(1)), &l, "Cannot unmarshal Map into Go value of type [1]string") } func TestDecodeRecursive(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() type Node struct { Value int Children []Node } v := types.NewStruct("Node", types.StructData{ "children": types.NewList( vs, types.NewStruct("Node", types.StructData{ "children": types.NewList(vs), "value": types.Number(2), }), types.NewStruct("Node", types.StructData{ "children": types.NewList(vs), "value": types.Number(3), }), ), "value": types.Number(1), }) var n Node err := Unmarshal(v, &n) assert.NoError(err) assert.Equal(Node{ 1, []Node{ {2, nil}, {3, nil}, }, }, n) } func TestDecodeMap(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() var m map[string]int testMap := types.NewMap( vs, types.String("a"), types.Number(1), types.String("b"), types.Number(2), 
types.String("c"), types.Number(3)) expectedMap := map[string]int{"a": 1, "b": 2, "c": 3} err := Unmarshal(testMap, &m) assert.NoError(err) assert.Equal(expectedMap, m) m = map[string]int{"b": 2, "c": 333} err = Unmarshal(types.NewMap( vs, types.String("a"), types.Number(1), types.String("c"), types.Number(3)), &m) assert.NoError(err) assert.Equal(expectedMap, m) type S struct { N string } var m2 map[S]bool err = Unmarshal(types.NewMap( vs, types.NewStruct("S", types.StructData{"n": types.String("Yes")}), types.Bool(true), types.NewStruct("S", types.StructData{"n": types.String("No")}), types.Bool(false)), &m2) assert.NoError(err) assert.Equal(map[S]bool{S{"Yes"}: true, S{"No"}: false}, m2) } func TestDecodeMapEmpty(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() var m map[string]int err := Unmarshal(types.NewMap(vs), &m) assert.NoError(err) assert.Equal(map[string]int(nil), m) m2 := map[string]int{} err = Unmarshal(types.NewMap(vs), &m2) assert.NoError(err) assert.Equal(map[string]int{}, m2) } func TestDecodeMapWrongNomsType(t *testing.T) { vs := newTestValueStore() defer vs.Close() var m map[string]int assertDecodeErrorMessage(t, types.NewList(vs, types.String("a"), types.Number(1)), &m, "Cannot unmarshal List into Go value of type map[string]int") } func TestDecodeOntoInterface(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() var i interface{} err := Unmarshal(types.Number(1), &i) assert.NoError(err) assert.Equal(float64(1), i) err = Unmarshal(types.String("abc"), &i) assert.NoError(err) assert.Equal("abc", i) err = Unmarshal(types.Bool(true), &i) assert.NoError(err) assert.Equal(true, i) err = Unmarshal(types.NewList(vs, types.String("abc")), &i) assert.NoError(err) assert.Equal([]string{"abc"}, i) err = Unmarshal(types.NewMap(vs, types.String("abc"), types.Number(1)), &i) assert.NoError(err) assert.Equal(map[string]float64{"abc": float64(1)}, i) err = Unmarshal(types.NewList(vs, 
types.String("a"), types.Bool(true), types.Number(42)), &i) assert.NoError(err) assert.Equal([]interface{}{"a", true, float64(42)}, i) err = Unmarshal(types.NewMap(vs, types.String("a"), types.Bool(true), types.Number(42), types.NewList(vs)), &i) assert.NoError(err) assert.Equal(map[interface{}]interface{}{"a": true, float64(42): []interface{}(nil)}, i) } func TestDecodeOntoNonSupportedInterface(t *testing.T) { type I interface { M() int } var i I assertDecodeErrorMessage(t, types.Number(1), &i, "Type is not supported, type: marshal.I") } func TestDecodeOntoInterfaceStruct(t *testing.T) { // Not implemented because it requires Go 1.7. var i interface{} assertDecodeErrorMessage(t, types.NewStruct("", types.StructData{}), &i, "Cannot unmarshal Struct {} into Go value of type interface {}") } func TestDecodeSet(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() type T struct { A map[int]struct{} `noms:",set"` B map[int]struct{} C map[string]struct{} `noms:",set"` D map[string]struct{} E []int F []int `noms:",set"` G []int } ns := types.NewStruct("T", types.StructData{ "a": types.NewSet(vs, types.Number(0), types.Number(1), types.Number(2)), "b": types.NewMap(vs, types.Number(3), types.EmptyStruct, types.Number(4), types.EmptyStruct, types.Number(5), types.EmptyStruct), "c": types.NewSet(vs, types.String("0"), types.String("1"), types.String("2")), "d": types.NewMap(vs, types.String("3"), types.EmptyStruct, types.String("4"), types.EmptyStruct, types.String("5"), types.EmptyStruct), "e": types.NewSet(vs, types.Number(6), types.Number(7), types.Number(8)), "f": types.NewSet(vs, types.Number(9), types.Number(10), types.Number(11)), "g": types.NewList(vs, types.Number(12), types.Number(13), types.Number(14)), }) gs := T{} assert.NoError(Unmarshal(ns, &gs)) assert.Equal(T{ A: map[int]struct{}{0: {}, 1: {}, 2: {}}, B: map[int]struct{}{3: {}, 4: {}, 5: {}}, C: map[string]struct{}{"0": {}, "1": {}, "2": {}}, D: map[string]struct{}{"3": {}, "4": 
{}, "5": {}},
		E: []int{6, 7, 8},
		F: []int{9, 10, 11},
		G: []int{12, 13, 14},
	}, gs)

	ns2 := types.NewStruct("T", types.StructData{
		"a": types.NewSet(vs),
		"b": types.NewMap(vs),
		"c": types.NewSet(vs),
		"d": types.NewMap(vs),
		"e": types.NewSet(vs),
		"f": types.NewSet(vs),
		"g": types.NewList(vs),
	})

	gs2 := T{
		A: map[int]struct{}{},
	}

	assert.NoError(Unmarshal(ns2, &gs2))
	assert.Equal(T{
		A: map[int]struct{}{},
	}, gs2)
}

// TestDecodeOpt runs UnmarshalOpt over a table of cases that decode a Noms Set
// of two strings into either a []string or a map[string]struct{}, with and
// without Opt.Set, and checks both the decoded value and the error text.
func TestDecodeOpt(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	tc := []struct {
		in        types.Value
		opt       Opt
		onto      interface{}
		wantValue interface{}
		wantError string
	}{
		{
			types.NewSet(vs, types.String("a"), types.String("b")),
			Opt{},
			&[]string{},
			&[]string{"a", "b"},
			"",
		},
		{
			types.NewSet(vs, types.String("a"), types.String("b")),
			Opt{Set: true},
			&[]string{},
			&[]string{"a", "b"},
			"",
		},
		{
			types.NewSet(vs, types.String("a"), types.String("b")),
			Opt{Set: true},
			&map[string]struct{}{},
			&map[string]struct{}{"a": struct{}{}, "b": struct{}{}},
			"",
		},
		{
			// Without Opt.Set the map target is expected to stay empty and
			// an error is expected.
			types.NewSet(vs, types.String("a"), types.String("b")),
			Opt{},
			&map[string]struct{}{},
			&map[string]struct{}{},
			"Cannot unmarshal Set into Go value of type map[string]struct {}, field missing \"set\" tag",
		},
	}

	// Note: the loop variable t shadows the *testing.T parameter in this loop.
	for _, t := range tc {
		err := UnmarshalOpt(t.in, t.opt, t.onto)
		assert.Equal(t.wantValue, t.onto)
		if t.wantError == "" {
			assert.Nil(err)
		} else {
			assert.Equal(t.wantError, err.Error())
		}
	}
}

// TestDecodeNamedSet checks that a field tagged `noms:"foo,set"` is filled
// from the Noms field "foo" and not from the field "a".
func TestDecodeNamedSet(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type T struct {
		A map[int]struct{} `noms:"foo,set"`
	}

	ns := types.NewStruct("T", types.StructData{
		"a":   types.NewSet(vs, types.Number(0)),
		"foo": types.NewSet(vs, types.Number(1)),
	})

	gs := T{}
	assert.NoError(Unmarshal(ns, &gs))
	assert.Equal(T{
		map[int]struct{}{1: {}},
	}, gs)
}

// TestDecodeSetWrongMapType checks the error messages for three mismatches:
// a "set"-tagged map whose value type is not struct{}, a Set decoded into an
// untagged map[int]struct{}, and a Map decoded into a "set"-tagged field.
func TestDecodeSetWrongMapType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type T1 struct {
		A map[int]int `noms:",set"`
	}

	err := Unmarshal(types.NewStruct("T1", types.StructData{
		"a": types.NewSet(vs, types.Number(0)),
	}), &T1{})
	assert.Error(err)
	assert.Equal("Cannot unmarshal Set into Go value of type map[int]int", err.Error())

	type T2 struct {
		A map[int]struct{}
	}

	err = Unmarshal(types.NewStruct("T2", types.StructData{
		"a": types.NewSet(vs, types.Number(0)),
	}), &T2{})
	assert.Error(err)
	assert.Equal(`Cannot unmarshal Set into Go value of type map[int]struct {}, field missing "set" tag`, err.Error())

	type T3 struct {
		A map[int]struct{} `noms:",set"`
	}

	err = Unmarshal(types.NewStruct("T3", types.StructData{
		"a": types.NewMap(vs, types.Number(0), types.EmptyStruct),
	}), &T3{})
	assert.Error(err)
	assert.Equal(`Cannot unmarshal Map into Go value of type map[int]struct {}, field has "set" tag`, err.Error())
}

// TestDecodeOmitEmpty checks that fields tagged "omitempty" may be absent from
// the Noms struct: decoding succeeds and the missing fields keep their zero
// value.
func TestDecodeOmitEmpty(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Foo int `noms:",omitempty"`
		Bar struct {
			Baz    int
			Hotdog int `noms:",omitempty"`
		}
	}
	expected := S{
		Bar: struct {
			Baz    int
			Hotdog int `noms:",omitempty"`
		}{
			Baz: 42,
		},
	}
	var actual S
	err := Unmarshal(types.NewStruct("S", types.StructData{
		"bar": types.NewStruct("", types.StructData{
			"baz": types.Number(42),
		}),
	}), &actual)
	assert.NoError(err)
	assert.Equal(expected, actual)
}

// TestDecodeOriginal decodes into a struct that has two fields tagged
// "original" and checks that Bar equals the input struct.
func TestDecodeOriginal(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Foo int          `noms:",omitempty"`
		Bar types.Struct `noms:",original"`
		Baz types.Struct `noms:",original"`
	}
	input := types.NewStruct("S", types.StructData{
		"foo": types.Number(42),
	})
	expected := S{
		Foo: 42,
		Bar: input,
		Baz: input,
	}
	var actual S
	err := Unmarshal(input, &actual)
	assert.NoError(err)
	// Only Bar is compared here; Foo and Baz are not asserted.
	assert.True(expected.Bar.Equals(actual.Bar))
}

// TestDecodeOriginalReceiveTypeError checks that an "original" field whose Go
// type is types.Value rather than types.Struct is rejected with an error.
func TestDecodeOriginalReceiveTypeError(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Foo types.Value `noms:",original"`
	}
	input := types.NewStruct("S", types.StructData{})
	var actual S
	err := Unmarshal(input, &actual)
	assert.Error(err)
	assert.Equal(`Cannot unmarshal Struct S {} into Go value of type marshal.S, field with tag "original" must have type Struct`, err.Error())
}

// TestDecodeCanSkipUnexportedField checks that an unexported field tagged
// `noms:"-"` does not prevent decoding and is left at its zero value.
func TestDecodeCanSkipUnexportedField(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Abc         int
		notExported bool `noms:"-"`
	}
	var s S
	err := Unmarshal(types.NewStruct("S", types.StructData{
		"abc": types.Number(42),
	}), &s)
	assert.NoError(err)
	assert.Equal(S{42, false}, s)
}

// UnmarshalNoms stores the given Number minus one in *u. The unchecked type
// assertion panics if v is not a types.Number.
func (u *primitiveType) UnmarshalNoms(v types.Value) error {
	*u = primitiveType(v.(types.Number) - 1)
	return nil
}

// TestUnmarshalerPrimitiveType checks that Unmarshal uses the custom
// UnmarshalNoms of primitiveType (43 decodes to 42).
func TestUnmarshalerPrimitiveType(t *testing.T) {
	assert := assert.New(t)

	v := types.Number(43)
	u := primitiveType(0)
	assert.NoError(Unmarshal(v, &u))
	assert.Equal(primitiveType(42), u)
}

// UnmarshalNoms splits the given String on "," and stores the parts in *u.
func (u *primitiveSliceType) UnmarshalNoms(v types.Value) error {
	sv := string(v.(types.String))
	spl := strings.Split(sv, ",")
	*u = make(primitiveSliceType, len(spl))
	for i, s := range spl {
		(*u)[i] = s
	}
	return nil
}

// TestUnmarshalerPrimitiveSliceType checks that "a,b,c" decodes to the three
// separate elements through the custom UnmarshalNoms.
func TestUnmarshalerPrimitiveSliceType(t *testing.T) {
	assert := assert.New(t)

	v := types.String("a,b,c")
	u := primitiveSliceType{}
	assert.NoError(Unmarshal(v, &u))
	assert.Equal(primitiveSliceType{"a", "b", "c"}, u)
}

// UnmarshalNoms rebuilds *u from a Set of "key,value" strings. It panics (via
// d.PanicIfFalse) if an element does not split into exactly two parts.
func (u *primitiveMapType) UnmarshalNoms(v types.Value) error {
	*u = primitiveMapType{}
	v.(types.Set).IterAll(func(v types.Value) {
		sv := v.(types.String)
		spl := strings.Split(string(sv), ",")
		d.PanicIfFalse(len(spl) == 2)
		(*u)[spl[0]] = spl[1]
	})
	return nil
}

// TestUnmarshalerPrimitiveMapType checks that a Set of "key,value" strings
// decodes to the corresponding map through the custom UnmarshalNoms.
func TestUnmarshalerPrimitiveMapType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	v := types.NewSet(vs, types.String("a,foo"), types.String("b,bar"))
	u := primitiveMapType{}
	assert.NoError(Unmarshal(v, &u))
	assert.Equal(primitiveMapType(map[string]string{
		"a": "foo",
		"b": "bar",
	}), u)
}

// UnmarshalNoms decodes a Number n into x = n / 3 and y = n % 3.
func (u *primitiveStructType) UnmarshalNoms(v types.Value) error {
	n := int(v.(types.Number))
	u.x = n / 3
	u.y = n % 3
	return nil
}

// TestUnmarshalerPrimitiveStructType checks that 10 decodes to {3, 1}.
func TestUnmarshalerPrimitiveStructType(t *testing.T) {
	assert := assert.New(t)

	v := types.Number(10)
	u := primitiveStructType{}
	assert.NoError(Unmarshal(v, &u))
	assert.Equal(primitiveStructType{3, 1}, u)
}

// UnmarshalNoms compiles the given String as a regular expression and stores
// it in *u. regexp.MustCompile panics on an invalid pattern.
func (u *builtinType) UnmarshalNoms(v types.Value) error {
	sv := v.(types.String)
	*u = builtinType(*regexp.MustCompile(string(sv)))
	return nil
}

// TestUnmarshalerBuiltinType checks that the decoded regexp reports the
// original pattern string.
func TestUnmarshalerBuiltinType(t *testing.T) {
	assert := assert.New(t)

	s := "[a-z]+$"
	v := types.String(s)
	u := builtinType{}
	assert.NoError(Unmarshal(v, &u))
	r := regexp.Regexp(u)
	assert.Equal(s, r.String())
}

// UnmarshalNoms stores the given Number minus two in *u.
func (u *wrappedMarshalerType) UnmarshalNoms(v types.Value) error {
	n := v.(types.Number)
	*u = wrappedMarshalerType(int(n) - 2)
	return nil
}

// TestUnmarshalerWrappedMarshalerType checks that 44 decodes to 42.
func TestUnmarshalerWrappedMarshalerType(t *testing.T) {
	assert := assert.New(t)

	v := types.Number(44)
	u := wrappedMarshalerType(0)
	assert.NoError(Unmarshal(v, &u))
	assert.Equal(wrappedMarshalerType(42), u)
}

// TestUnmarshalerComplexStructType decodes a struct whose fields all use the
// custom unmarshalers above, directly or as list and map elements.
func TestUnmarshalerComplexStructType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	s := "foo|bar"
	r := regexp.MustCompile(s)
	v := types.NewStruct("TestComplexStructType", types.StructData{
		"p":       types.Number(43),
		"ps":      types.NewList(vs, types.Number(2), types.Number(3)),
		"pm":      types.NewMap(vs, types.String("x"), types.Number(101), types.String("y"), types.Number(102)),
		"pslice":  types.String("a,b,c"),
		"pmap":    types.NewSet(vs, types.String("c,123"), types.String("d,456")),
		"pstruct": types.Number(5),
		"b":       types.String(s),
	})
	u := TestComplexStructType{}
	assert.NoError(Unmarshal(v, &u))
	assert.Equal(TestComplexStructType{
		P:  42,
		Ps: []primitiveType{1, 2},
		Pm: map[string]primitiveType{
			"x": 100,
			"y": 101,
		},
		Pslice: primitiveSliceType{"a", "b", "c"},
		Pmap: primitiveMapType{
			"c": "123",
			"d": "456",
		},
		Pstruct: primitiveStructType{1, 2},
		B:       builtinType(*r),
	}, u)
}

// UnmarshalNoms always fails with the error "foo bar baz".
func (u *returnsMarshalerError) UnmarshalNoms(v types.Value) error {
	// Can't use u.err because an empty returnsMarshalerError is created for each
	// call to UnmarshalNoms.
	return errors.New("foo bar baz")
}

// UnmarshalNoms always panics.
func (u panicsMarshaler) UnmarshalNoms(v types.Value) error {
	panic("panic")
}

// TestUnmarshalerError checks that an error returned by UnmarshalNoms is
// returned by Unmarshal, and that a panic in UnmarshalNoms is not recovered.
func TestUnmarshalerError(t *testing.T) {
	assert := assert.New(t)

	m1 := returnsMarshalerError{}
	err := Unmarshal(types.EmptyStruct, &m1)
	assert.Equal(errors.New("foo bar baz"), err)

	m2 := panicsMarshaler{}
	assert.Panics(func() { Unmarshal(types.EmptyStruct, &m2) })
}

// notPointer implements UnmarshalNoms on a value receiver.
type notPointer struct {
	x int
}

// UnmarshalNoms increments x on the receiver copy only, because the receiver
// is a value.
func (u notPointer) UnmarshalNoms(v types.Value) error {
	u.x++
	return nil
}

// TestUnmarshalNomsNotPointerDoesNotShareState checks that three Unmarshal
// calls leave the target unchanged when UnmarshalNoms has a value receiver.
func TestUnmarshalNomsNotPointerDoesNotShareState(t *testing.T) {
	assert := assert.New(t)

	u := notPointer{0}
	assert.NoError(Unmarshal(types.EmptyStruct, &u))
	assert.NoError(Unmarshal(types.EmptyStruct, &u))
	assert.NoError(Unmarshal(types.EmptyStruct, &u))
	assert.Equal(notPointer{0}, u)
}

// TestUnmarshalMustUnmarshal checks that MustUnmarshal panics when a Number is
// decoded into a struct and does not panic for a matching struct value.
func TestUnmarshalMustUnmarshal(t *testing.T) {
	a := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type TestStruct struct{ F1 int }

	v := MustMarshal(vs, types.Number(1))
	var out TestStruct
	a.Panics(func() { MustUnmarshal(v, &out) })

	v = MustMarshal(vs, TestStruct{2})
	a.NotPanics(func() { MustUnmarshal(v, &out) })
}

================================================
FILE: go/marshal/encode.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// Package marshal implements encoding and decoding of Noms values. The mapping
// between Noms objects and Go values is described in the documentation for the
// Marshal and Unmarshal functions.
package marshal

import (
	"fmt"
	"reflect"
	"sort"
	"strings"
	"sync"

	"github.com/attic-labs/noms/go/types"
)

// Marshal converts a Go value to a Noms value.
//
// Marshal traverses the value v recursively. Marshal uses the following
// type-dependent encodings:
//
// Boolean values are encoded as Noms types.Bool.
//
// Floating point and integer values are encoded as Noms types.Number.
// At the moment this might lead to some loss in precision because types.Number
// currently takes a float64.
//
// String values are encoded as Noms types.String.
//
// Slices and arrays are encoded as Noms types.List by default. If a
// field is tagged with `noms:",set"`, it will be encoded as Noms types.Set
// instead.
//
// Maps are encoded as Noms types.Map, or a types.Set if the value type is
// struct{} and the field is tagged with `noms:",set"`.
//
// Struct values are encoded as Noms structs (types.Struct). Each exported Go
// struct field becomes a member of the Noms struct unless
//   - The field's tag is "-"
//   - The field is empty and its tag specifies the "omitempty" option.
//   - The field has the "original" tag, in which case the field is used as an
//     initial value onto which the fields of the Go type are added. When
//     combined with the corresponding support for "original" in Unmarshal(),
//     this allows one to find and modify any values of a known subtype.
//
// Additionally, user-defined types can implement the Marshaler interface to
// provide a custom encoding.
//
// The empty values are false, 0, any nil pointer or interface value, and any
// array, slice, map, or string of length zero.
//
// The Noms struct default field name is the Go struct field name where the
// first character is lower cased, but can be specified in the Go struct field's
// tag value. The "noms" key in the Go struct field's tag value is the field
// name. Examples:
//
//   // Field is ignored.
//   Field int `noms:"-"`
//
//   // Field appears in a Noms struct as field "myName".
//   MyName int
//
//   // Field appears in a Noms struct as key "myName".
//   Field int `noms:"myName"`
//
//   // Field appears in a Noms struct as key "myName" and the field is
//   // omitted from the object if its value is empty, as defined above.
// Field int `noms:"myName,omitempty" // // // Field appears in a Noms struct as key "field" and the field is // // omitted from the object if its value is empty, as defined above. // Field int `noms:",omitempty" // // The name of the Noms struct is the name of the Go struct where the first // character is changed to upper case. You can also implement the // StructNameMarshaler interface to get more control over the actual struct // name. // // Anonymous struct fields are usually marshaled as if their inner exported // fields were fields in the outer struct, subject to the usual Go visibility. // An anonymous struct field with a name given in its Noms tag is treated as // having that name, rather than being anonymous. // // Noms values (values implementing types.Value) are copied over without any // change. // // When marshalling interface{} the dynamic type is used. // // Go pointers, complex, function are not supported. Attempting to encode such a // value causes Marshal to return an UnsupportedTypeError. func Marshal(vrw types.ValueReadWriter, v interface{}) (types.Value, error) { return MarshalOpt(vrw, v, Opt{}) } // MarshalOpt is like Marshal but provides additional options. func MarshalOpt(vrw types.ValueReadWriter, v interface{}, opt Opt) (nomsValue types.Value, err error) { defer func() { if r := recover(); r != nil { switch r := r.(type) { case *UnsupportedTypeError, *InvalidTagError: err = r.(error) case *marshalNomsError: err = r.err default: panic(r) } } }() nomsValue = MustMarshalOpt(vrw, v, opt) return } // MustMarshal marshals a Go value to a Noms value using the same rules as // Marshal(). Panics on failure. func MustMarshal(vrw types.ValueReadWriter, v interface{}) types.Value { return MustMarshalOpt(vrw, v, Opt{}) } // MustMarshalOpt is like MustMarshal, but with additional options. 
func MustMarshalOpt(vrw types.ValueReadWriter, v interface{}, opt Opt) types.Value { rv := reflect.ValueOf(v) nt := nomsTags{ set: opt.Set, } encoder := typeEncoder(rv.Type(), map[string]reflect.Type{}, nt) return encoder(rv, vrw) } // Marshaler is an interface types can implement to provide their own encoding. type Marshaler interface { // MarshalNoms returns the Noms Value encoding of a type, or an error. // nil is not a valid return val - if both val and err are nil, Marshal will // panic. MarshalNoms(vrw types.ValueReadWriter) (val types.Value, err error) } // StructNameMarshaler is an interface that can be implemented to define the // name of a Noms struct. type StructNameMarshaler interface { MarshalNomsStructName() string } // UnsupportedTypeError is returned by encode when attempting to encode a type // that isn't supported. type UnsupportedTypeError struct { Type reflect.Type Message string } func (e *UnsupportedTypeError) Error() string { msg := e.Message if msg == "" { msg = "Type is not supported" } return msg + ", type: " + e.Type.String() } // InvalidTagError is returned by encode and decode when the struct field tag is // invalid. For example if the field name is not a valid Noms struct field name. type InvalidTagError struct { message string } func (e *InvalidTagError) Error() string { return e.message } // marshalNomsError wraps errors from Marshaler.MarshalNoms. These should be // unwrapped and never leak to the caller of Marshal. type marshalNomsError struct { err error } func (e *marshalNomsError) Error() string { return e.err.Error() } type Opt struct { // Marshal []T or map[T]struct{} to Set, or Unmarhsal Set to map[T]struct{}. 
Set bool } type nomsTags struct { name string omitEmpty bool original bool set bool skip bool hasName bool } var nomsValueInterface = reflect.TypeOf((*types.Value)(nil)).Elem() var emptyInterface = reflect.TypeOf((*interface{})(nil)).Elem() var marshalerInterface = reflect.TypeOf((*Marshaler)(nil)).Elem() var structNameMarshalerInterface = reflect.TypeOf((*StructNameMarshaler)(nil)).Elem() type encoderFunc func(v reflect.Value, vrw types.ValueReadWriter) types.Value func boolEncoder(v reflect.Value, vrw types.ValueReadWriter) types.Value { return types.Bool(v.Bool()) } func float64Encoder(v reflect.Value, vrw types.ValueReadWriter) types.Value { return types.Number(v.Float()) } func intEncoder(v reflect.Value, vrw types.ValueReadWriter) types.Value { return types.Number(float64(v.Int())) } func uintEncoder(v reflect.Value, vrw types.ValueReadWriter) types.Value { return types.Number(float64(v.Uint())) } func stringEncoder(v reflect.Value, vrw types.ValueReadWriter) types.Value { return types.String(v.String()) } func nomsValueEncoder(v reflect.Value, vrw types.ValueReadWriter) types.Value { return v.Interface().(types.Value) } func marshalerEncoder(t reflect.Type) encoderFunc { return func(v reflect.Value, vrw types.ValueReadWriter) types.Value { val, err := v.Interface().(Marshaler).MarshalNoms(vrw) if err != nil { panic(&marshalNomsError{err}) } if val == nil { panic(fmt.Errorf("nil result from %s.MarshalNoms", t.String())) } return val } } func typeEncoder(t reflect.Type, seenStructs map[string]reflect.Type, tags nomsTags) encoderFunc { if t.Implements(marshalerInterface) { return marshalerEncoder(t) } switch t.Kind() { case reflect.Bool: return boolEncoder case reflect.Float64, reflect.Float32: return float64Encoder case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: return intEncoder case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: return uintEncoder case reflect.String: return stringEncoder case 
reflect.Struct: return structEncoder(t, seenStructs) case reflect.Slice, reflect.Array: if shouldEncodeAsSet(t, tags) { return setFromListEncoder(t, seenStructs) } return listEncoder(t, seenStructs) case reflect.Map: if shouldEncodeAsSet(t, tags) { return setEncoder(t, seenStructs) } return mapEncoder(t, seenStructs) case reflect.Interface: return func(v reflect.Value, vrw types.ValueReadWriter) types.Value { // Get the dynamic type. v2 := reflect.ValueOf(v.Interface()) return typeEncoder(v2.Type(), seenStructs, tags)(v2, vrw) } case reflect.Ptr: // Allow implementations of types.Value (like *types.Type) if t.Implements(nomsValueInterface) { return nomsValueEncoder } fallthrough default: panic(&UnsupportedTypeError{Type: t}) } } func getStructName(t reflect.Type) string { if t.Implements(structNameMarshalerInterface) { v := reflect.Zero(t) return v.Interface().(StructNameMarshaler).MarshalNomsStructName() } return strings.Title(t.Name()) } func structEncoder(t reflect.Type, seenStructs map[string]reflect.Type) encoderFunc { if t.Implements(nomsValueInterface) { return nomsValueEncoder } e := encoderCache.get(t) if e != nil { return e } structName := getStructName(t) seenStructs[t.Name()] = t fields, knownShape, originalFieldIndex := typeFields(t, seenStructs, false, false) if knownShape { fieldNames := make([]string, len(fields)) for i, f := range fields { fieldNames[i] = f.name } structTemplate := types.MakeStructTemplate(structName, fieldNames) e = func(v reflect.Value, vrw types.ValueReadWriter) types.Value { values := make(types.ValueSlice, len(fields)) for i, f := range fields { values[i] = f.encoder(v.FieldByIndex(f.index), vrw) } return structTemplate.NewStruct(values) } } else if originalFieldIndex == nil { // Slower path: cannot precompute the Noms type since there are Noms collections, // but at least there are a set number of fields. 
e = func(v reflect.Value, vrw types.ValueReadWriter) types.Value { data := make(types.StructData, len(fields)) for _, f := range fields { fv := v.FieldByIndex(f.index) if !fv.IsValid() || f.omitEmpty && isEmptyValue(fv) { continue } data[f.name] = f.encoder(fv, vrw) } return types.NewStruct(structName, data) } } else { // Slowest path - we are extending some other struct. We need to start with the // type of that struct and extend. e = func(v reflect.Value, vrw types.ValueReadWriter) types.Value { fv := v.FieldByIndex(originalFieldIndex) ret := fv.Interface().(types.Struct) if ret.IsZeroValue() { ret = types.NewStruct(structName, nil) } for _, f := range fields { fv := v.FieldByIndex(f.index) if !fv.IsValid() || f.omitEmpty && isEmptyValue(fv) { continue } ret = ret.Set(f.name, f.encoder(fv, vrw)) } return ret } } encoderCache.set(t, e) return e } func isEmptyValue(v reflect.Value) bool { switch v.Kind() { case reflect.Array, reflect.Map, reflect.Slice, reflect.String: return v.Len() == 0 case reflect.Bool: return !v.Bool() case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: return v.Int() == 0 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: return v.Uint() == 0 case reflect.Float32, reflect.Float64: return v.Float() == 0 case reflect.Struct: z := reflect.Zero(v.Type()) return reflect.DeepEqual(z.Interface(), v.Interface()) case reflect.Interface: return v.IsNil() } return false } type field struct { name string encoder encoderFunc index []int nomsType *types.Type omitEmpty bool } type fieldSlice []field func (fs fieldSlice) Len() int { return len(fs) } func (fs fieldSlice) Swap(i, j int) { fs[i], fs[j] = fs[j], fs[i] } func (fs fieldSlice) Less(i, j int) bool { return fs[i].name < fs[j].name } type encoderCacheT struct { sync.RWMutex m map[reflect.Type]encoderFunc } var encoderCache = &encoderCacheT{} // Separate Set encoder cache because the same type with and without the // `noms:",set"` tag encode 
// differently (Set vs Map).
var setEncoderCache = &encoderCacheT{}

// get returns the encoder stored for t, or nil when none is stored. It takes
// the read lock for the duration of the lookup.
func (c *encoderCacheT) get(t reflect.Type) encoderFunc {
	c.RLock()
	defer c.RUnlock()
	return c.m[t]
}

// set stores e as the encoder for t under the write lock, allocating the map
// on first use.
func (c *encoderCacheT) set(t reflect.Type, e encoderFunc) {
	c.Lock()
	defer c.Unlock()
	if c.m == nil {
		c.m = map[reflect.Type]encoderFunc{}
	}
	c.m[t] = e
}

// getTags parses the `noms` struct tag of f.
//
// A tag of "-" sets only skip. Otherwise the tag is split on ","; the first
// element is the Noms field name (the Go name with its first byte lower-cased
// when that element is empty) and the remaining elements are options
// ("omitempty", "original", "set"). It panics with an *InvalidTagError when
// the name is not a valid Noms struct field name or an option is not
// recognized.
func getTags(f reflect.StructField) (tags nomsTags) {
	reflectTags := f.Tag.Get("noms")
	if reflectTags == "-" {
		tags.skip = true
		return
	}

	tagsSlice := strings.Split(reflectTags, ",")

	// The first tag is always the name, or empty to use the field as the name.
	if len(tagsSlice) == 0 || tagsSlice[0] == "" {
		tags.name = strings.ToLower(f.Name[:1]) + f.Name[1:]
	} else {
		tags.name = tagsSlice[0]
		tags.hasName = true
	}

	if !types.IsValidStructFieldName(tags.name) {
		panic(&InvalidTagError{"Invalid struct field name: " + tags.name})
	}

	for i := 1; i < len(tagsSlice); i++ {
		switch tag := tagsSlice[i]; tag {
		case "omitempty":
			tags.omitEmpty = true
		case "original":
			tags.original = true
		case "set":
			tags.set = true
		default:
			panic(&InvalidTagError{"Unrecognized tag: " + tag})
		}
	}
	return
}

// validateField panics with an *UnsupportedTypeError (reporting the enclosing
// type t) when f is an unexported, non-embedded field.
func validateField(f reflect.StructField, t reflect.Type) {
	// PkgPath is the package path that qualifies a lower case (unexported)
	// field name. It is empty for upper case (exported) field names.
	// See https://golang.org/ref/spec#Uniqueness_of_identifiers
	if f.PkgPath != "" && !f.Anonymous { // unexported
		panic(&UnsupportedTypeError{t, "Non exported fields are not supported"})
	}
}

// typeFields collects the encodable fields of the struct type t.
//
// Fields tagged "-" are skipped. A field tagged "original" is not added to
// fields; its index path is returned as originalFieldIndex instead. Exported
// embedded fields without an explicit tag name are flattened into the result,
// with their index paths prefixed by the embedding field's index.
//
// knownShape starts out true and is cleared when computeType is set and
// encodeType returns nil for a field, when computeType is unset and a field
// uses "omitempty", or when an embedded struct reports an unknown shape.
//
// The result is sorted by field name unless embedded is true, in which case
// the outermost call does the sorting.
func typeFields(t reflect.Type, seenStructs map[string]reflect.Type, computeType, embedded bool) (fields fieldSlice, knownShape bool, originalFieldIndex []int) {
	knownShape = true
	for i := 0; i < t.NumField(); i++ {
		index := make([]int, 1)
		index[0] = i
		f := t.Field(i)
		tags := getTags(f)
		if tags.skip {
			continue
		}

		if tags.original {
			originalFieldIndex = f.Index
			continue
		}

		if f.Anonymous && f.PkgPath == "" && !tags.hasName {
			embeddedFields, embeddedKnownShape, embeddedOriginalFieldIndex := typeFields(f.Type, seenStructs, computeType, true)
			if embeddedOriginalFieldIndex != nil {
				originalFieldIndex = append(index, embeddedOriginalFieldIndex...)
			}
			knownShape = knownShape && embeddedKnownShape

			for _, ef := range embeddedFields {
				// Make the path relative to the outer struct.
				ef.index = append(index, ef.index...)
				fields = append(fields, ef)
			}

			continue
		}

		var nt *types.Type
		validateField(f, t)
		if computeType {
			nt = encodeType(f.Type, seenStructs, tags)
			if nt == nil {
				knownShape = false
			}
		}

		if tags.omitEmpty && !computeType {
			knownShape = false
		}

		fields = append(fields, field{
			name:      tags.name,
			encoder:   typeEncoder(f.Type, seenStructs, tags),
			index:     index,
			nomsType:  nt,
			omitEmpty: tags.omitEmpty,
		})
	}

	if !embedded {
		sort.Sort(fields)
	}

	// If embedded then the fields gets sorted once we return to the caller.
	return
}

// listEncoder returns the encoder that turns a slice or array of type t into
// a types.List, reusing the entry in encoderCache when present.
//
// The new encoder is stored in the cache before the element encoder is
// resolved. The closure takes the read lock on init, which stays write-locked
// until this function returns, so the encoder cannot run before elemEncoder is
// assigned.
// NOTE(review): the early cache insert presumably lets self-referential types
// find this encoder while their element type is being resolved - confirm.
// NOTE(review): if typeEncoder panics below, the cached entry appears to keep
// a nil elemEncoder - confirm whether later lookups of t can hit it.
func listEncoder(t reflect.Type, seenStructs map[string]reflect.Type) encoderFunc {
	e := encoderCache.get(t)
	if e != nil {
		return e
	}

	var elemEncoder encoderFunc
	// lock e until encoder(s) are initialized
	var init sync.RWMutex
	init.Lock()
	defer init.Unlock()
	e = func(v reflect.Value, vrw types.ValueReadWriter) types.Value {
		init.RLock()
		defer init.RUnlock()
		values := make([]types.Value, v.Len())
		for i := 0; i < v.Len(); i++ {
			values[i] = elemEncoder(v.Index(i), vrw)
		}
		return types.NewList(vrw, values...)
	}

	encoderCache.set(t, e)
	elemEncoder = typeEncoder(t.Elem(), seenStructs, nomsTags{})
	return e
}

// Encode set from array or slice
//
// setFromListEncoder follows the same structure as listEncoder but builds a
// types.Set and uses setEncoderCache.
func setFromListEncoder(t reflect.Type, seenStructs map[string]reflect.Type) encoderFunc {
	e := setEncoderCache.get(t)
	if e != nil {
		return e
	}

	var elemEncoder encoderFunc
	// lock e until encoder(s) are initialized
	var init sync.RWMutex
	init.Lock()
	defer init.Unlock()
	e = func(v reflect.Value, vrw types.ValueReadWriter) types.Value {
		init.RLock()
		defer init.RUnlock()
		values := make([]types.Value, v.Len())
		for i := 0; i < v.Len(); i++ {
			values[i] = elemEncoder(v.Index(i), vrw)
		}
		return types.NewSet(vrw, values...)
	}

	setEncoderCache.set(t, e)
	elemEncoder = typeEncoder(t.Elem(), seenStructs, nomsTags{})
	return e
}

// setEncoder returns the encoder that turns the keys of a map of type t into
// a types.Set; the map values are not read. It uses setEncoderCache and the
// same locking scheme as listEncoder.
func setEncoder(t reflect.Type, seenStructs map[string]reflect.Type) encoderFunc {
	e := setEncoderCache.get(t)
	if e != nil {
		return e
	}

	var encoder encoderFunc
	// lock e until encoder(s) are initialized
	var init sync.RWMutex
	init.Lock()
	defer init.Unlock()
	e = func(v reflect.Value, vrw types.ValueReadWriter) types.Value {
		init.RLock()
		defer init.RUnlock()
		values := make([]types.Value, v.Len(), v.Len())
		for i, k := range v.MapKeys() {
			values[i] = encoder(k, vrw)
		}
		return types.NewSet(vrw, values...)
	}

	setEncoderCache.set(t, e)
	encoder = typeEncoder(t.Key(), seenStructs, nomsTags{})
	return e
}

func mapEncoder(t reflect.Type, seenStructs map[string]reflect.Type) encoderFunc {
	e := encoderCache.get(t)
	if e != nil {
		return e
	}

	var keyEncoder encoderFunc
	var valueEncoder encoderFunc
	// lock e until encoder(s) are initialized
	var init sync.RWMutex
	init.Lock()
	defer init.Unlock()
	e = func(v reflect.Value, vrw types.ValueReadWriter) types.Value {
		init.RLock()
		defer init.RUnlock()
		keys := v.MapKeys()
		kvs := make([]types.Value, 2*len(keys))
		for i, k := range keys {
			kvs[2*i] = keyEncoder(k, vrw)
			kvs[2*i+1] = valueEncoder(v.MapIndex(k), vrw)
		}
		return types.NewMap(vrw, kvs...)
}
	encoderCache.set(t, e)
	keyEncoder = typeEncoder(t.Key(), seenStructs, nomsTags{})
	valueEncoder = typeEncoder(t.Elem(), seenStructs, nomsTags{})
	return e
}

// shouldEncodeAsSet reports whether a value of type t carrying tags should be
// encoded as a Noms Set. Slices and arrays qualify whenever the set tag is
// present; maps additionally need a field-less struct as their value type. It
// panics for any other kind.
func shouldEncodeAsSet(t reflect.Type, tags nomsTags) bool {
	switch t.Kind() {
	case reflect.Slice, reflect.Array:
		return tags.set
	case reflect.Map:
		// map[T]struct{} `noms:",set"`
		return tags.set &&
			t.Elem().Kind() == reflect.Struct &&
			t.Elem().NumField() == 0
	default:
		panic(fmt.Errorf("called with unexpected kind %v", t.Kind()))
	}
}

================================================
FILE: go/marshal/encode_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package marshal

import (
	"bytes"
	"errors"
	"fmt"
	"math"
	"regexp"
	"strings"
	"testing"

	"github.com/attic-labs/noms/go/types"
	"github.com/stretchr/testify/assert"
)

// TestEncode marshals Go numbers, bools, strings, Noms values and structs and
// compares each result with the expected Noms value.
func TestEncode(tt *testing.T) {
	vs := newTestValueStore()
	defer vs.Close()

	// t marshals v and checks the result equals exp, then marshals that result
	// once more and checks it is unchanged.
	t := func(exp types.Value, v interface{}) {
		actual, err := Marshal(vs, v)
		assert.NoError(tt, err)
		assert.True(tt, exp.Equals(actual))

		// Encode again for fallthrough
		actual2, err := Marshal(vs, actual)
		assert.NoError(tt, err)
		assert.True(tt, exp.Equals(actual2))
	}

	for _, n := range []float32{0, 42, 3.14159265359, math.MaxFloat32} {
		t(types.Number(n), n)
		t(types.Number(-n), -n)
	}

	for _, n := range []float64{0, 42, 3.14159265359, 9007199254740991, math.MaxFloat64} {
		t(types.Number(n), n)
		t(types.Number(-n), -n)
	}

	for _, n := range []int8{0, 42, math.MaxInt8} {
		t(types.Number(n), n)
		t(types.Number(-n), -n)
	}

	for _, n := range []int16{0, 42, math.MaxInt16} {
		t(types.Number(n), n)
		t(types.Number(-n), -n)
	}

	for _, n := range []int32{0, 42, math.MaxInt32} {
		t(types.Number(n), n)
		t(types.Number(-n), -n)
	}

	// int is at least int32
	for _, n := range []int{0, 42, math.MaxInt32} {
		t(types.Number(n), n)
		t(types.Number(-n), -n)
	}

	for _, n := range []int64{0, 42, math.MaxInt64} {
t(types.Number(n), n)
		t(types.Number(-n), -n)
	}

	// Unsigned kinds: only the non-negated value is checked.
	for _, n := range []uint8{0, 42, math.MaxUint8} {
		t(types.Number(n), n)
	}

	for _, n := range []uint16{0, 42, math.MaxUint16} {
		t(types.Number(n), n)
	}

	for _, n := range []uint32{0, 42, math.MaxUint32} {
		t(types.Number(n), n)
	}

	// uint is at least uint32
	for _, n := range []uint{0, 42, math.MaxUint32} {
		t(types.Number(n), n)
	}

	for _, n := range []uint64{0, 42, math.MaxUint64} {
		t(types.Number(n), n)
	}

	t(types.Bool(true), true)
	t(types.Bool(false), false)

	for _, s := range []string{"", "s", "hello", "💩"} {
		t(types.String(s), s)
	}

	// Values that already are Noms values are expected to come back equal.
	t(types.NewList(vs, types.Number(42)), types.NewList(vs, types.Number(42)))
	t(types.NewMap(vs, types.Number(42), types.String("hi")), types.NewMap(vs, types.Number(42), types.String("hi")))
	t(types.NewSet(vs, types.String("bye")), types.NewSet(vs, types.String("bye")))
	t(types.NewBlob(vs, bytes.NewBufferString("hello")), types.NewBlob(vs, bytes.NewBufferString("hello")))

	type TestStruct struct {
		Str string
		Num float64
	}
	t(types.NewStruct("TestStruct", types.StructData{
		"num": types.Number(42),
		"str": types.String("Hello"),
	}), TestStruct{Str: "Hello", Num: 42})
	// Same again to test caching
	t(types.NewStruct("TestStruct", types.StructData{
		"num": types.Number(1),
		"str": types.String("Bye"),
	}), TestStruct{Str: "Bye", Num: 1})

	// An anonymous Go struct is expected to produce a Noms struct with an
	// empty name.
	anonStruct := struct {
		B bool
	}{
		true,
	}
	t(types.NewStruct("", types.StructData{
		"b": types.Bool(true),
	}), anonStruct)

	type TestNestedStruct struct {
		A types.List
		B TestStruct
		C float64
	}
	t(types.NewStruct("TestNestedStruct", types.StructData{
		"a": types.NewList(vs, types.String("hi")),
		"b": types.NewStruct("TestStruct", types.StructData{
			"str": types.String("bye"),
			"num": types.Number(5678),
		}),
		"c": types.Number(1234),
	}), TestNestedStruct{
		A: types.NewList(vs, types.String("hi")),
		B: TestStruct{
			Str: "bye",
			Num: 5678,
		},
		C: 1234,
	})

	// The Go type name starts with a lower case letter while the expected Noms
	// struct name is "TestStruct".
	type testStruct struct {
		Str string
		Num float64
	}
	t(types.NewStruct("TestStruct", types.StructData{
		"num": types.Number(42),
		"str":
types.String("Hello"),
	}), testStruct{Str: "Hello", Num: 42})
}

// assertEncodeErrorMessage marshals v and asserts that Marshal returns an
// error whose message is exactly expectedMessage.
func assertEncodeErrorMessage(t *testing.T, v interface{}, expectedMessage string) {
	vs := newTestValueStore()
	defer vs.Close()

	_, err := Marshal(vs, v)
	assert.Error(t, err)
	assert.Equal(t, expectedMessage, err.Error())
}

// TestInvalidTypes checks the error reported for a channel and for a pointer
// to int.
func TestInvalidTypes(t *testing.T) {
	assertEncodeErrorMessage(t, make(chan int), "Type is not supported, type: chan int")
	x := 42
	assertEncodeErrorMessage(t, &x, "Type is not supported, type: *int")
}

// TestEncodeEmbeddedStructSkip checks that an embedded struct tagged "-"
// contributes no fields to the result.
func TestEncodeEmbeddedStructSkip(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type EmbeddedStruct struct {
		X int
	}
	type TestStruct struct {
		EmbeddedStruct `noms:"-"`
		Y int
	}
	s := TestStruct{EmbeddedStruct{1}, 2}
	v, err := Marshal(vs, s)
	assert.NoError(err)
	assert.True(types.NewStruct("TestStruct", types.StructData{
		"y": types.Number(2),
	}).Equals(v))
}

// TestEncodeEmbeddedStructWithName checks that an embedded struct with an
// explicit tag name is encoded as a nested struct under that name.
func TestEncodeEmbeddedStructWithName(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type EmbeddedStruct struct {
		X int
	}
	type TestStruct struct {
		EmbeddedStruct `noms:"em"`
		Y int
	}
	s := TestStruct{EmbeddedStruct{1}, 2}
	v, err := Marshal(vs, s)
	assert.NoError(err)
	assert.True(types.NewStruct("TestStruct", types.StructData{
		"em": types.NewStruct("EmbeddedStruct", types.StructData{
			"x": types.Number(1),
		}),
		"y": types.Number(2),
	}).Equals(v))
}

// TestEncodeEmbeddedStruct checks that the fields of untagged embedded
// structs are flattened into the outer struct, also through two levels of
// embedding.
func TestEncodeEmbeddedStruct(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type EmbeddedStruct struct {
		X int
	}
	type TestStruct struct {
		EmbeddedStruct
	}
	s := TestStruct{EmbeddedStruct{1}}
	v, err := Marshal(vs, s)
	assert.NoError(err)
	assert.True(types.NewStruct("TestStruct", types.StructData{
		"x": types.Number(1),
	}).Equals(v))

	type TestOuter struct {
		A int
		TestStruct
		B int
	}
	s2 := TestOuter{0, TestStruct{EmbeddedStruct{1}}, 2}
	v2, err := Marshal(vs, s2)
	assert.NoError(err)
	assert.True(types.NewStruct("TestOuter", types.StructData{
		"a": types.Number(0),
		"b": types.Number(2),
		"x":
types.Number(1),
	}).Equals(v2))
}

// TestEncodeEmbeddedStructOriginal marshals a struct whose embedded struct
// holds a zero-valued field tagged `original` and checks that only the other
// embedded fields appear in the result.
func TestEncodeEmbeddedStructOriginal(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type EmbeddedStruct struct {
		X int
		O types.Struct `noms:",original"`
		B bool
	}
	type TestStruct struct {
		EmbeddedStruct
	}
	s := TestStruct{
		EmbeddedStruct: EmbeddedStruct{
			X: 1,
			B: true,
		},
	}
	v, err := Marshal(vs, s)
	assert.NoError(err)
	assert.True(types.NewStruct("TestStruct", types.StructData{
		"b": types.Bool(true),
		"x": types.Number(1),
	}).Equals(v))
}

// TestEncodeNonExportedField checks the error reported for a struct with an
// unexported field.
func TestEncodeNonExportedField(t *testing.T) {
	type TestStruct struct {
		x int
	}
	assertEncodeErrorMessage(t, TestStruct{1}, "Non exported fields are not supported, type: marshal.TestStruct")
}

// TestEncodeTaggingSkip checks that a field tagged "-" is left out.
func TestEncodeTaggingSkip(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type S struct {
		Abc int `noms:"-"`
		Def bool
	}
	s := S{42, true}
	v, err := Marshal(vs, s)
	assert.NoError(err)
	assert.True(types.NewStruct("S", types.StructData{
		"def": types.Bool(true),
	}).Equals(v))
}

// TestEncodeNamedFields checks that a tag name is used verbatim as the field
// name and that untagged fields get their first letter lower-cased.
func TestEncodeNamedFields(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type S struct {
		Aaa int `noms:"a"`
		Bbb bool `noms:"B"`
		Ccc string
	}
	s := S{42, true, "Hi"}
	v, err := Marshal(vs, s)
	assert.NoError(err)
	assert.True(types.NewStruct("S", types.StructData{
		"a": types.Number(42),
		"B": types.Bool(true),
		"ccc": types.String("Hi"),
	}).Equals(v))
}

// TestEncodeInvalidNamedFields checks the error reported for a tag name that
// is not a valid struct field name.
func TestEncodeInvalidNamedFields(t *testing.T) {
	type S struct {
		A int `noms:"1a"`
	}
	assertEncodeErrorMessage(t, S{42}, "Invalid struct field name: 1a")
}

// TestEncodeOmitEmpty checks, kind by kind, that fields tagged omitempty are
// kept when set and dropped when empty.
func TestEncodeOmitEmpty(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type S struct {
		String string `noms:",omitempty"`
		Bool bool `noms:",omitempty"`
		Int int `noms:",omitempty"`
		Int8 int8 `noms:",omitempty"`
		Int16 int16 `noms:",omitempty"`
		Int32 int32 `noms:",omitempty"`
		Int64 int64 `noms:",omitempty"`
		Uint uint `noms:",omitempty"`
		Uint8 uint8 `noms:",omitempty"`
		Uint16 uint16 `noms:",omitempty"`
		Uint32
uint32 `noms:",omitempty"`
		Uint64 uint64 `noms:",omitempty"`
		Float32 float32 `noms:",omitempty"`
		Float64 float64 `noms:",omitempty"`
	}
	// Every field set to a non-empty value: all of them are expected.
	s := S{
		String: "s",
		Bool: true,
		Int: 1,
		Int8: 1,
		Int16: 1,
		Int32: 1,
		Int64: 1,
		Uint: 1,
		Uint8: 1,
		Uint16: 1,
		Uint32: 1,
		Uint64: 1,
		Float32: 1,
		Float64: 1,
	}
	v, err := Marshal(vs, s)
	assert.NoError(err)
	assert.True(types.NewStruct("S", types.StructData{
		"string": types.String("s"),
		"bool": types.Bool(true),
		"int": types.Number(1),
		"int8": types.Number(1),
		"int16": types.Number(1),
		"int32": types.Number(1),
		"int64": types.Number(1),
		"uint": types.Number(1),
		"uint8": types.Number(1),
		"uint16": types.Number(1),
		"uint32": types.Number(1),
		"uint64": types.Number(1),
		"float32": types.Number(1),
		"float64": types.Number(1),
	}).Equals(v))

	// Every field at its zero value: an empty struct is expected.
	s2 := S{
		String: "",
		Bool: false,
		Int: 0,
		Int8: 0,
		Int16: 0,
		Int32: 0,
		Int64: 0,
		Uint: 0,
		Uint8: 0,
		Uint16: 0,
		Uint32: 0,
		Uint64: 0,
		Float32: 0,
		Float64: 0,
	}
	v2, err := Marshal(vs, s2)
	assert.NoError(err)
	assert.True(types.NewStruct("S", types.StructData{}).Equals(v2))

	type S2 struct {
		Slice []int `noms:",omitempty"`
		Map map[int]int `noms:",omitempty"`
	}
	// Non-empty containers holding zero elements are still kept.
	s3 := S2{
		Slice: []int{0},
		Map: map[int]int{0: 0},
	}
	v3, err := Marshal(vs, s3)
	assert.NoError(err)
	assert.True(types.NewStruct("S2", types.StructData{
		"slice": types.NewList(vs, types.Number(0)),
		"map": types.NewMap(vs, types.Number(0), types.Number(0)),
	}).Equals(v3))

	// Empty and nil containers are both dropped.
	s4 := S2{
		Slice: []int{},
		Map: map[int]int{},
	}
	v4, err := Marshal(vs, s4)
	assert.NoError(err)
	assert.True(types.NewStruct("S2", types.StructData{}).Equals(v4))

	s5 := S2{
		Slice: nil,
		Map: nil,
	}
	v5, err := Marshal(vs, s5)
	assert.NoError(err)
	assert.True(types.NewStruct("S2", types.StructData{}).Equals(v5))

	type S3 struct {
		List types.List `noms:",omitempty"`
		Value types.Value `noms:",omitempty"`
	}
	// An empty Noms list made with NewList and a zero Number are kept.
	s6 := S3{
		List: types.NewList(vs),
		Value: types.Number(0),
	}
	v6, err := Marshal(vs, s6)
	assert.NoError(err)
	assert.True(types.NewStruct("S3", types.StructData{
		"list": types.NewList(vs),
		"value":
types.Number(0),
	}).Equals(v6))

	// The zero types.List and a nil types.Value are dropped.
	s7 := S3{
		List: types.List{},
		Value: nil,
	}
	v7, err := Marshal(vs, s7)
	assert.NoError(err)
	assert.True(types.NewStruct("S3", types.StructData{}).Equals(v7))

	// Both name and omitempty
	type S4 struct {
		X int `noms:"y,omitempty"`
	}
	s8 := S4{
		X: 1,
	}
	v8, err := Marshal(vs, s8)
	assert.NoError(err)
	assert.True(types.NewStruct("S4", types.StructData{
		"y": types.Number(1),
	}).Equals(v8))

	s9 := S4{
		X: 0,
	}
	v9, err := Marshal(vs, s9)
	assert.NoError(err)
	assert.True(types.NewStruct("S4", types.StructData{}).Equals(v9))
}

// ExampleMarshal marshals a small struct and prints two of its fields.
func ExampleMarshal() {
	vs := newTestValueStore()
	defer vs.Close()

	type Person struct {
		Given string
		Male bool
	}
	arya, err := Marshal(vs, Person{"Arya", false})
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Printf("Given: %s, Male: %t\n", arya.(types.Struct).Get("given").(types.String), arya.(types.Struct).Get("male").(types.Bool))
	// Output: Given: Arya, Male: false
}

// TestEncodeSlice checks that a Go slice is encoded as a Noms List.
func TestEncodeSlice(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	v, err := Marshal(vs, []string{"a", "b", "c"})
	assert.NoError(err)
	assert.True(types.NewList(vs, types.String("a"), types.String("b"), types.String("c")).Equals(v))
}

// TestEncodeArray checks that a Go array is encoded as a Noms List.
func TestEncodeArray(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	v, err := Marshal(vs, [3]int{1, 2, 3})
	assert.NoError(err)
	assert.True(types.NewList(vs, types.Number(1), types.Number(2), types.Number(3)).Equals(v))
}

// TestEncodeStructWithSlice checks a slice nested in a struct field.
func TestEncodeStructWithSlice(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type S struct {
		List []int
	}
	v, err := Marshal(vs, S{[]int{1, 2, 3}})
	assert.NoError(err)
	assert.True(types.NewStruct("S", types.StructData{
		"list": types.NewList(vs, types.Number(1), types.Number(2), types.Number(3)),
	}).Equals(v))
}

// TestEncodeStructWithArrayOfNomsValue checks a struct field that is a Go
// array whose elements are Noms Sets.
func TestEncodeStructWithArrayOfNomsValue(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type S struct {
		List [1]types.Set
	}
	v, err := Marshal(vs,
S{[1]types.Set{types.NewSet(vs, types.Bool(true))}})
	assert.NoError(err)
	assert.True(types.NewStruct("S", types.StructData{
		"list": types.NewList(vs, types.NewSet(vs, types.Bool(true))),
	}).Equals(v))
}

// TestEncodeNomsTypePtr checks that a *types.Type field is stored as-is, for
// both a primitive type and a struct type.
func TestEncodeNomsTypePtr(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	testMarshal := func(g interface{}, expected types.Value) {
		v, err := Marshal(vs, g)
		assert.NoError(err)
		assert.Equal(expected, v)
	}

	type S struct {
		Type *types.Type
	}

	primitive := types.StringType
	testMarshal(S{primitive}, types.NewStruct("S", types.StructData{"type": primitive}))

	complex := types.MakeStructType("Complex",
		types.StructField{
			Name: "stuff",
			Type: types.StringType,
		},
	)
	testMarshal(S{complex}, types.NewStruct("S", types.StructData{"type": complex}))
}

// TestEncodeRecursive marshals a self-referential struct and checks both the
// resulting Noms type (which contains a cycle type) and the value.
func TestEncodeRecursive(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type Node struct {
		Value int
		Children []Node
	}
	// One child has an empty slice and the other a nil slice; both are
	// expected to become empty lists.
	v, err := Marshal(vs, Node{
		1, []Node{
			{2, []Node{}},
			{3, []Node(nil)},
		},
	})
	assert.NoError(err)

	typ := types.MakeStructType("Node",
		types.StructField{
			Name: "children",
			Type: types.MakeListType(types.MakeCycleType("Node")),
		},
		types.StructField{
			Name: "value",
			Type: types.NumberType,
		},
	)
	assert.True(typ.Equals(types.TypeOf(v)))

	assert.True(types.NewStruct("Node", types.StructData{
		"children": types.NewList(
			vs,
			types.NewStruct("Node", types.StructData{
				"children": types.NewList(vs),
				"value": types.Number(2),
			}),
			types.NewStruct("Node", types.StructData{
				"children": types.NewList(vs),
				"value": types.Number(3),
			}),
		),
		"value": types.Number(1),
	}).Equals(v))
}

// TestEncodeMap checks that Go maps are encoded as Noms Maps, including maps
// with struct keys and nil or empty maps.
func TestEncodeMap(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	v, err := Marshal(vs, map[string]int{"a": 1, "b": 2, "c": 3})
	assert.NoError(err)
	assert.True(types.NewMap(
		vs,
		types.String("a"), types.Number(1),
		types.String("b"), types.Number(2),
		types.String("c"), types.Number(3)).Equals(v))

	type S struct {
		N string
	}
	v, err = Marshal(vs,
map[S]bool{S{"Yes"}: true, S{"No"}: false})
	assert.NoError(err)
	assert.True(types.NewMap(
		vs,
		types.NewStruct("S", types.StructData{"n": types.String("Yes")}), types.Bool(true),
		types.NewStruct("S", types.StructData{"n": types.String("No")}), types.Bool(false)).Equals(v))

	// A nil map and an empty map are both expected to become an empty Map.
	v, err = Marshal(vs, map[string]int(nil))
	assert.NoError(err)
	assert.True(types.NewMap(vs).Equals(v))

	v, err = Marshal(vs, map[string]int{})
	assert.NoError(err)
	assert.True(types.NewMap(vs).Equals(v))
}

// TestEncodeInterface checks values passed through interface{}, including a
// map whose keys and values are themselves interface{}.
func TestEncodeInterface(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	var i interface{}
	i = []string{"a", "b"}
	v, err := Marshal(vs, i)
	assert.NoError(err)
	assert.True(types.NewList(vs, types.String("a"), types.String("b")).Equals(v))

	i = map[interface{}]interface{}{"a": true, struct{ Name string }{"b"}: 42}
	v, err = Marshal(vs, i)
	assert.NoError(err)
	assert.True(types.NewMap(
		vs,
		types.String("a"), types.Bool(true),
		types.NewStruct("", types.StructData{"name": types.String("b")}), types.Number(42),
	).Equals(v))
}

// TestEncodeSet checks which field types become a Noms Set when tagged with
// `set`, and which keep their default encoding.
func TestEncodeSet(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	v, err := Marshal(vs, struct {
		A map[int]struct{} `noms:",set"`
		B map[int]struct{}
		C map[int]string `noms:",set"`
		D map[string]struct{} `noms:",set"`
		E map[string]struct{}
		F map[string]int `noms:",set"`
		G []int `noms:",set"`
		H string `noms:",set"`
	}{
		map[int]struct{}{0: {}, 1: {}, 2: {}},
		map[int]struct{}{3: {}, 4: {}, 5: {}},
		map[int]string{},
		map[string]struct{}{"A": {}, "B": {}, "C": {}},
		map[string]struct{}{"D": {}, "E": {}, "F": {}},
		map[string]int{},
		[]int{1, 2, 3},
		"",
	})
	assert.NoError(err)
	s, ok := v.(types.Struct)
	assert.True(ok)

	// Expected Noms kind per field: the tag only takes effect on
	// map[T]struct{} and on slices.
	expect := map[string]types.NomsKind{
		"a": types.SetKind,
		"b": types.MapKind,
		"c": types.MapKind,
		"d": types.SetKind,
		"e": types.MapKind,
		"f": types.MapKind,
		"g": types.SetKind,
		"h": types.StringKind,
	}
	for fieldName, kind := range expect {
		assert.Equal(kind, s.Get(fieldName).Kind())
	}

	// Test both the Set values are
// correct, and that the equivalent typed Map
	// are correct in case the Set marshaling interferes with it.

	a := s.Get("a").(types.Set)
	assert.True(a.Has(types.Number(0)))
	assert.True(a.Has(types.Number(1)))
	assert.True(a.Has(types.Number(2)))

	b := s.Get("b").(types.Map)
	assert.True(b.Has(types.Number(3)))
	assert.True(b.Has(types.Number(4)))
	assert.True(b.Has(types.Number(5)))

	d := s.Get("d").(types.Set)
	assert.True(d.Has(types.String("A")))
	assert.True(d.Has(types.String("B")))
	assert.True(d.Has(types.String("C")))

	e := s.Get("e").(types.Map)
	assert.True(e.Has(types.String("D")))
	assert.True(e.Has(types.String("E")))
	assert.True(e.Has(types.String("F")))

	g := s.Get("g").(types.Set)
	assert.True(g.Has(types.Number(1)))
	assert.True(g.Has(types.Number(2)))
	assert.True(g.Has(types.Number(3)))
}

// TestEncodeOpt is a table test for MarshalOpt: the same slice and map
// inputs are encoded with and without Opt.Set.
func TestEncodeOpt(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	tc := []struct {
		in interface{}
		opt Opt
		wantValue types.Value
	}{
		{
			[]string{"a", "b"},
			Opt{},
			types.NewList(vs, types.String("a"), types.String("b")),
		},
		{
			[]string{"a", "b"},
			Opt{Set: true},
			types.NewSet(vs, types.String("a"), types.String("b")),
		},
		{
			map[string]struct{}{"a": struct{}{}, "b": struct{}{}},
			Opt{},
			types.NewMap(vs, types.String("a"), types.NewStruct("", nil), types.String("b"), types.NewStruct("", nil)),
		},
		{
			map[string]struct{}{"a": struct{}{}, "b": struct{}{}},
			Opt{Set: true},
			types.NewSet(vs, types.String("a"), types.String("b")),
		},
	}

	for _, t := range tc {
		r, err := MarshalOpt(vs, t.in, t.opt)
		assert.True(t.wantValue.Equals(r))
		assert.Nil(err)
	}
}

// TestEncodeSetWithTags checks the `set` tag combined with a name and with
// omitempty.
func TestEncodeSetWithTags(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	v, err := Marshal(vs, struct {
		A map[int]struct{} `noms:"foo,set"`
		B map[int]struct{} `noms:",omitempty,set"`
		C map[int]struct{} `noms:"bar,omitempty,set"`
	}{
		A: map[int]struct{}{0: {}, 1: {}},
		C: map[int]struct{}{2: {}, 3: {}},
	})
	assert.NoError(err)
	s, ok := v.(types.Struct)
	assert.True(ok)

	// A and C are renamed and B is left unset, so none of the default field
	// names may be present.
	_, ok =
s.MaybeGet("a")
	assert.False(ok)
	_, ok = s.MaybeGet("b")
	assert.False(ok)
	_, ok = s.MaybeGet("c")
	assert.False(ok)

	foo, ok := s.Get("foo").(types.Set)
	assert.True(ok)
	assert.True(types.NewSet(vs, types.Number(0), types.Number(1)).Equals(foo))

	bar, ok := s.Get("bar").(types.Set)
	assert.True(ok)
	assert.True(types.NewSet(vs, types.Number(2), types.Number(3)).Equals(bar))
}

// TestInvalidTag checks the error reported for an unknown tag option; the
// option name differs from "omitempty" only in letter case.
func TestInvalidTag(t *testing.T) {
	vs := newTestValueStore()
	defer vs.Close()

	_, err := Marshal(vs, struct {
		F string `noms:",omitEmpty"`
	}{"F"})
	assert.Error(t, err)
	assert.Equal(t, `Unrecognized tag: omitEmpty`, err.Error())
}

// TestEncodeCanSkipUnexportedField checks that an unexported field tagged
// "-" does not cause an error.
func TestEncodeCanSkipUnexportedField(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type S struct {
		Abc int
		notExported bool `noms:"-"`
	}
	s := S{42, true}
	v, err := Marshal(vs, s)
	assert.NoError(err)
	assert.True(types.NewStruct("S", types.StructData{
		"abc": types.Number(42),
	}).Equals(v))
}

// TestEncodeOriginal round-trips structs through Unmarshal and Marshal with a
// field tagged `original` and checks that the result is the original struct
// with the changed field set.
func TestEncodeOriginal(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type S struct {
		Foo int `noms:",omitempty"`
		Bar types.Struct `noms:",original"`
	}

	var s S
	var err error
	var orig types.Struct

	// New field value clobbers old field value
	orig = types.NewStruct("S", types.StructData{
		"foo": types.Number(42),
	})
	err = Unmarshal(orig, &s)
	assert.NoError(err)
	s.Foo = 43
	assert.True(MustMarshal(vs, s).Equals(orig.Set("foo", types.Number(43))))

	// New field extends old struct
	orig = types.NewStruct("S", types.StructData{})
	err = Unmarshal(orig, &s)
	assert.NoError(err)
	s.Foo = 43
	assert.True(MustMarshal(vs, s).Equals(orig.Set("foo", types.Number(43))))

	// Old struct name always used
	orig = types.NewStruct("Q", types.StructData{})
	err = Unmarshal(orig, &s)
	assert.NoError(err)
	s.Foo = 43
	assert.True(MustMarshal(vs, s).Equals(orig.Set("foo", types.Number(43))))

	// Field type of base are preserved
	orig = types.NewStruct("S", types.StructData{
		"foo": types.Number(42),
	})
	err = Unmarshal(orig, &s)
	assert.NoError(err)
	s.Foo =
43
	out := MustMarshal(vs, s)
	assert.True(out.Equals(orig.Set("foo", types.Number(43))))

	st2 := types.MakeStructTypeFromFields("S", types.FieldMap{
		"foo": types.NumberType,
	})
	assert.True(types.TypeOf(out).Equals(st2))

	// It's OK to have an empty original field
	s = S{
		Foo: 42,
	}
	assert.True(MustMarshal(vs, s).Equals(
		types.NewStruct("S", types.StructData{"foo": types.Number(float64(42))})))
}

// TestNomsTypes checks that struct fields which already hold Noms values are
// stored unchanged.
func TestNomsTypes(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	type S struct {
		Blob types.Blob
		Bool types.Bool
		Number types.Number
		String types.String
		Type *types.Type
	}
	s := S{
		Blob: types.NewBlob(vs),
		Bool: types.Bool(true),
		Number: types.Number(42),
		String: types.String("hi"),
		Type: types.NumberType,
	}
	assert.True(MustMarshal(vs, s).Equals(
		types.NewStruct("S", types.StructData{
			"blob": types.NewBlob(vs),
			"bool": types.Bool(true),
			"number": types.Number(42),
			"string": types.String("hi"),
			"type": types.NumberType,
		}),
	))
}

// primitiveType is an int with a custom MarshalNoms.
type primitiveType int

// MarshalNoms encodes t as the Number t+1, which lets tests tell the custom
// encoding apart from the default one.
func (t primitiveType) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	return types.Number(int(t) + 1), nil
}

// TestMarshalerPrimitiveType checks that MarshalNoms is used for an int-based
// type.
func TestMarshalerPrimitiveType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	u := primitiveType(42)
	v := MustMarshal(vs, u)
	assert.Equal(types.Number(43), v)
}

// primitiveSliceType is a string slice with a custom MarshalNoms.
type primitiveSliceType []string

// MarshalNoms encodes u as a single comma-joined String.
func (u primitiveSliceType) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	return types.String(strings.Join(u, ",")), nil
}

// TestMarshalerPrimitiveSliceType checks that MarshalNoms is used for a
// slice-based type.
func TestMarshalerPrimitiveSliceType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	u := primitiveSliceType([]string{"a", "b", "c"})
	v := MustMarshal(vs, u)
	assert.Equal(types.String("a,b,c"), v)
}

// primitiveMapType is a string map with a custom MarshalNoms.
type primitiveMapType map[string]string

// MarshalNoms encodes u as a Set of "key,value" Strings.
func (u primitiveMapType) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	var vals types.ValueSlice
	for k, v := range u {
		vals = append(vals, types.String(k+","+v))
	}
	return types.NewSet(vrw, vals...), nil
}

func
TestMarshalerPrimitiveMapType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	u := primitiveMapType(map[string]string{
		"a": "foo",
		"b": "bar",
	})
	v := MustMarshal(vs, u)
	assert.True(types.NewSet(vs, types.String("a,foo"), types.String("b,bar")).Equals(v))
}

// primitiveStructType has only unexported fields and a custom MarshalNoms.
type primitiveStructType struct {
	x, y int
}

// MarshalNoms encodes u as the Number x+y.
func (u primitiveStructType) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	return types.Number(u.x + u.y), nil
}

// TestMarshalerPrimitiveStructType checks that MarshalNoms is used for a
// struct type, even though its fields are unexported.
func TestMarshalerPrimitiveStructType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	u := primitiveStructType{1, 2}
	v := MustMarshal(vs, u)
	assert.Equal(types.Number(3), v)
}

// builtinType wraps the standard library's regexp.Regexp.
type builtinType regexp.Regexp

// MarshalNoms encodes u as the String form of the regular expression.
func (u builtinType) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	r := regexp.Regexp(u)
	return types.String(r.String()), nil
}

// TestMarshalerBuiltinType checks that MarshalNoms is used for a type defined
// from a standard library struct.
func TestMarshalerBuiltinType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	s := "[a-z]+$"
	r := regexp.MustCompile(s)
	u := builtinType(*r)
	v := MustMarshal(vs, u)
	assert.Equal(types.String(s), v)
}

// wrappedMarshalerType is defined from primitiveType and has its own
// MarshalNoms.
type wrappedMarshalerType primitiveType

// MarshalNoms encodes u as the Number u+2, so the result differs from
// primitiveType's u+1.
func (u wrappedMarshalerType) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	return types.Number(int(u) + 2), nil
}

// TestMarshalerWrapperMarshalerType checks that the wrapper's own MarshalNoms
// is the one that runs.
func TestMarshalerWrapperMarshalerType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	u := wrappedMarshalerType(primitiveType(42))
	v := MustMarshal(vs, u)
	assert.Equal(types.Number(44), v)
}

// TestComplexStructType combines the marshaler types above as fields, slice
// elements and map values.
type TestComplexStructType struct {
	P primitiveType
	Ps []primitiveType
	Pm map[string]primitiveType
	Pslice primitiveSliceType
	Pmap primitiveMapType
	Pstruct primitiveStructType
	B builtinType
}

// TestMarshalerComplexStructType checks that MarshalNoms is applied to every
// field of TestComplexStructType.
func TestMarshalerComplexStructType(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	s := "foo|bar"
	r := regexp.MustCompile(s)
	u := TestComplexStructType{
		P: 42,
		Ps: []primitiveType{1, 2},
		Pm: map[string]primitiveType{
			"x": 100,
			"y": 101,
		},
		Pslice: primitiveSliceType{"a", "b", "c"},
Pmap: primitiveMapType{
			"c": "123",
			"d": "456",
		},
		Pstruct: primitiveStructType{10, 20},
		B: builtinType(*r),
	}

	v := MustMarshal(vs, u)

	// Every primitiveType value is expected to be one higher than its input.
	assert.True(types.NewStruct("TestComplexStructType", types.StructData{
		"p": types.Number(43),
		"ps": types.NewList(vs, types.Number(2), types.Number(3)),
		"pm": types.NewMap(vs, types.String("x"), types.Number(101), types.String("y"), types.Number(102)),
		"pslice": types.String("a,b,c"),
		"pmap": types.NewSet(vs, types.String("c,123"), types.String("d,456")),
		"pstruct": types.Number(30),
		"b": types.String(s),
	}).Equals(v))
}

// returnsMarshalerError is a marshaler that always fails with err.
type returnsMarshalerError struct {
	err error
}

func (u returnsMarshalerError) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	return nil, u.err
}

// returnsMarshalerNil is a marshaler that returns neither a value nor an
// error.
type returnsMarshalerNil struct{}

func (u returnsMarshalerNil) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	return nil, nil
}

// panicsMarshaler is a marshaler that panics with a plain string.
type panicsMarshaler struct{}

func (u panicsMarshaler) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) {
	panic("panic")
}

// TestMarshalerErrors checks that an error from MarshalNoms is returned
// unchanged by Marshal, and that a nil/nil result and a panic inside
// MarshalNoms both make Marshal panic.
func TestMarshalerErrors(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	expErr := errors.New("expected error")
	m1 := returnsMarshalerError{expErr}
	_, actErr := Marshal(vs, m1)
	assert.Equal(expErr, actErr)

	m2 := returnsMarshalerNil{}
	assert.Panics(func() { Marshal(vs, m2) })

	m3 := panicsMarshaler{}
	assert.Panics(func() { Marshal(vs, m3) })
}

// TestStructWithNameImpl overrides its Noms struct name through
// MarshalNomsStructName.
type TestStructWithNameImpl struct {
	X int
}

func (ts TestStructWithNameImpl) MarshalNomsStructName() string {
	return "A"
}

// TestMarshalStructName checks that the name returned by
// MarshalNomsStructName replaces the Go type name.
func TestMarshalStructName(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	ts := TestStructWithNameImpl{
		X: 1,
	}
	v := MustMarshal(vs, ts)
	assert.True(types.NewStruct("A", types.StructData{
		"x": types.Number(1),
	}).Equals(v), types.EncodedValue(v))
}

// TestStructWithNameImpl2 returns an empty name from MarshalNomsStructName.
type TestStructWithNameImpl2 struct {
	X int
}

func (ts TestStructWithNameImpl2) MarshalNomsStructName() string {
	return ""
}

// TestMarshalStructName2 checks that an empty name from MarshalNomsStructName
// yields a Noms struct with an empty name.
func TestMarshalStructName2(t *testing.T) {
	assert := assert.New(t)

	vs :=
newTestValueStore()
	defer vs.Close()

	ts := TestStructWithNameImpl2{
		X: 1,
	}
	v := MustMarshal(vs, ts)
	assert.True(types.NewStruct("", types.StructData{
		"x": types.Number(1),
	}).Equals(v), types.EncodedValue(v))
}

================================================
FILE: go/marshal/encode_type.go
================================================
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// Package marshal implements encoding and decoding of Noms values. The mapping
// between Noms objects and Go values is described in the documentation for the
// Marshal and Unmarshal functions.
package marshal

import (
	"fmt"
	"reflect"

	"github.com/attic-labs/noms/go/types"
)

// MarshalType computes a Noms type from a Go type
//
// The rules for MarshalType are the same as for Marshal, except for omitempty
// which leads to an optional field since it depends on the runtime value and
// can lead to the property not being present.
//
// If a Go struct contains a noms tag with original the field is skipped since
// the Noms type depends on the original Noms value which is not available.
func MarshalType(v interface{}) (nt *types.Type, err error) {
	return MarshalTypeOpt(v, Opt{})
}

// MarshalTypeOpt is like MarshalType but with additional options.
//
// It calls MustMarshalTypeOpt and converts the panics it recognizes into the
// returned error: *UnsupportedTypeError and *InvalidTagError are returned as
// they are, a *marshalNomsError is unwrapped to the error it carries. Any
// other panic is re-raised.
func MarshalTypeOpt(v interface{}, opt Opt) (nt *types.Type, err error) {
	defer func() {
		if r := recover(); r != nil {
			switch r := r.(type) {
			case *UnsupportedTypeError, *InvalidTagError:
				err = r.(error)
			case *marshalNomsError:
				err = r.err
			default:
				panic(r)
			}
		}
	}()
	nt = MustMarshalTypeOpt(v, opt)
	return
}

// MustMarshalType computes a Noms type from a Go type or panics if there is an
// error.
func MustMarshalType(v interface{}) (nt *types.Type) {
	return MustMarshalTypeOpt(v, Opt{})
}

// MustMarshalTypeOpt is like MustMarshalType but provides additional options.
func MustMarshalTypeOpt(v interface{}, opt Opt) (nt *types.Type) { rv := reflect.ValueOf(v) tags := nomsTags{ set: opt.Set, } nt = encodeType(rv.Type(), map[string]reflect.Type{}, tags) if nt == nil { panic(&UnsupportedTypeError{Type: rv.Type()}) } return } // TypeMarshaler is an interface types can implement to provide their own // encoding of type. type TypeMarshaler interface { // MarshalNomsType returns the Noms Type encoding of a type, or an error. // nil is not a valid return val - if both val and err are nil, MarshalType // will panic. MarshalNomsType() (t *types.Type, err error) } var typeOfTypesType = reflect.TypeOf((*types.Type)(nil)) var typeMarshalerInterface = reflect.TypeOf((*TypeMarshaler)(nil)).Elem() func encodeType(t reflect.Type, seenStructs map[string]reflect.Type, tags nomsTags) *types.Type { if t.Implements(typeMarshalerInterface) { v := reflect.Zero(t) typ, err := v.Interface().(TypeMarshaler).MarshalNomsType() if err != nil { panic(&marshalNomsError{err}) } if typ == nil { panic(fmt.Errorf("nil result from %s.MarshalNomsType", t)) } return typ } if t.Implements(marshalerInterface) { // There is no way to determine the noms type now. For Marshal it can be // different each time MarshalNoms is called and is handled further up the // stack. err := fmt.Errorf("Cannot marshal type which implements %s, perhaps implement %s for %s", marshalerInterface, typeMarshalerInterface, t) panic(&marshalNomsError{err}) } if t.Implements(nomsValueInterface) { if t == typeOfTypesType { return types.TypeType } // Use Name because List and Blob are convertible to each other on Go. 
switch t.Name() { case "Blob": return types.BlobType case "Bool": return types.BoolType case "List": return types.MakeListType(types.ValueType) case "Map": return types.MakeMapType(types.ValueType, types.ValueType) case "Number": return types.NumberType case "Ref": return types.MakeRefType(types.ValueType) case "Set": return types.MakeSetType(types.ValueType) case "String": return types.StringType case "Value": return types.ValueType } err := fmt.Errorf("Cannot marshal type %s, it requires type parameters", t) panic(&marshalNomsError{err}) } switch t.Kind() { case reflect.Bool: return types.BoolType case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64: return types.NumberType case reflect.String: return types.StringType case reflect.Struct: return structEncodeType(t, seenStructs) case reflect.Array, reflect.Slice: elemType := encodeType(t.Elem(), seenStructs, nomsTags{}) if elemType == nil { break } if shouldEncodeAsSet(t, tags) { return types.MakeSetType(elemType) } return types.MakeListType(elemType) case reflect.Map: keyType := encodeType(t.Key(), seenStructs, nomsTags{}) if keyType == nil { break } if shouldEncodeAsSet(t, tags) { return types.MakeSetType(keyType) } valueType := encodeType(t.Elem(), seenStructs, nomsTags{}) if valueType != nil { return types.MakeMapType(keyType, valueType) } } // This will be reported as an error at a different layer. return nil } // structEncodeType returns the Noms types.Type if it can be determined from the // reflect.Type. In some cases we cannot determine the type by only looking at // the type but we also need to look at the value. In these cases this returns // nil and we have to wait until we have a value to be able to determine the // type. 
func structEncodeType(t reflect.Type, seenStructs map[string]reflect.Type) *types.Type {
	name := getStructName(t)
	if name != "" {
		// A named struct already recorded in seenStructs is emitted as a cycle
		// reference instead of being expanded again.
		if _, ok := seenStructs[name]; ok {
			return types.MakeCycleType(name)
		}
		seenStructs[name] = t
	}

	fields, knownShape, _ := typeFields(t, seenStructs, true, false)

	// structType stays nil unless typeFields reports a known shape.
	var structType *types.Type
	if knownShape {
		structTypeFields := make([]types.StructField, len(fields))
		for i, fs := range fields {
			// omitEmpty fields become optional fields of the struct type.
			structTypeFields[i] = types.StructField{
				Name:     fs.name,
				Type:     fs.nomsType,
				Optional: fs.omitEmpty,
			}
		}
		structType = types.MakeStructType(getStructName(t), structTypeFields...)
	}
	return structType
}

================================================
FILE: go/marshal/encode_type_test.go
================================================
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package marshal

import (
	"errors"
	"fmt"
	"reflect"
	"testing"

	"github.com/attic-labs/noms/go/nomdl"
	"github.com/attic-labs/noms/go/types"
	"github.com/stretchr/testify/assert"
)

// TestMarshalTypeType checks the Noms type MarshalType produces for a range of
// Go values.
func TestMarshalTypeType(tt *testing.T) {
	// t asserts that ptr (which must not be a pointer) marshals without error
	// to a type equal to exp.
	t := func(exp *types.Type, ptr interface{}) {
		p := reflect.ValueOf(ptr)
		assert.NotEqual(tt, reflect.Ptr, p.Type().Kind())
		actual, err := MarshalType(p.Interface())
		assert.NoError(tt, err)
		assert.NotNil(tt, actual, "%#v", p.Interface())
		assert.True(tt, exp.Equals(actual))
	}

	t(types.NumberType, float32(0))
	t(types.NumberType, float64(0))
	t(types.NumberType, int(0))
	t(types.NumberType, int16(0))
	t(types.NumberType, int32(0))
	t(types.NumberType, int64(0))
	t(types.NumberType, int8(0))
	t(types.NumberType, uint(0))
	t(types.NumberType, uint16(0))
	t(types.NumberType, uint32(0))
	t(types.NumberType, uint64(0))
	t(types.NumberType, uint8(0))

	t(types.BoolType, true)

	t(types.StringType, "hi")

	var l []int
	t(types.MakeListType(types.NumberType), l)

	var m map[uint32]string
	t(types.MakeMapType(types.NumberType, types.StringType), m)

	t(types.MakeListType(types.ValueType),
types.List{})
	t(types.MakeSetType(types.ValueType), types.Set{})
	t(types.MakeMapType(types.ValueType, types.ValueType), types.Map{})
	t(types.MakeRefType(types.ValueType), types.Ref{})

	type TestStruct struct {
		Str string
		Num float64
	}
	var str TestStruct
	t(types.MakeStructTypeFromFields("TestStruct", types.FieldMap{
		"str": types.StringType,
		"num": types.NumberType,
	}), str)

	// Same again to test caching
	t(types.MakeStructTypeFromFields("TestStruct", types.FieldMap{
		"str": types.StringType,
		"num": types.NumberType,
	}), str)

	// An anonymous Go struct is expected to produce an unnamed Noms struct.
	anonStruct := struct {
		B bool
	}{
		true,
	}
	t(types.MakeStructTypeFromFields("", types.FieldMap{
		"b": types.BoolType,
	}), anonStruct)

	type TestNestedStruct struct {
		A []int16
		B TestStruct
		C float64
	}
	var nestedStruct TestNestedStruct
	t(types.MakeStructTypeFromFields("TestNestedStruct", types.FieldMap{
		"a": types.MakeListType(types.NumberType),
		"b": types.MakeStructTypeFromFields("TestStruct", types.FieldMap{
			"str": types.StringType,
			"num": types.NumberType,
		}),
		"c": types.NumberType,
	}), nestedStruct)

	// The lower-case Go type name testStruct is expected to yield the Noms
	// struct name "TestStruct".
	type testStruct struct {
		Str string
		Num float64
	}
	var ts testStruct
	t(types.MakeStructTypeFromFields("TestStruct", types.FieldMap{
		"str": types.StringType,
		"num": types.NumberType,
	}), ts)
}

// assertMarshalTypeErrorMessage asserts that MarshalType(v) fails and that the
// error text equals expectedMessage.
func assertMarshalTypeErrorMessage(t *testing.T, v interface{}, expectedMessage string) {
	_, err := MarshalType(v)
	assert.Error(t, err)
	assert.Equal(t, expectedMessage, err.Error())
}

// TestMarshalTypeInvalidTypes checks the error reported for a channel value.
func TestMarshalTypeInvalidTypes(t *testing.T) {
	assertMarshalTypeErrorMessage(t, make(chan int), "Type is not supported, type: chan int")
}

// TestMarshalTypeEmbeddedStruct checks that the fields of an untagged embedded
// struct appear directly in the outer struct type.
func TestMarshalTypeEmbeddedStruct(t *testing.T) {
	assert := assert.New(t)

	type EmbeddedStruct struct {
		B bool
	}
	type TestStruct struct {
		EmbeddedStruct
		A int
	}

	var s TestStruct
	typ := MustMarshalType(s)

	assert.True(types.MakeStructTypeFromFields("TestStruct", types.FieldMap{
		"a": types.NumberType,
		"b": types.BoolType,
	}).Equals(typ))
}

// TestMarshalTypeEmbeddedStructSkip checks that an embedded struct tagged
// `noms:"-"` contributes no fields.
func TestMarshalTypeEmbeddedStructSkip(t *testing.T) {
	assert := assert.New(t)

	type EmbeddedStruct struct {
		B
bool
	}
	type TestStruct struct {
		EmbeddedStruct `noms:"-"`
		A              int
	}

	var s TestStruct
	typ := MustMarshalType(s)

	assert.True(types.MakeStructTypeFromFields("TestStruct", types.FieldMap{
		"a": types.NumberType,
	}).Equals(typ))
}

// TestMarshalTypeEmbeddedStructNamed checks that an embedded struct carrying a
// name tag becomes a nested struct field under that name.
func TestMarshalTypeEmbeddedStructNamed(t *testing.T) {
	assert := assert.New(t)

	type EmbeddedStruct struct {
		B bool
	}
	type TestStruct struct {
		EmbeddedStruct `noms:"em"`
		A              int
	}

	var s TestStruct
	typ := MustMarshalType(s)

	assert.True(types.MakeStructTypeFromFields("TestStruct", types.FieldMap{
		"a": types.NumberType,
		"em": types.MakeStructTypeFromFields("EmbeddedStruct", types.FieldMap{
			"b": types.BoolType,
		}),
	}).Equals(typ))
}

// TestMarshalTypeEncodeNonExportedField checks the error reported for a struct
// with an untagged unexported field.
func TestMarshalTypeEncodeNonExportedField(t *testing.T) {
	type TestStruct struct {
		x int
	}
	assertMarshalTypeErrorMessage(t, TestStruct{1}, "Non exported fields are not supported, type: marshal.TestStruct")
}

// TestMarshalTypeEncodeTaggingSkip checks that a field tagged `noms:"-"` is
// left out of the struct type.
func TestMarshalTypeEncodeTaggingSkip(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Abc int `noms:"-"`
		Def bool
	}
	var s S
	typ, err := MarshalType(s)
	assert.NoError(err)
	assert.True(types.MakeStructTypeFromFields("S", types.FieldMap{
		"def": types.BoolType,
	}).Equals(typ))
}

// TestMarshalTypeNamedFields checks that a name given in the noms tag is used
// verbatim as the field name, while untagged fields get a lower-cased first
// letter.
func TestMarshalTypeNamedFields(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Aaa int  `noms:"a"`
		Bbb bool `noms:"B"`
		Ccc string
	}
	var s S
	typ, err := MarshalType(s)
	assert.NoError(err)
	assert.True(types.MakeStructTypeFromFields("S", types.FieldMap{
		"a":   types.NumberType,
		"B":   types.BoolType,
		"ccc": types.StringType,
	}).Equals(typ))
}

// TestMarshalTypeInvalidNamedFields checks the error reported for a tag name
// that is not a valid struct field name.
func TestMarshalTypeInvalidNamedFields(t *testing.T) {
	type S struct {
		A int `noms:"1a"`
	}
	var s S
	assertMarshalTypeErrorMessage(t, s, "Invalid struct field name: 1a")
}

// TestMarshalTypeOmitEmpty checks that an omitempty field becomes an optional
// field of the struct type.
func TestMarshalTypeOmitEmpty(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		String string `noms:",omitempty"`
	}
	var s S
	typ, err := MarshalType(s)
	assert.NoError(err)
	assert.True(types.MakeStructType("S", types.StructField{Name: "string", Type: types.StringType, Optional: true}).Equals(typ))
}

// ExampleMarshalType prints the Noms type derived from a small Go struct.
func ExampleMarshalType() {
type Person struct {
		Given  string
		Female bool
	}
	var person Person
	personNomsType, err := MarshalType(person)
	if err != nil {
		fmt.Println(err)
		return
	}

	// The "Output:" comment below is read by `go test` and compared with what
	// the example prints.
	fmt.Println(personNomsType.Describe())
	// Output: Struct Person {
	//   female: Bool,
	//   given: String,
	// }
}

// TestMarshalTypeSlice checks that a Go slice maps to a Noms List type.
func TestMarshalTypeSlice(t *testing.T) {
	assert := assert.New(t)

	s := []string{"a", "b", "c"}
	typ, err := MarshalType(s)
	assert.NoError(err)
	assert.True(types.MakeListType(types.StringType).Equals(typ))
}

// TestMarshalTypeArray checks that a Go array maps to a Noms List type.
func TestMarshalTypeArray(t *testing.T) {
	assert := assert.New(t)

	a := [3]int{1, 2, 3}
	typ, err := MarshalType(a)
	assert.NoError(err)
	assert.True(types.MakeListType(types.NumberType).Equals(typ))
}

// TestMarshalTypeStructWithSlice checks a struct that has a slice field.
func TestMarshalTypeStructWithSlice(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		List []int
	}
	var s S
	typ, err := MarshalType(s)
	assert.NoError(err)
	assert.True(types.MakeStructTypeFromFields("S", types.FieldMap{
		"list": types.MakeListType(types.NumberType),
	}).Equals(typ))
}

// TestMarshalTypeRecursive checks that a self-referential struct is encoded
// with a cycle type at the point of recursion.
func TestMarshalTypeRecursive(t *testing.T) {
	assert := assert.New(t)

	type Node struct {
		Value    int
		Children []Node
	}
	var n Node
	typ, err := MarshalType(n)
	assert.NoError(err)

	typ2 := types.MakeStructType("Node",
		types.StructField{
			Name: "children",
			Type: types.MakeListType(types.MakeCycleType("Node")),
		},
		types.StructField{
			Name: "value",
			Type: types.NumberType,
		},
	)
	assert.True(typ2.Equals(typ))
}

// TestMarshalTypeMap checks Go maps with primitive and struct keys.
func TestMarshalTypeMap(t *testing.T) {
	assert := assert.New(t)

	var m map[string]int
	typ, err := MarshalType(m)
	assert.NoError(err)
	assert.True(types.MakeMapType(types.StringType, types.NumberType).Equals(typ))

	type S struct {
		N string
	}

	var m2 map[S]bool
	typ, err = MarshalType(m2)
	assert.NoError(err)
	assert.True(types.MakeMapType(
		types.MakeStructTypeFromFields("S", types.FieldMap{
			"n": types.StringType,
		}),
		types.BoolType).Equals(typ))
}

// TestMarshalTypeSet checks which fields tagged with "set" are encoded as a
// Noms Set type.
func TestMarshalTypeSet(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		A map[int]struct{} `noms:",set"`
		B map[int]struct{}
		C map[int]string      `noms:",set"`
		D map[string]struct{} `noms:",set"`
		E
map[string]struct{}
		F map[string]int `noms:",set"`
		G []int          `noms:",set"`
		H string         `noms:",set"`
	}
	var s S
	typ, err := MarshalType(s)
	assert.NoError(err)

	// Per the expectations below, the "set" tag turns map[K]struct{} and slice
	// fields into sets; maps with other value types (c, f) and the string
	// field (h) keep their ordinary type.
	emptyStructType := types.MakeStructTypeFromFields("", types.FieldMap{})

	assert.True(types.MakeStructTypeFromFields("S", types.FieldMap{
		"a": types.MakeSetType(types.NumberType),
		"b": types.MakeMapType(types.NumberType, emptyStructType),
		"c": types.MakeMapType(types.NumberType, types.StringType),
		"d": types.MakeSetType(types.StringType),
		"e": types.MakeMapType(types.StringType, emptyStructType),
		"f": types.MakeMapType(types.StringType, types.NumberType),
		"g": types.MakeSetType(types.NumberType),
		"h": types.StringType,
	}).Equals(typ))
}

// TestEncodeTypeOpt checks the effect of Opt.Set on top-level slices and maps.
func TestEncodeTypeOpt(t *testing.T) {
	assert := assert.New(t)

	tc := []struct {
		in       interface{}
		opt      Opt
		wantType *types.Type
	}{
		{
			[]string{},
			Opt{},
			types.MakeListType(types.StringType),
		},
		{
			[]string{},
			Opt{Set: true},
			types.MakeSetType(types.StringType),
		},
		{
			map[string]struct{}{},
			Opt{},
			types.MakeMapType(types.StringType, types.MakeStructType("")),
		},
		{
			map[string]struct{}{},
			Opt{Set: true},
			types.MakeSetType(types.StringType),
		},
	}

	// Note: the loop variable t shadows the *testing.T parameter inside the
	// loop body.
	for _, t := range tc {
		r, err := MarshalTypeOpt(t.in, t.opt)
		assert.True(t.wantType.Equals(r))
		assert.Nil(err)
	}
}

// TestMarshalTypeSetWithTags checks the "set" tag combined with a field name
// and with omitempty.
func TestMarshalTypeSetWithTags(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		A map[int]struct{} `noms:"foo,set"`
		B map[int]struct{} `noms:",omitempty,set"`
		C map[int]struct{} `noms:"bar,omitempty,set"`
	}

	var s S
	typ, err := MarshalType(s)
	assert.NoError(err)
	assert.True(types.MakeStructType("S",
		types.StructField{Name: "foo", Type: types.MakeSetType(types.NumberType), Optional: false},
		types.StructField{Name: "b", Type: types.MakeSetType(types.NumberType), Optional: true},
		types.StructField{Name: "bar", Type: types.MakeSetType(types.NumberType), Optional: true},
	).Equals(typ))
}

// TestMarshalTypeInvalidTag checks the error reported for an unknown tag
// option (tag options are case-sensitive: "omitEmpty" is rejected).
func TestMarshalTypeInvalidTag(t *testing.T) {
	type S struct {
		F string `noms:",omitEmpty"`
	}
	var s S
	_, err := MarshalType(s)
	assert.Error(t, err)
assert.Equal(t, `Unrecognized tag: omitEmpty`, err.Error())
}

// TestMarshalTypeCanSkipUnexportedField checks that an unexported field is
// accepted when it is tagged `noms:"-"`.
func TestMarshalTypeCanSkipUnexportedField(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Abc         int
		notExported bool `noms:"-"`
	}
	var s S
	typ, err := MarshalType(s)
	assert.NoError(err)
	assert.True(types.MakeStructTypeFromFields("S", types.FieldMap{
		"abc": types.NumberType,
	}).Equals(typ))
}

// TestMarshalTypeOriginal checks that a field tagged "original" is left out of
// the struct type.
func TestMarshalTypeOriginal(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Foo int          `noms:",omitempty"`
		Bar types.Struct `noms:",original"`
	}

	var s S
	typ, err := MarshalType(s)
	assert.NoError(err)
	assert.True(types.MakeStructType("S",
		types.StructField{Name: "foo", Type: types.NumberType, Optional: true},
	).Equals(typ))
}

// TestMarshalTypeNomsTypes checks struct fields whose Go types are Noms value
// types.
func TestMarshalTypeNomsTypes(t *testing.T) {
	assert := assert.New(t)

	type S struct {
		Blob   types.Blob
		Bool   types.Bool
		Number types.Number
		String types.String
		Type   *types.Type
	}
	var s S
	assert.True(MustMarshalType(s).Equals(
		types.MakeStructTypeFromFields("S", types.FieldMap{
			"blob":   types.BlobType,
			"bool":   types.BoolType,
			"number": types.NumberType,
			"string": types.StringType,
			"type":   types.TypeType,
		}),
	))
}

// MarshalNomsType makes primitiveType a TypeMarshaler that reports Number.
func (t primitiveType) MarshalNomsType() (*types.Type, error) {
	return types.NumberType, nil
}

// TestTypeMarshalerPrimitiveType checks that the TypeMarshaler result is used
// for primitiveType.
func TestTypeMarshalerPrimitiveType(t *testing.T) {
	assert := assert.New(t)

	var u primitiveType
	typ := MustMarshalType(u)
	assert.Equal(types.NumberType, typ)
}

// MarshalNomsType makes primitiveSliceType a TypeMarshaler that reports String.
func (u primitiveSliceType) MarshalNomsType() (*types.Type, error) {
	return types.StringType, nil
}

// TestTypeMarshalerPrimitiveSliceType checks that the TypeMarshaler result is
// used for primitiveSliceType.
func TestTypeMarshalerPrimitiveSliceType(t *testing.T) {
	assert := assert.New(t)

	var u primitiveSliceType
	typ := MustMarshalType(u)
	assert.Equal(types.StringType, typ)
}

// MarshalNomsType makes primitiveMapType a TypeMarshaler that reports
// Set<String>.
func (u primitiveMapType) MarshalNomsType() (*types.Type, error) {
	return types.MakeSetType(types.StringType), nil
}

// TestTypeMarshalerPrimitiveMapType checks that the TypeMarshaler result is
// used for primitiveMapType.
func TestTypeMarshalerPrimitiveMapType(t *testing.T) {
	assert := assert.New(t)

	var u primitiveMapType
	typ := MustMarshalType(u)
	assert.Equal(types.MakeSetType(types.StringType), typ)
}

// TestTypeMarshalerPrimitiveStructTypeNoMarshalNomsType checks the error for a
// type that has no MarshalNomsType method (see the expected message below).
func TestTypeMarshalerPrimitiveStructTypeNoMarshalNomsType(t
*testing.T) {
	assert := assert.New(t)

	var u primitiveStructType
	_, err := MarshalType(u)
	assert.Error(err)
	assert.Equal("Cannot marshal type which implements marshal.Marshaler, perhaps implement marshal.TypeMarshaler for marshal.primitiveStructType", err.Error())
}

// MarshalNomsType makes builtinType a TypeMarshaler that reports String.
func (u builtinType) MarshalNomsType() (*types.Type, error) {
	return types.StringType, nil
}

// TestTypeMarshalerBuiltinType checks that the TypeMarshaler result is used
// for builtinType.
func TestTypeMarshalerBuiltinType(t *testing.T) {
	assert := assert.New(t)

	var u builtinType
	typ := MustMarshalType(u)
	assert.Equal(types.StringType, typ)
}

// MarshalNomsType makes wrappedMarshalerType a TypeMarshaler that reports
// Number.
func (u wrappedMarshalerType) MarshalNomsType() (*types.Type, error) {
	return types.NumberType, nil
}

// TestTypeMarshalerWrapperMarshalerType checks that the TypeMarshaler result
// is used for wrappedMarshalerType.
func TestTypeMarshalerWrapperMarshalerType(t *testing.T) {
	assert := assert.New(t)

	var u wrappedMarshalerType
	typ := MustMarshalType(u)
	assert.Equal(types.NumberType, typ)
}

// MarshalNomsType always fails with "expected error".
func (u returnsMarshalerError) MarshalNomsType() (*types.Type, error) {
	return nil, errors.New("expected error")
}

// MarshalNomsType returns neither a type nor an error.
func (u returnsMarshalerNil) MarshalNomsType() (*types.Type, error) {
	return nil, nil
}

// MarshalNomsType always panics.
func (u panicsMarshaler) MarshalNomsType() (*types.Type, error) {
	panic("panic")
}

// TestTypeMarshalerErrors checks how MarshalType reacts to a TypeMarshaler
// that returns an error, returns nil/nil, or panics.
func TestTypeMarshalerErrors(t *testing.T) {
	assert := assert.New(t)

	// An error from MarshalNomsType is returned to the caller.
	expErr := errors.New("expected error")
	var m1 returnsMarshalerError
	_, actErr := MarshalType(m1)
	assert.Equal(expErr, actErr)

	// A nil type with a nil error is expected to panic.
	var m2 returnsMarshalerNil
	assert.Panics(func() { MarshalType(m2) })

	// A panic inside MarshalNomsType is expected to propagate.
	var m3 panicsMarshaler
	assert.Panics(func() { MarshalType(m3) })
}

// TestMarshalTypeStructName checks that MarshalNomsStructName supplies the
// Noms struct name.
func TestMarshalTypeStructName(t *testing.T) {
	assert := assert.New(t)

	var ts TestStructWithNameImpl
	typ := MustMarshalType(ts)
	assert.True(types.MakeStructType("A", types.StructField{Name: "x", Type: types.NumberType, Optional: false}).Equals(typ), typ.Describe())
}

// TestMarshalTypeStructName2 checks that an empty MarshalNomsStructName result
// yields an unnamed struct type.
func TestMarshalTypeStructName2(t *testing.T) {
	assert := assert.New(t)

	var ts TestStructWithNameImpl2
	typ := MustMarshalType(ts)
	assert.True(types.MakeStructType("", types.StructField{Name: "x", Type: types.NumberType, Optional: false}).Equals(typ), typ.Describe())
}

// OutPhoto has two set-tagged fields of the same struct type; it is the input
// of TestMarshalTypeOutface.
type OutPhoto struct {
	Faces []OutFace `noms:",set"`
SomeOtherFacesSet []OutFace `noms:",set"` } type OutFace struct { Blob types.Ref } func (f OutFace) MarshalNomsStructName() string { return "Face" } func TestMarshalTypeOutface(t *testing.T) { typ := MustMarshalType(OutPhoto{}) expectedType := nomdl.MustParseType(`Struct OutPhoto { faces: Set, }>, someOtherFacesSet: Set>, }`) assert.True(t, typ.Equals(expectedType)) } ================================================ FILE: go/merge/candidate.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package merge import ( "fmt" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) // candidate represents a collection that is a candidate to be merged. This // interface exists to wrap Maps, Sets and Structs with a common API so that // threeWayOrderedSequenceMerge() can remain agnostic to which kind of // collections it's actually working with. type candidate interface { diff(parent candidate, change chan<- types.ValueChanged, stop <-chan struct{}) get(k types.Value) types.Value pathConcat(change types.ValueChanged, path types.Path) (out types.Path) getValue() types.Value } type mapCandidate struct { m types.Map } func (mc mapCandidate) diff(p candidate, change chan<- types.ValueChanged, stop <-chan struct{}) { mc.m.Diff(p.(mapCandidate).m, change, stop) } func (mc mapCandidate) get(k types.Value) types.Value { return mc.m.Get(k) } func (mc mapCandidate) pathConcat(change types.ValueChanged, path types.Path) (out types.Path) { out = append(out, path...) 
if kind := change.Key.Kind(); kind == types.BoolKind || kind == types.StringKind || kind == types.NumberKind {
		// Bool, String and Number keys are addressed by value.
		out = append(out, types.NewIndexPath(change.Key))
	} else {
		// Every other key kind is addressed by its hash.
		out = append(out, types.NewHashIndexPath(change.Key.Hash()))
	}
	return
}

// getValue returns the wrapped Map.
func (mc mapCandidate) getValue() types.Value {
	return mc.m
}

// setCandidate adapts a types.Set to the candidate interface.
type setCandidate struct {
	s types.Set
}

// diff delegates to Set.Diff; p must be a setCandidate.
func (sc setCandidate) diff(p candidate, change chan<- types.ValueChanged, stop <-chan struct{}) {
	sc.s.Diff(p.(setCandidate).s, change, stop)
}

// get returns k itself: for a set the key is the value.
func (sc setCandidate) get(k types.Value) types.Value {
	return k
}

// pathConcat returns a copy of path extended by an index component for
// change.Key, by value for Bool/String/Number keys and by hash otherwise.
func (sc setCandidate) pathConcat(change types.ValueChanged, path types.Path) (out types.Path) {
	out = append(out, path...)
	if kind := change.Key.Kind(); kind == types.BoolKind || kind == types.StringKind || kind == types.NumberKind {
		out = append(out, types.NewIndexPath(change.Key))
	} else {
		out = append(out, types.NewHashIndexPath(change.Key.Hash()))
	}
	return
}

// getValue returns the wrapped Set.
func (sc setCandidate) getValue() types.Value {
	return sc.s
}

// structCandidate adapts a types.Struct to the candidate interface.
type structCandidate struct {
	s types.Struct
}

// diff delegates to Struct.Diff; p must be a structCandidate.
func (sc structCandidate) diff(p candidate, change chan<- types.ValueChanged, stop <-chan struct{}) {
	sc.s.Diff(p.(structCandidate).s, change, stop)
}

// get returns the field named by key, which must be a types.String; any other
// key type panics. The "found" result of MaybeGet is discarded.
func (sc structCandidate) get(key types.Value) types.Value {
	if field, ok := key.(types.String); ok {
		val, _ := sc.s.MaybeGet(string(field))
		return val
	}
	panic(fmt.Errorf("Bad key type in diff: %s", types.TypeOf(key).Describe()))
}

// pathConcat returns a copy of path extended by a field component for
// change.Key, which must be a types.String.
func (sc structCandidate) pathConcat(change types.ValueChanged, path types.Path) (out types.Path) {
	out = append(out, path...)
	str, ok := change.Key.(types.String)
	if !ok {
		d.Panic("Field names must be strings, not %s", types.TypeOf(change.Key).Describe())
	}
	return append(out, types.NewFieldPath(string(str)))
}

// getValue returns the wrapped Struct.
func (sc structCandidate) getValue() types.Value {
	return sc.s
}

================================================
FILE: go/merge/three_way.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package merge

import (
	"fmt"

	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/types"
)

// Policy functors are used to merge two values (a and b) against a common
// ancestor. All three Values (and, presumably, everything they reference) must
// be wholly readable from vrw. Whenever a change is merged, implementations
// should send a struct{} over progress.
type Policy func(a, b, ancestor types.Value, vrw types.ValueReadWriter, progress chan struct{}) (merged types.Value, err error)

// ResolveFunc is the type for custom merge-conflict resolution callbacks.
// When the merge algorithm encounters two non-mergeable changes (aChange and
// bChange) at the same path, it calls the ResolveFunc passed into ThreeWay().
// The callback gets the types of the two incompatible changes (added, changed
// or removed) and the two Values that could not be merged (if any). If the
// ResolveFunc cannot devise a resolution, ok should be false upon return and
// the other return values are undefined. If the conflict can be resolved, the
// function should return the appropriate type of change to apply, the new value
// to be used (if any), and true.
type ResolveFunc func(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool)

// None is the no-op ResolveFunc. Any conflict results in a merge failure.
func None(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
	// change and merged are still their zero values here; only ok == false
	// matters to the caller.
	return change, merged, false
}

// Ours resolves conflicts by preferring changes from the Value currently being committed.
func Ours(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
	return aChange, a, true
}

// Theirs resolves conflicts by preferring changes in the current HEAD.
func Theirs(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
	// Always side with b: take its change type and its value.
	change, merged, ok = bChange, b, true
	return
}

// ErrMergeConflict is the error reported when a merge attempt fails and has to
// be resolved manually; msg carries the reason.
type ErrMergeConflict struct {
	msg string
}

// Error returns the reason recorded for the conflict.
func (e *ErrMergeConflict) Error() string {
	return e.msg
}

// newMergeConflict builds an ErrMergeConflict from a Sprintf-style format and
// its arguments.
func newMergeConflict(format string, args ...interface{}) *ErrMergeConflict {
	reason := fmt.Sprintf(format, args...)
	return &ErrMergeConflict{msg: reason}
}

// NewThreeWay returns a Policy that runs ThreeWay with the given ResolveFunc.
func NewThreeWay(resolve ResolveFunc) Policy {
	var policy Policy = func(a, b, parent types.Value, vrw types.ValueReadWriter, progress chan struct{}) (types.Value, error) {
		return ThreeWay(a, b, parent, vrw, resolve, progress)
	}
	return policy
}

// ThreeWay attempts a three-way merge between two _candidate_ values that
// have both changed with respect to a common _parent_ value. The result of
// the algorithm is a _merged_ value or an error if merging could not be done.
//
// The algorithm works recursively, applying the following rules for each value:
//
// - If any of the three values have a different [kind](link): conflict
// - If the two candidates are identical: the result is that value
// - If the values are primitives or Blob: conflict
// - If the values are maps:
//   - if the same key was inserted or updated in both candidates:
//     - first run this same algorithm on those two values to attempt to merge them
//     - if the two merged values are still different: conflict
//   - if a key was inserted in one candidate and removed in the other: conflict
// - If the values are structs:
//   - Same as map, except using field names instead of map keys
// - If the values are sets:
//   - Apply the changes from both candidates to the parent to get the result. No conflicts are possible.
// - If the values are lists:
//   - Apply list-merge (see below)
//
// Merge rules for List are a bit more complex than Map, Struct, and Set due
// to a wider array of potential use patterns. A List might be a de-facto Map
// with sequential numeric keys, or it might be a sequence of objects where
// order matters but the caller is unlikely to go back and update the value at
// a given index. List modifications are expressed in terms of 'splices' (see
// types/edit_distance.go). Roughly, a splice indicates that some number of
// elements were added and/or removed at some index in |parent|. In the
// following example:
//
//   parent: [a, b, c, d]
//   a: [b, c, d]
//   b: [a, b, c, d, e]
//   merged: [b, c, d, e]
//
// The difference from parent -> a is described by the splice {0, 1}, indicating
// that 1 element was removed from parent at index 0. The difference from
// parent -> b is described as {4, 0, e}, indicating that 0 elements were
// removed at parent's index 4, and the element 'e' was added. Our merge
// algorithm will successfully merge a and b, because these splices do not
// overlap; that is, neither one removes the index at which the other
// operates. As a general rule, the merge algorithm will refuse to merge
// splices that overlap, as in the following examples:
//
//   parent: [a, b, c]
//   a: [a, d, b, c]
//   b: [a, c]
//   merged: conflict
//
//   parent: [a, b, c]
//   a: [a, e, b, c]
//   b: [a, d, b, c]
//   merged: conflict
//
// The splices in the first example are {1, 0, d} (remove 0 elements at index
// 1 and add 'd') and {1, 1} (remove 1 element at index 1). Since the latter
// removes the element at which the former adds an element, these splices
// overlap. Similarly, in the second example, both splices operate at index 1
// but add different elements. Thus, they also overlap.
//
// There is one special case for overlapping splices. If they perform the
// exact same operation, the algorithm considers them not to be in conflict.
// E.g.
// // parent: [a, b, c] // a: [a, d, e] // b: [a, d, e] // merged: [a, d, e] func ThreeWay(a, b, parent types.Value, vrw types.ValueReadWriter, resolve ResolveFunc, progress chan struct{}) (merged types.Value, err error) { describe := func(v types.Value) string { if v != nil { return types.TypeOf(v).Describe() } return "nil Value" } if a == nil && b == nil { return parent, nil } else if unmergeable(a, b) { return parent, newMergeConflict("Cannot merge %s with %s.", describe(a), describe(b)) } if resolve == nil { resolve = None } m := &merger{vrw, resolve, progress} return m.threeWay(a, b, parent, types.Path{}) } // a and b cannot be merged if they are of different NomsKind, or if at least one of the two is nil, or if either is a Noms primitive. func unmergeable(a, b types.Value) bool { if a != nil && b != nil { aKind, bKind := a.Kind(), b.Kind() return aKind != bKind || types.IsPrimitiveKind(aKind) || types.IsPrimitiveKind(bKind) } return true } type merger struct { vrw types.ValueReadWriter resolve ResolveFunc progress chan<- struct{} } func updateProgress(progress chan<- struct{}) { // TODO: Eventually we'll want more information than a single bit :). 
if progress != nil { progress <- struct{}{} } } func (m *merger) threeWay(a, b, parent types.Value, path types.Path) (merged types.Value, err error) { defer updateProgress(m.progress) if a == nil || b == nil { d.Panic("Merge candidates cannont be nil: a = %v, b = %v", a, b) } switch a.Kind() { case types.ListKind: if aList, bList, pList, ok := listAssert(m.vrw, a, b, parent); ok { return threeWayListMerge(aList, bList, pList) } case types.MapKind: if aMap, bMap, pMap, ok := mapAssert(m.vrw, a, b, parent); ok { return m.threeWayMapMerge(aMap, bMap, pMap, path) } case types.RefKind: if aValue, bValue, pValue, ok := refAssert(a, b, parent, m.vrw); ok { merged, err := m.threeWay(aValue, bValue, pValue, path) if err != nil { return parent, err } return m.vrw.WriteValue(merged), nil } case types.SetKind: if aSet, bSet, pSet, ok := setAssert(m.vrw, a, b, parent); ok { return m.threeWaySetMerge(aSet, bSet, pSet, path) } case types.StructKind: if aStruct, bStruct, pStruct, ok := structAssert(a, b, parent); ok { return m.threeWayStructMerge(aStruct, bStruct, pStruct, path) } } pDescription := "" if parent != nil { pDescription = types.TypeOf(parent).Describe() } return parent, newMergeConflict("Cannot merge %s and %s on top of %s.", types.TypeOf(a).Describe(), types.TypeOf(b).Describe(), pDescription) } func (m *merger) threeWayMapMerge(a, b, parent types.Map, path types.Path) (merged types.Value, err error) { apply := func(target candidate, change types.ValueChanged, newVal types.Value) candidate { defer updateProgress(m.progress) switch change.ChangeType { case types.DiffChangeAdded, types.DiffChangeModified: return mapCandidate{target.getValue().(types.Map).Edit().Set(change.Key, newVal).Map()} case types.DiffChangeRemoved: return mapCandidate{target.getValue().(types.Map).Edit().Remove(change.Key).Map()} default: panic("Not Reached") } } return m.threeWayOrderedSequenceMerge(mapCandidate{a}, mapCandidate{b}, mapCandidate{parent}, apply, path) } func (m *merger) 
threeWaySetMerge(a, b, parent types.Set, path types.Path) (merged types.Value, err error) { apply := func(target candidate, change types.ValueChanged, newVal types.Value) candidate { defer updateProgress(m.progress) switch change.ChangeType { case types.DiffChangeAdded, types.DiffChangeModified: return setCandidate{target.getValue().(types.Set).Edit().Insert(newVal).Set()} case types.DiffChangeRemoved: return setCandidate{target.getValue().(types.Set).Edit().Remove(newVal).Set()} default: panic("Not Reached") } } return m.threeWayOrderedSequenceMerge(setCandidate{a}, setCandidate{b}, setCandidate{parent}, apply, path) } func (m *merger) threeWayStructMerge(a, b, parent types.Struct, path types.Path) (merged types.Value, err error) { apply := func(target candidate, change types.ValueChanged, newVal types.Value) candidate { defer updateProgress(m.progress) // Right now, this always iterates over all fields to create a new Struct, because there's no API for adding/removing a field from an existing struct type. 
targetVal := target.getValue().(types.Struct) if f, ok := change.Key.(types.String); ok { field := string(f) data := types.StructData{} targetVal.IterFields(func(name string, v types.Value) bool { if name != field { data[name] = v } return false }) if change.ChangeType == types.DiffChangeAdded || change.ChangeType == types.DiffChangeModified { data[field] = newVal } return structCandidate{types.NewStruct(targetVal.Name(), data)} } panic(fmt.Errorf("Bad key type in diff: %s", types.TypeOf(change.Key).Describe())) } return m.threeWayOrderedSequenceMerge(structCandidate{a}, structCandidate{b}, structCandidate{parent}, apply, path) } func listAssert(vrw types.ValueReadWriter, a, b, parent types.Value) (aList, bList, pList types.List, ok bool) { var aOk, bOk, pOk bool aList, aOk = a.(types.List) bList, bOk = b.(types.List) if parent != nil { pList, pOk = parent.(types.List) } else { pList, pOk = types.NewList(vrw), true } return aList, bList, pList, aOk && bOk && pOk } func mapAssert(vrw types.ValueReadWriter, a, b, parent types.Value) (aMap, bMap, pMap types.Map, ok bool) { var aOk, bOk, pOk bool aMap, aOk = a.(types.Map) bMap, bOk = b.(types.Map) if parent != nil { pMap, pOk = parent.(types.Map) } else { pMap, pOk = types.NewMap(vrw), true } return aMap, bMap, pMap, aOk && bOk && pOk } func refAssert(a, b, parent types.Value, vrw types.ValueReadWriter) (aValue, bValue, pValue types.Value, ok bool) { var aOk, bOk, pOk bool var aRef, bRef, pRef types.Ref aRef, aOk = a.(types.Ref) bRef, bOk = b.(types.Ref) if !aOk || !bOk { return } aValue = aRef.TargetValue(vrw) bValue = bRef.TargetValue(vrw) if parent != nil { if pRef, pOk = parent.(types.Ref); pOk { pValue = pRef.TargetValue(vrw) } } else { pOk = true // parent == nil is still OK. It just leaves pValue as nil. 
// kvs is a flat test fixture of alternating keys and values:
// {key0, value0, key1, value1, ...}.
type kvs []interface{}

// items exposes the underlying slice; it is what makes kvs usable as a seq.
func (kv kvs) items() []interface{} {
	return kv
}

// remove returns a copy of kv without the pairs whose key equals k. The
// receiver is left untouched.
func (kv kvs) remove(k interface{}) kvs {
	kept := make(kvs, 0, len(kv))
	for i := 0; i < len(kv); i += 2 {
		if kv[i] == k {
			continue
		}
		kept = append(kept, kv[i], kv[i+1])
	}
	return kept
}

// set returns a copy of kv in which every pair whose key equals k carries the
// value v. A key that is not present is NOT added. The receiver is left
// untouched.
func (kv kvs) set(k, v interface{}) kvs {
	updated := make(kvs, len(kv))
	for i := 0; i < len(kv); i += 2 {
		key, val := kv[i], kv[i+1]
		if key == k {
			val = v
		}
		updated[i], updated[i+1] = key, val
	}
	return updated
}
"a-three-diff", "a4", "a-four", "a5", "a-five"} aaMerged = kvs{"a1", "a-one", "a3", "a-three-diff", "a4", "a-four", "a5", "a-five", "a6", "a-six"} mm1 = kvs{} mm1a = kvs{"k1", kvs{"a", 0}} mm1b = kvs{"k1", kvs{"b", 1}} mm1Merged = kvs{"k1", kvs{"a", 0, "b", 1}} mm2 = kvs{"k2", aa1, "k3", "k-three"} mm2a = kvs{"k1", kvs{"a", 0}, "k2", aa1a, "k3", "k-three", "k4", "k-four"} mm2b = kvs{"k1", kvs{"b", 1}, "k2", aa1b} mm2Merged = kvs{"k1", kvs{"a", 0, "b", 1}, "k2", aaMerged, "k4", "k-four"} ) type ThreeWayKeyValMergeSuite struct { ThreeWayMergeSuite } type ThreeWayMapMergeSuite struct { ThreeWayKeyValMergeSuite } func (s *ThreeWayMapMergeSuite) SetupSuite() { s.create = func(seq seq) (val types.Value) { if seq != nil { keyValues := valsToTypesValues(s.create, seq.items()...) val = types.NewMap(s.vs, keyValues...) } return } s.typeStr = "Map" } type ThreeWayStructMergeSuite struct { ThreeWayKeyValMergeSuite } func (s *ThreeWayStructMergeSuite) SetupSuite() { s.create = func(seq seq) (val types.Value) { if seq != nil { kv := seq.items() fields := types.StructData{} for i := 0; i < len(kv); i += 2 { fields[kv[i].(string)] = valToTypesValue(s.create, kv[i+1]) } val = types.NewStruct("TestStruct", fields) } return } s.typeStr = "Struct" } func (s *ThreeWayKeyValMergeSuite) TestThreeWayMerge_DoNothing() { s.tryThreeWayMerge(nil, nil, aa1, aa1) } func (s *ThreeWayKeyValMergeSuite) TestThreeWayMerge_NoRecursion() { s.tryThreeWayMerge(aa1a, aa1b, aa1, aaMerged) s.tryThreeWayMerge(aa1b, aa1a, aa1, aaMerged) } func (s *ThreeWayKeyValMergeSuite) TestThreeWayMerge_RecursiveCreate() { s.tryThreeWayMerge(mm1a, mm1b, mm1, mm1Merged) s.tryThreeWayMerge(mm1b, mm1a, mm1, mm1Merged) } func (s *ThreeWayKeyValMergeSuite) TestThreeWayMerge_RecursiveCreateNil() { s.tryThreeWayMerge(mm1a, mm1b, nil, mm1Merged) s.tryThreeWayMerge(mm1b, mm1a, nil, mm1Merged) } func (s *ThreeWayKeyValMergeSuite) TestThreeWayMerge_RecursiveMerge() { s.tryThreeWayMerge(mm2a, mm2b, mm2, mm2Merged) 
// TestThreeWayMerge_CustomMerge merges two candidates that conflict in two
// places and checks that a caller-supplied ResolveFunc is consulted for each
// conflict, that its decisions end up in the merged value, and that it is
// handed the path of each conflict.
func (s *ThreeWayKeyValMergeSuite) TestThreeWayMerge_CustomMerge() {
	p := kvs{"k1", "k-one", "k2", "k-two", "mm1", mm1, "s1", "s-one"}
	// a modifies k1 and k2 and adds n1.
	a := kvs{"k1", "k-won", "k2", "k-too", "mm1", mm1, "s1", "s-one", "n1", kvs{"a", "1"}}
	// b removes k1, replaces mm1 and adds n1 with a different value for "a".
	b := kvs{"k2", "k-two", "mm1", "mm-one", "s1", "s-one", "n1", kvs{"a", "2"}}
	// Expected: k1 is removed (b wins that conflict) and n1.a is "1" (a wins
	// that one); the non-conflicting changes of both sides are kept.
	exp := kvs{"k2", "k-too", "mm1", "mm-one", "s1", "s-one", "n1", kvs{"a", "1"}}

	// One entry per expected conflict, in the order they are reported; each
	// entry lists a substring expected in the corresponding path part.
	expectedConflictPaths := [][]string{{"k1"}, {"n1", "a"}}
	conflictPaths := []types.Path{}
	// resolve records the path of every conflict. It sides with b when a's
	// value is a Map or when b removed the entry, and with a otherwise.
	resolve := func(aChange, bChange types.DiffChangeType, aVal, bVal types.Value, p types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
		conflictPaths = append(conflictPaths, p)
		if _, ok := aVal.(types.Map); ok || bChange == types.DiffChangeRemoved {
			return bChange, bVal, true
		}
		return aChange, aVal, true
	}

	merged, err := ThreeWay(s.create(a), s.create(b), s.create(p), s.vs, resolve, nil)
	if s.NoError(err) {
		expected := s.create(exp)
		s.True(expected.Equals(merged), "%s != %s", types.EncodedValue(expected), types.EncodedValue(merged))
	}
	// Only compare the individual paths if the number of conflicts matches.
	if s.Len(conflictPaths, len(expectedConflictPaths), "Wrong number of conflicts!") {
		for i := 0; i < len(conflictPaths); i++ {
			for j, c := range conflictPaths[i] {
				s.Contains(c.String(), expectedConflictPaths[i][j])
			}
		}
	}
}
// threeWayListMerge merges the list candidates a and b, which both derive
// from parent. It diffs each candidate against parent concurrently and walks
// the two resulting splice streams side by side, applying splices to a copy
// of parent. Overlapping splices are only accepted when both sides perform
// exactly the same change (see canMerge); otherwise parent is returned
// together with an *ErrMergeConflict.
func threeWayListMerge(a, b, parent types.List) (merged types.List, err error) {
	aSpliceChan, bSpliceChan := make(chan types.Splice), make(chan types.Splice)
	aStopChan, bStopChan := make(chan struct{}, 1), make(chan struct{}, 1)

	// Each diff runs in its own goroutine and closes its splice channel when
	// done, so that the loop below reads zero values once a diff is exhausted.
	go func() {
		a.Diff(parent, aSpliceChan, aStopChan)
		close(aSpliceChan)
	}()
	go func() {
		b.Diff(parent, bSpliceChan, bStopChan)
		close(bSpliceChan)
	}()

	// stopAndDrain closes the stop channel of a diff and then reads its
	// splice channel until it is closed, so that the diff goroutine is not
	// left blocked on a send when this function returns early.
	stopAndDrain := func(stop chan<- struct{}, drain <-chan types.Splice) {
		close(stop)
		for range drain {
		}
	}

	defer stopAndDrain(aStopChan, aSpliceChan)
	defer stopAndDrain(bStopChan, bSpliceChan)

	// The algorithm below relies on determining whether one splice "comes
	// before" another, and whether the splices coming from the two diffs
	// remove/add precisely the same elements. Unfortunately, the Golang
	// zero-value for types.Splice (which is what gets read out of
	// a/bSpliceChan when they're closed) is actually a valid splice, albeit a
	// meaningless one that indicates a no-op. It "comes before" any other
	// splice, so having it in play really gums up the logic below. Rather
	// than specifically checking for it all over the place, swap the
	// zero-splice out for one full of SPLICE_UNASSIGNED, which is really the
	// proper invalid splice value. That splice doesn't come before ANY valid
	// splice, so the logic below can flow more clearly.
	zeroSplice := types.Splice{}
	zeroToInvalid := func(sp types.Splice) types.Splice {
		if sp == zeroSplice {
			return types.Splice{
				SpAt:      types.SPLICE_UNASSIGNED,
				SpRemoved: types.SPLICE_UNASSIGNED,
				SpAdded:   types.SPLICE_UNASSIGNED,
				SpFrom:    types.SPLICE_UNASSIGNED,
			}
		}
		return sp
	}
	invalidSplice := zeroToInvalid(types.Splice{})

	merged = parent
	// offset is the shift between an index in parent and the same position in
	// merged, caused by the splices applied so far. It is unsigned, so a net
	// removal wraps around; adding it to an index wraps back, which yields
	// the intended result modulo 2^64.
	offset := uint64(0)
	aSplice, bSplice := invalidSplice, invalidSplice
	for {
		// Get the next splice from both a and b. If either diff(a, parent) or
		// diff(b, parent) is complete, aSplice or bSplice will get an invalid
		// types.Splice. Generally, though, this allows us to proceed through
		// both diffs in (index) order, considering the "current" splice from
		// both diffs at the same time.
		if aSplice == invalidSplice {
			aSplice = zeroToInvalid(<-aSpliceChan)
		}
		if bSplice == invalidSplice {
			bSplice = zeroToInvalid(<-bSpliceChan)
		}
		// Both channels are producing zero values, so we're done.
		if aSplice == invalidSplice && bSplice == invalidSplice {
			break
		}
		if overlap(aSplice, bSplice) {
			if canMerge(a, b, aSplice, bSplice) {
				// Both sides made the identical change: apply it once and
				// consume both splices.
				splice := merge(aSplice, bSplice)
				merged = apply(a, merged, offset, splice)
				offset += splice.SpAdded - splice.SpRemoved
				aSplice, bSplice = invalidSplice, invalidSplice
				continue
			}
			return parent, newMergeConflict("Overlapping splices: %s vs %s", describeSplice(aSplice), describeSplice(bSplice))
		}
		// No overlap: apply whichever splice starts first and fetch that
		// side's next splice on the following iteration.
		if aSplice.SpAt < bSplice.SpAt {
			merged = apply(a, merged, offset, aSplice)
			offset += aSplice.SpAdded - aSplice.SpRemoved
			aSplice = invalidSplice
			continue
		}
		merged = apply(b, merged, offset, bSplice)
		offset += bSplice.SpAdded - bSplice.SpRemoved
		bSplice = invalidSplice
	}

	return merged, nil
}
func merge(s1, s2 types.Splice) types.Splice { return s1 } func apply(source, target types.List, offset uint64, s types.Splice) types.List { toAdd := make([]types.Valuable, s.SpAdded) iter := source.IteratorAt(s.SpFrom) for i := 0; uint64(i) < s.SpAdded; i++ { v := iter.Next() if v == nil { d.Panic("List diff returned a splice that inserts a nonexistent element.") } toAdd[i] = v } return target.Edit().Splice(s.SpAt+offset, s.SpRemoved, toAdd...).List() } func describeSplice(s types.Splice) string { return fmt.Sprintf("%d elements removed at %d; adding %d elements", s.SpRemoved, s.SpAt, s.SpAdded) } ================================================ FILE: go/merge/three_way_list_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package merge import ( "testing" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/suite" ) func TestThreeWayListMerge(t *testing.T) { suite.Run(t, &ThreeWayListMergeSuite{}) } type ThreeWayListMergeSuite struct { ThreeWayMergeSuite } func (s *ThreeWayListMergeSuite) SetupSuite() { s.create = func(i seq) (val types.Value) { if i != nil { items := valsToTypesValues(s.create, i.items()...) val = types.NewList(s.vs, items...) 
} return } s.typeStr = "List" } var p = items{"a", "b", "c", "d", "e"} func (s *ThreeWayListMergeSuite) TestThreeWayMerge_DoNothing() { s.tryThreeWayMerge(nil, nil, p, p) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_NoLengthChange() { a := items{"a", 1, "c", "d", "e"} b := items{"a", "b", "c", 2, "e"} m := items{"a", 1, "c", 2, "e"} s.tryThreeWayMerge(a, b, p, m) s.tryThreeWayMerge(b, a, p, m) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_HandleEmpty() { s.tryThreeWayMerge(p, items{}, items{}, p) s.tryThreeWayMerge(items{}, p, items{}, p) s.tryThreeWayMerge(p, p, items{}, p) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_HandleNil() { s.tryThreeWayMerge(p, items{}, nil, p) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_MakeLonger() { a := items{"a", 1, 2, "c", "d", "e"} b := items{"a", "b", "c", 3, "e"} m := items{"a", 1, 2, "c", 3, "e"} s.tryThreeWayMerge(a, b, p, m) s.tryThreeWayMerge(b, a, p, m) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_MakeShorter() { a := items{"a", "c", "d", "e"} b := items{"a", "b", "c", 3, "e"} m := items{"a", "c", 3, "e"} s.tryThreeWayMerge(a, b, p, m) s.tryThreeWayMerge(b, a, p, m) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_BothSidesRemove() { a := items{"a", "c", "d", "e"} b := items{"a", "b", "c", "e"} m := items{"a", "c", "e"} s.tryThreeWayMerge(a, b, p, m) s.tryThreeWayMerge(b, a, p, m) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_OverlapSameRemoveNoInsert() { a := items{"a", "d", "e"} b := items{"a", "d", "e"} m := items{"a", "d", "e"} s.tryThreeWayMerge(a, b, p, m) s.tryThreeWayMerge(b, a, p, m) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_OverlapSameRemoveSameInsert() { a := items{"a", 1, 2, 3, "d", "e"} b := items{"a", 1, 2, 3, "d", "e"} m := items{"a", 1, 2, 3, "d", "e"} s.tryThreeWayMerge(a, b, p, m) s.tryThreeWayMerge(b, a, p, m) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_RemoveUpToOtherSideInsertionPoint() { a := items{"a", 1, 2, "c", "d", "e"} b := 
items{"a", "b", 3, "c", "d", "e"} m := items{"a", 1, 2, 3, "c", "d", "e"} s.tryThreeWayMerge(a, b, p, m) s.tryThreeWayMerge(b, a, p, m) } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_ConflictingAppends() { a := append(p, 1) b := append(p, 2) s.tryThreeWayConflict(s.create(a), s.create(b), s.create(p), "Overlapping splices: 0 elements removed at 5; adding 1 elements") s.tryThreeWayConflict(s.create(b), s.create(a), s.create(p), "Overlapping splices: 0 elements removed at 5; adding 1 elements") } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_OverlappingRemoves() { a := p[:4] b := p[:3] s.tryThreeWayConflict(s.create(a), s.create(b), s.create(p), "Overlapping splices: 1 elements removed at 4") s.tryThreeWayConflict(s.create(b), s.create(a), s.create(p), "Overlapping splices: 2 elements removed at 3") } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_SameRemoveAddPrefix() { a := items{"a", "b", "c", 1} b := items{"a", "b", "c", 1, 2} s.tryThreeWayConflict(s.create(a), s.create(b), s.create(p), "Overlapping splices: 2 elements removed at 3; adding 1 elements") s.tryThreeWayConflict(s.create(b), s.create(a), s.create(p), "Overlapping splices: 2 elements removed at 3; adding 2 elements") } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_RemoveSupersetAddPrefix() { a := items{"a", "b", "c", 1, 2} b := items{"a", "b", "c", "d", 1} s.tryThreeWayConflict(s.create(a), s.create(b), s.create(p), "Overlapping splices: 2 elements removed at 3; adding 2 elements") s.tryThreeWayConflict(s.create(b), s.create(a), s.create(p), "Overlapping splices: 1 elements removed at 4; adding 1 elements") } func (s *ThreeWayListMergeSuite) TestThreeWayMerge_RemoveOtherSideInsertionPoint() { a := items{"a", "c", "d", "e"} b := items{"a", 1, "b", "c", "d", "e"} s.tryThreeWayConflict(s.create(a), s.create(b), s.create(p), "Overlapping splices: 1 elements removed at 1; adding 0 elements") s.tryThreeWayConflict(s.create(b), s.create(a), s.create(p), "Overlapping splices: 0 elements 
// threeWayOrderedSequenceMerge merges the candidates a and b against parent
// for collections whose diffs are reported per key. It diffs each candidate
// against parent concurrently and walks both change streams side by side:
// a change that only one side made is applied directly through |apply|,
// while changes to the same key on both sides are reconciled by
// m.mergeChanges. On a conflict the unmodified parent value is returned
// together with the error.
func (m *merger) threeWayOrderedSequenceMerge(a, b, parent candidate, apply applyFunc, path types.Path) (types.Value, error) {
	aChangeChan, bChangeChan := make(chan types.ValueChanged), make(chan types.ValueChanged)
	aStopChan, bStopChan := make(chan struct{}, 1), make(chan struct{}, 1)

	// Each diff runs in its own goroutine and closes its change channel when
	// done, so that the loop below reads zero values once a diff is exhausted.
	go func() {
		a.diff(parent, aChangeChan, aStopChan)
		close(aChangeChan)
	}()
	go func() {
		b.diff(parent, bChangeChan, bStopChan)
		close(bChangeChan)
	}()

	// On every return path, signal both diffs to stop and drain what they
	// still have to send.
	defer stopAndDrain(aStopChan, aChangeChan)
	defer stopAndDrain(bStopChan, bChangeChan)

	merged := parent
	aChange, bChange := types.ValueChanged{}, types.ValueChanged{}
	for {
		// Get the next change from both a and b. If either diff(a, parent) or
		// diff(b, parent) is complete, aChange or bChange will get an empty
		// types.ValueChanged whose Key is nil. Generally, though, this allows
		// us to proceed through both diffs in (key) order, considering the
		// "current" change from both diffs at the same time.
		if aChange.Key == nil {
			aChange = <-aChangeChan
		}
		if bChange.Key == nil {
			bChange = <-bChangeChan
		}
		// Both channels are producing zero values, so we're done.
		if aChange.Key == nil && bChange.Key == nil {
			break
		}

		// Since diff generates changes in key-order, and we never skip over a
		// change without processing it, we can simply compare the keys at
		// which aChange and bChange occurred to determine if either is safe
		// to apply to the merge result without further processing. This is
		// because if, e.g. aChange.Key.Less(bChange.Key), we know that the
		// diff of b will never generate a change at that key. If it was going
		// to, it would have done so on an earlier iteration of this loop and
		// been processed at that time.
		// It's also obviously OK to apply a change if only one diff is
		// generating any changes, e.g. aChange.Key is non-nil and bChange.Key
		// is nil.
		if aChange.Key != nil && (bChange.Key == nil || aChange.Key.Less(bChange.Key)) {
			merged = apply(merged, aChange, a.get(aChange.Key))
			aChange = types.ValueChanged{}
			continue
		} else if bChange.Key != nil && (aChange.Key == nil || bChange.Key.Less(aChange.Key)) {
			merged = apply(merged, bChange, b.get(bChange.Key))
			bChange = types.ValueChanged{}
			continue
		}
		if !aChange.Key.Equals(bChange.Key) {
			d.Panic("Diffs have skewed!") // Sanity check.
		}

		// Both sides changed the same key: reconcile the two changes, then
		// consume both.
		change, mergedVal, err := m.mergeChanges(aChange, bChange, a, b, parent, apply, path)
		if err != nil {
			return parent.getValue(), err
		}
		merged = apply(merged, change, mergedVal)
		aChange, bChange = types.ValueChanged{}, types.ValueChanged{}
	}
	return merged.getValue(), nil
}
https://github.com/attic-labs/noms/issues/3467 return types.ValueChanged{ChangeType: change, Key: aChange.Key, OldValue: nil, NewValue: nil}, mergedVal, nil } return change, nil, newMergeConflict("Conflict:\n%s\nvs\n%s\n", describeChange(aChange), describeChange(bChange)) } if aChange.ChangeType == types.DiffChangeRemoved || aValue.Equals(bValue) { // If both diffs generated a remove, or if the new value is the same in both, merge is fine. return aChange, aValue, nil } // There's one case that might still be OK even if aValue and bValue differ: different, but mergeable, compound values of the same type being added/modified at the same key, e.g. a Map being added to both a and b. If either is a primitive, or Values of different Kinds were added, though, we're in conflict. if !unmergeable(aValue, bValue) { // TODO: Add concurrency. var err error if mergedVal, err = m.threeWay(aValue, bValue, p.get(aChange.Key), path); err == nil { return aChange, mergedVal, nil } return change, nil, err } if change, mergedVal, ok := m.resolve(aChange.ChangeType, bChange.ChangeType, aValue, bValue, path); ok { // TODO: Correctly encode Old/NewValue with this change report. 
https://github.com/attic-labs/noms/issues/3467 return types.ValueChanged{ChangeType: change, Key: aChange.Key, OldValue: nil, NewValue: nil}, mergedVal, nil } return change, nil, newMergeConflict("Conflict:\n%s = %s\nvs\n%s = %s", describeChange(aChange), types.EncodedValue(aValue), describeChange(bChange), types.EncodedValue(bValue)) } func stopAndDrain(stop chan<- struct{}, drain <-chan types.ValueChanged) { close(stop) for range drain { } } func describeChange(change types.ValueChanged) string { op := "" switch change.ChangeType { case types.DiffChangeAdded: op = "added" case types.DiffChangeModified: op = "modded" case types.DiffChangeRemoved: op = "removed" } return fmt.Sprintf("%s %s", op, types.EncodedValue(change.Key)) } ================================================ FILE: go/merge/three_way_set_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package merge import ( "testing" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/suite" ) func TestThreeWaySetMerge(t *testing.T) { suite.Run(t, &ThreeWaySetMergeSuite{}) } type items []interface{} func (kv items) items() []interface{} { return kv } type ThreeWaySetMergeSuite struct { ThreeWayMergeSuite } func (s *ThreeWaySetMergeSuite) SetupSuite() { s.create = func(i seq) (val types.Value) { if i != nil { keyValues := valsToTypesValues(s.create, i.items()...) val = types.NewSet(s.vs, keyValues...) 
// TestThreeWayMerge_ImmediateConflict checks that merging a Set with a value
// of another kind (an empty Map) is rejected right away, whichever of the two
// candidates carries the mismatching value.
func (s *ThreeWaySetMergeSuite) TestThreeWayMerge_ImmediateConflict() {
	// The Map is candidate a, the Set candidate b.
	s.tryThreeWayConflict(types.NewMap(s.vs), s.create(ss1b), s.create(ss1), "Cannot merge Map<> with "+s.typeStr)
	// Same values with the roles swapped.
	s.tryThreeWayConflict(s.create(ss1b), types.NewMap(s.vs), s.create(ss1), "Cannot merge "+s.typeStr)
}
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package merge import ( "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) type seq interface { items() []interface{} } type ThreeWayMergeSuite struct { suite.Suite vs *types.ValueStore create func(seq) types.Value typeStr string } func (s *ThreeWayMergeSuite) SetupTest() { storage := &chunks.MemoryStorage{} s.vs = types.NewValueStore(storage.NewView()) } func (s *ThreeWayMergeSuite) TearDownTest() { s.vs.Close() } func (s *ThreeWayMergeSuite) tryThreeWayMerge(a, b, p, exp seq) { merged, err := ThreeWay(s.create(a), s.create(b), s.create(p), s.vs, nil, nil) if s.NoError(err) { expected := s.create(exp) s.True(expected.Equals(merged), "%s != %s", types.EncodedValue(expected), types.EncodedValue(merged)) } } func (s *ThreeWayMergeSuite) tryThreeWayConflict(a, b, p types.Value, contained string) { m, err := ThreeWay(a, b, p, s.vs, nil, nil) if s.Error(err) { s.Contains(err.Error(), contained) return } s.Fail("Expected error!", "Got successful merge: %s", types.EncodedValue(m)) } func valsToTypesValues(f func(seq) types.Value, items ...interface{}) []types.Value { keyValues := []types.Value{} for _, e := range items { v := valToTypesValue(f, e) keyValues = append(keyValues, v) } return keyValues } func valToTypesValue(f func(seq) types.Value, v interface{}) types.Value { var v1 types.Value switch t := v.(type) { case string: v1 = types.String(t) case int: v1 = types.Number(t) case seq: v1 = f(t) case types.Value: v1 = t } return v1 } func TestThreeWayMerge_PrimitiveConflict(t *testing.T) { threeWayConflict := func(a, b, p types.Value, contained string) { mrgr := &merger{} m, err := mrgr.threeWay(a, b, p, nil) if assert.Error(t, err) { assert.Contains(t, err.Error(), contained) return } assert.Fail(t, "Expected error!", "Got successful merge: %s", 
types.EncodedValue(m)) } a, b, p := types.Number(7), types.String("nope"), types.String("parent") threeWayConflict(a, b, p, "Number and String on top of") threeWayConflict(b, a, p, "String and Number on top of") } ================================================ FILE: go/metrics/histogram.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package metrics import ( "fmt" "strings" "time" "github.com/attic-labs/noms/go/d" humanize "github.com/dustin/go-humanize" ) // Histogram is a shameless and low-rent knock of the chromium project's // histogram: // https://chromium.googlesource.com/chromium/src/base/+/master/metrics/histogram.h // // It logically stores a running histogram of uint64 values and shares some // important features of its inspiration: // * It acccepts a correctness deficit in return for not needing to lock. // IOW, concurrent calls to Sample may clobber each other. // * It trades compactness and ease of arithmatic across histograms for // precision. Samples lose precision up to the range of the values which // are stored in a bucket // // Only implemented: Log2-based histogram type Histogram struct { sum uint64 buckets [bucketCount]uint64 ToString ToStringFunc } type ToStringFunc func(v uint64) string func identToString(v uint64) string { return fmt.Sprintf("%d", v) } const bucketCount = 64 // Sample adds a uint64 data point to the histogram func (h *Histogram) Sample(v uint64) { d.PanicIfTrue(v == 0) h.sum += v pot := 0 for v > 0 { v = v >> 1 pot++ } h.buckets[pot-1]++ } // SampleTimeSince is a convenience wrapper around Sample which takes the // duration since |t|, if 0, rounds to 1 and passes to Sample() as an uint64 // number of nanoseconds. 
func (h *Histogram) SampleTimeSince(t time.Time) { d := time.Since(t) if d == 0 { d = 1 } h.Sample(uint64(d)) } // SampleLen is a convenience wrapper around Sample which internally type // asserts the int to a uint64 func (h *Histogram) SampleLen(l int) { h.Sample(uint64(l)) } func (h Histogram) bucketVal(bucket int) uint64 { return 1 << (uint64(bucket)) } // Sum return the sum of sampled values, note that Sum can be overflowed without // overflowing the histogram buckets. func (h Histogram) Sum() uint64 { return h.sum } // Add returns a new Histogram which is the result of adding this and other // bucket-wise. func (h *Histogram) Add(other Histogram) { h.sum += other.sum for i := 0; i < bucketCount; i++ { h.buckets[i] += other.buckets[i] } } // Delta returns a new Histogram which is the result of subtracting other from // this bucket-wise. The intent is to capture changes in the state of histogram // which is collecting samples over some time period. It will panic if any // bucket from other is larger than the corresponding bucket in this. func (h Histogram) Delta(other Histogram) Histogram { nh := Histogram{} nh.sum = h.sum - other.sum for i := 0; i < bucketCount; i++ { c := h.buckets[i] l := other.buckets[i] d.PanicIfTrue(l > c) nh.buckets[i] = c - l } return nh } // Mean returns 0 if there are no samples, and h.Sum()/h.Samples otherwise. 
func (h Histogram) Mean() uint64 { samples := h.Samples() if samples == 0 { return 0 } return h.Sum() / samples } // Samples returns the number of samples contained in the histogram func (h Histogram) Samples() uint64 { s := uint64(0) for i := 0; i < bucketCount; i++ { s += h.buckets[i] } return s } func (h Histogram) String() string { f := h.ToString if f == nil { f = identToString } return fmt.Sprintf("Mean: %s, Sum: %s, Samples: %d", f(h.Mean()), f(h.Sum()), h.Samples()) } func NewTimeHistogram() Histogram { return Histogram{ToString: timeToString} } func timeToString(v uint64) string { return time.Duration(v).String() } // NewByteHistogram stringifies values using humanize over byte values func NewByteHistogram() Histogram { return Histogram{ToString: humanize.Bytes} } const colWidth = 100 // Report returns an ASCII graph of the non-zero range of normalized buckets. // IOW, it returns a basic graph of the histogram func (h Histogram) Report() string { ts := h.ToString if ts == nil { ts = identToString } maxSamples := uint64(0) foundFirstNonEmpty := false firstNonEmpty := 0 lastNonEmpty := 0 for i := 0; i < bucketCount; i++ { samples := h.buckets[i] if samples > 0 { lastNonEmpty = i if !foundFirstNonEmpty { foundFirstNonEmpty = true firstNonEmpty = i } } if samples > maxSamples { maxSamples = samples } } if maxSamples == 0 { return "" } val := uint64(1) p := func(bucket int) string { samples := h.buckets[bucket] val := h.bucketVal(bucket) adj := samples * colWidth / maxSamples return fmt.Sprintf("%s> %s: (%d)", strings.Repeat("-", int(adj)), ts(val), samples) } lines := make([]string, 0) for i := 0; i < bucketCount; i++ { if i >= firstNonEmpty && i <= lastNonEmpty { lines = append(lines, p(i)) } val = val << 1 } return strings.Join(lines, "\n") } ================================================ FILE: go/metrics/histogram_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package metrics import ( "testing" "github.com/stretchr/testify/assert" ) func TestHistogramBucketValue(t *testing.T) { assert := assert.New(t) h := Histogram{} assert.Equal(uint64(1<<0), h.bucketVal(0)) assert.Equal(uint64(1<<1), h.bucketVal(1)) assert.Equal(uint64(1<<2), h.bucketVal(2)) assert.Equal(uint64(1<<32), h.bucketVal(32)) assert.Equal(uint64(1<<40), h.bucketVal(40)) } func TestHistogramBasic(t *testing.T) { assert := assert.New(t) h := Histogram{} h.Sample(1) h.Sample(1) assert.Equal(uint64(2), h.buckets[0]) h.Sample(2) h.Sample(3) assert.Equal(uint64(2), h.buckets[1]) h.Sample(4) h.Sample(5) h.Sample(6) assert.Equal(uint64(3), h.buckets[2]) h.Sample(256) h.Sample(300) h.Sample(500) h.Sample(511) assert.Equal(uint64(4), h.buckets[8]) assert.Equal(uint64(11), h.Samples()) assert.Equal(uint64(1589), h.Sum()) assert.Equal(uint64(144), h.Mean()) } func TestHistogramLarge(t *testing.T) { assert := assert.New(t) h := Histogram{} h.Sample(0xfffffffffffffe30) assert.Equal(uint64(1), h.Samples()) assert.Equal(uint64(0xfffffffffffffe30), h.Sum()) } func TestHistogramAdd(t *testing.T) { assert := assert.New(t) h := Histogram{} h.Sample(1) h.Sample(2) h.Sample(10) h2 := Histogram{} h2.Sample(3) h2.Sample(1073741854) h.Add(h2) assert.Equal(uint64(5), h.Samples()) assert.Equal(uint64(1073741870), h.Sum()) assert.Equal(uint64(1073741870)/uint64(5), h.Mean()) } func TestHistogramString(t *testing.T) { assert := assert.New(t) h := Histogram{} h.Sample(1) h.Sample(2) h.Sample(10) h.Sample(3034030343) assert.Equal("Mean: 758507589, Sum: 3034030356, Samples: 4", h.String()) th := NewTimeHistogram() th.Add(h) assert.Equal("Mean: 758.507589ms, Sum: 3.034030356s, Samples: 4", th.String()) bh := NewByteHistogram() bh.Add(h) assert.Equal("Mean: 758 MB, Sum: 3.0 GB, Samples: 4", bh.String()) } func TestHistogramReport(t *testing.T) { assert := assert.New(t) h := Histogram{} 
h.Sample(1) assert.Equal("----------------------------------------------------------------------------------------------------> 1: (1)", h.Report()) h.Sample(1 << 62) assert.Equal(`----------------------------------------------------------------------------------------------------> 1: (1) > 2: (0) > 4: (0) > 8: (0) > 16: (0) > 32: (0) > 64: (0) > 128: (0) > 256: (0) > 512: (0) > 1024: (0) > 2048: (0) > 4096: (0) > 8192: (0) > 16384: (0) > 32768: (0) > 65536: (0) > 131072: (0) > 262144: (0) > 524288: (0) > 1048576: (0) > 2097152: (0) > 4194304: (0) > 8388608: (0) > 16777216: (0) > 33554432: (0) > 67108864: (0) > 134217728: (0) > 268435456: (0) > 536870912: (0) > 1073741824: (0) > 2147483648: (0) > 4294967296: (0) > 8589934592: (0) > 17179869184: (0) > 34359738368: (0) > 68719476736: (0) > 137438953472: (0) > 274877906944: (0) > 549755813888: (0) > 1099511627776: (0) > 2199023255552: (0) > 4398046511104: (0) > 8796093022208: (0) > 17592186044416: (0) > 35184372088832: (0) > 70368744177664: (0) > 140737488355328: (0) > 281474976710656: (0) > 562949953421312: (0) > 1125899906842624: (0) > 2251799813685248: (0) > 4503599627370496: (0) > 9007199254740992: (0) > 18014398509481984: (0) > 36028797018963968: (0) > 72057594037927936: (0) > 144115188075855872: (0) > 288230376151711744: (0) > 576460752303423488: (0) > 1152921504606846976: (0) > 2305843009213693952: (0) ----------------------------------------------------------------------------------------------------> 4611686018427387904: (1)`, h.Report()) h = Histogram{} h.Sample(4) h.Sample(8) assert.Equal(`----------------------------------------------------------------------------------------------------> 4: (1) ----------------------------------------------------------------------------------------------------> 8: (1)`, h.Report()) } ================================================ FILE: go/nbs/NBS-on-AWS.md ================================================ # Backing a Noms Block Store with AWS How to use S3 and DynamoDB 
as the persistent storage layer for a Noms Block Store (NBS).

## Overview

When running atop AWS, NBS stores immutable chunk data in S3 objects and mutable state -- a 'manifest' indicating which S3 objects are live, essentially -- in DynamoDB. It is possible to have many separate Noms Block Stores backed by a single bucket/table as long as you give each a distinct name. You could also choose to spin up a separate bucket/table pair for each NBS, though this is not required -- and, indeed, probably overkill.

## AWS Setup

This assumes a setup in a single AWS region.

### Create an S3 bucket and DynamoDB table

There are no special requirements on the S3 bucket you create. Just choose a name and, once it's created, remember the ARN for use later.

The DynamoDB table you create, on the other hand, does need to have a particular structure. It must have a *primary partition key* that is a *string* with the name *db*. Again, remember its ARN for later use.

### Access control

The NBS code honors AWS credentials files, so when running on your development machine the easiest thing to do is drop the creds of the user that created the bucket and table above into `~/.aws/credentials` and run that way. This isn't a great approach for running on an EC2 instance in production, however. The right way to do that is to create an IAM Role, and run your instance as that role.
Create such a role using the IAM Management Console (or command line tool of your choice) and make sure it has a policy with at least the following permissions:

```json
{
	"Version": "2012-10-17",
	"Statement": [
		{
			"Sid": "Stmt1453230562000",
			"Effect": "Allow",
			"Action": [
				"dynamodb:BatchGetItem",
				"dynamodb:BatchWriteItem",
				"dynamodb:DeleteItem",
				"dynamodb:GetItem",
				"dynamodb:PutItem"
			],
			"Resource": [
				"[ARN for your DynamoDB table]"
			]
		},
		{
			"Sid": "Stmt1454457944000",
			"Effect": "Allow",
			"Action": [
				"s3:AbortMultipartUpload",
				"s3:CompleteMultipartUpload",
				"s3:CreateMultipartUpload",
				"s3:GetObject",
				"s3:PutObject",
				"s3:UploadPart",
				"s3:UploadPartCopy"
			],
			"Resource": [
				"[ARN for your S3 bucket]"
			]
		}
	]
}
```

This is where the ARNs for your bucket and table come in.

## Instantiating an NBS-on-AWS ChunkStore

### On the command line

```shell
noms ds aws:dynamo-table/s3-bucket/store-name
```

### NewAWSStore

If your code only needs to create a store pointing to a single named store, you can write code similar to the following:

```go
sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2")))
store := nbs.NewAWSStore("dynamo-table", "store-name", "s3-bucket", s3.New(sess), dynamodb.New(sess), 1<<28)
```

### NewAWSStoreFactory

If you find yourself wanting to create NBS instances pointing to multiple, different named stores, you can use `nbs.NewAWSStoreFactory()`, which also supports caching Noms data on disk in some cases:

```go
sess := session.Must(session.NewSession(aws.NewConfig().WithRegion("us-west-2")))
fact := nbs.NewAWSStoreFactory(
	sess, "dynamo-table", "s3-bucket",
	128 /* Maximum number of open files in cache */,
	1 << 28 /* Amount of index data to cache in memory */,
	1 << 30 /* Amount of Noms data to cache on disk */,
	"/path/to/cache" /* Directory in which to cache Noms data */,
)
store := fact.CreateStore("store-name")
```

================================================
FILE: go/nbs/README.md
================================================
# Noms Block Store

A horizontally-scalable storage backend for Noms.

## Overview

NBS is a storage layer optimized for the needs of the [Noms](https://github.com/attic-labs/noms) database.

NBS can run in two configurations: either backed by local disk, or [backed by Amazon AWS](https://github.com/attic-labs/noms/blob/master/go/nbs/NBS-on-AWS.md).

When backed by local disk, NBS is significantly faster than LevelDB for our workloads and supports full multiprocess concurrency.

When backed by AWS, NBS stores its data mainly in S3, along with a single DynamoDB item. This configuration makes Noms "[effectively CA](https://research.google.com/pubs/pub45855.html)", in the sense that Noms is always consistent, and Noms+NBS is as available as DynamoDB and S3 are. This configuration also gives Noms the cost profile of S3 with power closer to that of a traditional database.

## Details

* NBS provides storage for a content-addressed DAG of nodes (with exactly one root), where each node is encoded as a sequence of bytes and addressed by a 20-byte hash of the byte-sequence.
* There is no `update` or `delete` -- only `insert`, `update root` and `garbage collect`.
* Insertion of any novel byte-sequence is durable only upon updating the root.
* File-level multiprocess concurrency is supported, with optimistic locking for multiple writers.
* Writers need not worry about re-writing duplicate chunks. NBS will efficiently detect and drop (most) duplicates.

## Perf

For the file back-end, perf is substantially better than LevelDB mainly because LDB spends substantial IO with the goal of keeping KV pairs in key-order, which doesn't benefit Noms at all. NBS locates related chunks together and thus reading data from an NBS store can be done quite a lot faster. As an example, storing & retrieving a 1.1GB MP4 video file on a MBP i5 2.9GHz:

* LDB
  * Initial import: 44 MB/s, size on disk: 1.1 GB.
  * Import exact same bytes: 35 MB/s, size on disk: 1.4 GB.
* Export: 60 MB/s * NBS * Initial import: 72 MB/s, size on disk: 1.1 GB. * Import exact same bytes: 92 MB/s, size on disk: 1.1GB. * Export: 300 MB/s ## Status NBS is more-or-less "beta". There's still [work we want to do](https://github.com/attic-labs/noms/issues?q=is%3Aopen+is%3Aissue+label%3ANBS), but it now works better than LevelDB for our purposes and so we have made it the default local backend for Noms: ```shell # This uses nbs locally: ./csv-import foo.csv /Users/bob/csv-store::data ``` The AWS backend is available via the `aws:` scheme: ```shell ./csv-import foo.csv aws:table/bucket/database::data ``` ================================================ FILE: go/nbs/aws_chunk_source.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "sync" "time" "github.com/attic-labs/noms/go/d" ) func newAWSChunkSource(ddb *ddbTableStore, s3 *s3ObjectReader, al awsLimits, name addr, chunkCount uint32, indexCache *indexCache, stats *Stats) chunkSource { if indexCache != nil { indexCache.lockEntry(name) defer indexCache.unlockEntry(name) if index, found := indexCache.get(name); found { tra := &awsTableReaderAt{al: al, ddb: ddb, s3: s3, name: name, chunkCount: chunkCount} return &awsChunkSource{newTableReader(index, tra, s3BlockSize), name} } } t1 := time.Now() indexBytes, tra := func() ([]byte, tableReaderAt) { if al.tableMayBeInDynamo(chunkCount) { data, err := ddb.ReadTable(name, stats) if data != nil { return data, &dynamoTableReaderAt{ddb: ddb, h: name} } d.PanicIfTrue(err == nil) // There MUST be either data or an error d.PanicIfNotType(err, tableNotInDynamoErr{}) } size := indexSize(chunkCount) + footerSize buff := make([]byte, size) n, err := s3.ReadFromEnd(name, buff, stats) d.PanicIfError(err) d.PanicIfFalse(size == uint64(n)) return buff, &s3TableReaderAt{s3: s3, h: name} }() 
stats.IndexBytesPerRead.Sample(uint64(len(indexBytes))) stats.IndexReadLatency.SampleTimeSince(t1) index := parseTableIndex(indexBytes) if indexCache != nil { indexCache.put(name, index) } return &awsChunkSource{newTableReader(index, tra, s3BlockSize), name} } type awsChunkSource struct { tableReader name addr } func (acs *awsChunkSource) hash() addr { return acs.name } type awsTableReaderAt struct { once sync.Once tra tableReaderAt al awsLimits ddb *ddbTableStore s3 *s3ObjectReader name addr chunkCount uint32 } func (atra *awsTableReaderAt) hash() addr { return atra.name } func (atra *awsTableReaderAt) ReadAtWithStats(p []byte, off int64, stats *Stats) (n int, err error) { atra.once.Do(func() { atra.tra = atra.getTableReaderAt(stats) }) return atra.tra.ReadAtWithStats(p, off, stats) } func (atra *awsTableReaderAt) getTableReaderAt(stats *Stats) tableReaderAt { if atra.al.tableMayBeInDynamo(atra.chunkCount) { data, err := atra.ddb.ReadTable(atra.name, stats) if data != nil { return &dynamoTableReaderAt{ddb: atra.ddb, h: atra.name} } d.PanicIfTrue(err == nil) // There MUST be either data or an error d.PanicIfNotType(err, tableNotInDynamoErr{}) } return &s3TableReaderAt{s3: atra.s3, h: atra.name} } ================================================ FILE: go/nbs/aws_chunk_source_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "testing" "github.com/stretchr/testify/assert" ) func TestAWSChunkSource(t *testing.T) { chunks := [][]byte{ []byte("hello2"), []byte("goodbye2"), []byte("badbye2"), } tableData, h := buildTable(chunks) s3 := makeFakeS3(t) ddb := makeFakeDDB(t) s3or := &s3ObjectReader{s3, "bucket", nil, nil} dts := &ddbTableStore{ddb, "table", nil, nil} makeSrc := func(chunkMax int, ic *indexCache) chunkSource { return newAWSChunkSource( dts, s3or, awsLimits{itemMax: maxDynamoItemSize, chunkMax: uint32(chunkMax)}, h, uint32(len(chunks)), ic, &Stats{}, ) } t.Run("Dynamo", func(t *testing.T) { ddb.putData(fmtTableName(h), tableData) t.Run("NoIndexCache", func(t *testing.T) { src := makeSrc(len(chunks)+1, nil) assertChunksInReader(chunks, src, assert.New(t)) }) t.Run("WithIndexCache", func(t *testing.T) { assert := assert.New(t) index := parseTableIndex(tableData) cache := newIndexCache(1024) cache.put(h, index) baseline := ddb.numGets src := makeSrc(len(chunks)+1, cache) // constructing the table reader shouldn't have resulted in any reads assert.Zero(ddb.numGets - baseline) assertChunksInReader(chunks, src, assert) }) }) t.Run("S3", func(t *testing.T) { s3.data[h.String()] = tableData t.Run("NoIndexCache", func(t *testing.T) { src := makeSrc(len(chunks)-1, nil) assertChunksInReader(chunks, src, assert.New(t)) }) t.Run("WithIndexCache", func(t *testing.T) { assert := assert.New(t) index := parseTableIndex(tableData) cache := newIndexCache(1024) cache.put(h, index) baseline := s3.getCount src := makeSrc(len(chunks)-1, cache) // constructing the table reader shouldn't have resulted in any reads assert.Zero(s3.getCount - baseline) assertChunksInReader(chunks, src, assert) }) }) } ================================================ FILE: go/nbs/aws_table_persister.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "io" "net/url" "sort" "sync" "time" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/verbose" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/s3" ) const ( minS3PartSize = 5 * 1 << 20 // 5MiB maxS3PartSize = 64 * 1 << 20 // 64MiB maxS3Parts = 10000 maxDynamoChunks = 64 maxDynamoItemSize = 400 * (1 << 10) // 400k defaultS3PartSize = minS3PartSize // smallest allowed by S3 allows for most throughput ) type awsTablePersister struct { s3 s3svc bucket string rl chan struct{} tc tableCache ddb *ddbTableStore limits awsLimits indexCache *indexCache } type awsLimits struct { partTarget, partMin, partMax uint64 itemMax int chunkMax uint32 } func (al awsLimits) tableFitsInDynamo(name addr, dataLen int, chunkCount uint32) bool { calcItemSize := func(n addr, dataLen int) int { return len(dbAttr) + len(tablePrefix) + len(n.String()) + len(dataAttr) + dataLen } return chunkCount <= al.chunkMax && calcItemSize(name, dataLen) < al.itemMax } func (al awsLimits) tableMayBeInDynamo(chunkCount uint32) bool { return chunkCount <= al.chunkMax } func (s3p awsTablePersister) Open(name addr, chunkCount uint32, stats *Stats) chunkSource { return newAWSChunkSource( s3p.ddb, &s3ObjectReader{s3: s3p.s3, bucket: s3p.bucket, readRl: s3p.rl, tc: s3p.tc}, s3p.limits, name, chunkCount, s3p.indexCache, stats, ) } type s3UploadedPart struct { idx int64 etag string } func (s3p awsTablePersister) Persist(mt *memTable, haver chunkReader, stats *Stats) chunkSource { name, data, chunkCount := mt.write(haver, stats) if chunkCount == 0 { return emptyChunkSource{} } if s3p.limits.tableFitsInDynamo(name, len(data), chunkCount) { s3p.ddb.Write(name, data) return s3p.newReaderFromIndexData(data, name, &dynamoTableReaderAt{ddb: s3p.ddb, h: name}) } if s3p.tc != nil { go s3p.tc.store(name, bytes.NewReader(data), uint64(len(data))) } 
s3p.multipartUpload(data, name.String()) tra := &s3TableReaderAt{&s3ObjectReader{s3: s3p.s3, bucket: s3p.bucket, readRl: s3p.rl, tc: s3p.tc}, name} return s3p.newReaderFromIndexData(data, name, tra) } func (s3p awsTablePersister) newReaderFromIndexData(idxData []byte, name addr, tra tableReaderAt) chunkSource { index := parseTableIndex(idxData) if s3p.indexCache != nil { s3p.indexCache.lockEntry(name) defer s3p.indexCache.unlockEntry(name) s3p.indexCache.put(name, index) } return &awsChunkSource{newTableReader(index, tra, s3BlockSize), name} } func (s3p awsTablePersister) multipartUpload(data []byte, key string) { uploadID := s3p.startMultipartUpload(key) multipartUpload, err := s3p.uploadParts(data, key, uploadID) if err != nil { s3p.abortMultipartUpload(key, uploadID) d.PanicIfError(err) // TODO: Better error handling here } s3p.completeMultipartUpload(key, uploadID, multipartUpload) } func (s3p awsTablePersister) startMultipartUpload(key string) string { result, err := s3p.s3.CreateMultipartUpload(&s3.CreateMultipartUploadInput{ Bucket: aws.String(s3p.bucket), Key: aws.String(key), }) d.PanicIfError(err) return *result.UploadId } func (s3p awsTablePersister) abortMultipartUpload(key, uploadID string) { _, abrtErr := s3p.s3.AbortMultipartUpload(&s3.AbortMultipartUploadInput{ Bucket: aws.String(s3p.bucket), Key: aws.String(key), UploadId: aws.String(uploadID), }) d.PanicIfError(abrtErr) } func (s3p awsTablePersister) completeMultipartUpload(key, uploadID string, mpu *s3.CompletedMultipartUpload) { _, err := s3p.s3.CompleteMultipartUpload(&s3.CompleteMultipartUploadInput{ Bucket: aws.String(s3p.bucket), Key: aws.String(key), MultipartUpload: mpu, UploadId: aws.String(uploadID), }) d.PanicIfError(err) } func (s3p awsTablePersister) uploadParts(data []byte, key, uploadID string) (*s3.CompletedMultipartUpload, error) { sent, failed, done := make(chan s3UploadedPart), make(chan error), make(chan struct{}) numParts := getNumParts(uint64(len(data)), 
// getNumParts returns how many parts of at least |minPartSize| bytes |dataLen|
// bytes can be cut into, rounding down so that the final part absorbs any
// remainder. The result is never less than 1. A |minPartSize| of 0 (e.g. an
// unset part-size limit) yields a single part rather than dividing by zero.
func getNumParts(dataLen, minPartSize uint64) uint64 {
	if minPartSize == 0 {
		return 1
	}
	if numParts := dataLen / minPartSize; numParts > 0 {
		return numParts
	}
	return 1
}
s[j] = s[j], s[i] } func (s3p awsTablePersister) ConjoinAll(sources chunkSources, stats *Stats) chunkSource { plan := planConjoin(sources, stats) if plan.chunkCount == 0 { return emptyChunkSource{} } t1 := time.Now() name := nameFromSuffixes(plan.suffixes()) s3p.executeCompactionPlan(plan, name.String()) verbose.Log("Compacted table of %d Kb in %s", plan.totalCompressedData/1024, time.Since(t1)) if s3p.tc != nil { go s3p.loadIntoCache(name) // load conjoined table to the cache } tra := &s3TableReaderAt{&s3ObjectReader{s3: s3p.s3, bucket: s3p.bucket, readRl: s3p.rl, tc: s3p.tc}, name} return s3p.newReaderFromIndexData(plan.mergedIndex, name, tra) } func (s3p awsTablePersister) loadIntoCache(name addr) { input := &s3.GetObjectInput{ Bucket: aws.String(s3p.bucket), Key: aws.String(name.String()), } result, err := s3p.s3.GetObject(input) d.PanicIfError(err) s3p.tc.store(name, result.Body, uint64(*result.ContentLength)) } func (s3p awsTablePersister) executeCompactionPlan(plan compactionPlan, key string) { uploadID := s3p.startMultipartUpload(key) multipartUpload, err := s3p.assembleTable(plan, key, uploadID) if err != nil { s3p.abortMultipartUpload(key, uploadID) d.PanicIfError(err) // TODO: Better error handling here } s3p.completeMultipartUpload(key, uploadID, multipartUpload) } func (s3p awsTablePersister) assembleTable(plan compactionPlan, key, uploadID string) (*s3.CompletedMultipartUpload, error) { d.PanicIfTrue(len(plan.sources) > maxS3Parts) // TODO: BUG 3433: handle > 10k parts // Separate plan.sources by amount of chunkData. Tables with >5MB of chunk data (copies) can be added to the new table using S3's multipart upload copy feature. Smaller tables with <5MB of chunk data (manuals) must be read, assembled into |buff|, and then re-uploaded in parts that are larger than 5MB. 
copies, manuals, buff := dividePlan(plan, uint64(s3p.limits.partMin), uint64(s3p.limits.partMax)) // Concurrently read data from small tables into |buff| var readWg sync.WaitGroup for _, man := range manuals { readWg.Add(1) go func(m manualPart) { defer readWg.Done() n, _ := m.srcR.Read(buff[m.dstStart:m.dstEnd]) d.PanicIfTrue(int64(n) < m.dstEnd-m.dstStart) }(man) } readWg.Wait() // sendPart calls |doUpload| to send part |partNum|, forwarding errors over |failed| or success over |sent|. Closing (or sending) on |done| will cancel all in-progress calls to sendPart. sent, failed, done := make(chan s3UploadedPart), make(chan error), make(chan struct{}) var uploadWg sync.WaitGroup type uploadFn func() (etag string, err error) sendPart := func(partNum int64, doUpload uploadFn) { if s3p.rl != nil { s3p.rl <- struct{}{} defer func() { <-s3p.rl }() } defer uploadWg.Done() // Check if upload has been terminated select { case <-done: return default: } etag, err := doUpload() if err != nil { failed <- err return } // Try to send along part info. In the case that the upload was aborted, reading from done allows this worker to exit correctly. select { case sent <- s3UploadedPart{int64(partNum), etag}: case <-done: return } } // Concurrently begin sending all parts using sendPart(). // First, kick off sending all the copyable parts. partNum := int64(1) // Part numbers are 1-indexed for _, cp := range copies { uploadWg.Add(1) go func(cp copyPart, partNum int64) { sendPart(partNum, func() (etag string, err error) { return s3p.uploadPartCopy(cp.name, cp.srcOffset, cp.srcLen, key, uploadID, partNum) }) }(cp, partNum) partNum++ } // Then, split buff (data from |manuals| and index) into parts and upload those concurrently. numManualParts := getNumParts(uint64(len(buff)), s3p.limits.partTarget) // TODO: What if this is too big? 
for i := uint64(0); i < numManualParts; i++ { start, end := i*s3p.limits.partTarget, (i+1)*s3p.limits.partTarget if i+1 == numManualParts { // If this is the last part, make sure it includes any overflow end = uint64(len(buff)) } uploadWg.Add(1) go func(data []byte, partNum int64) { sendPart(partNum, func() (etag string, err error) { return s3p.uploadPart(data, key, uploadID, partNum) }) }(buff[start:end], partNum) partNum++ } // When all the uploads started above are done, close |sent| and |failed| so that the code below will correctly detect that we're done sending parts and move forward. go func() { uploadWg.Wait() close(sent) close(failed) }() // Watch |sent| and |failed| for the results of part uploads. If ever one fails, close |done| to stop all the in-progress or pending sendPart() calls and then bail. multipartUpload := &s3.CompletedMultipartUpload{} var firstFailure error for cont := true; cont; { select { case sentPart, open := <-sent: if open { multipartUpload.Parts = append(multipartUpload.Parts, &s3.CompletedPart{ ETag: aws.String(sentPart.etag), PartNumber: aws.Int64(sentPart.idx), }) } cont = open case err := <-failed: if err != nil && firstFailure == nil { // nil err may happen when failed gets closed firstFailure = err close(done) } } } // If there was any failure detected above, |done| is already closed if firstFailure == nil { close(done) } sort.Sort(partsByPartNum(multipartUpload.Parts)) // S3 requires that these be in part-order return multipartUpload, firstFailure } type copyPart struct { name string srcOffset, srcLen int64 } type manualPart struct { srcR io.Reader dstStart, dstEnd int64 } // dividePlan assumes that plan.sources (which is of type chunkSourcesByDescendingDataSize) is correctly sorted by descending data size. func dividePlan(plan compactionPlan, minPartSize, maxPartSize uint64) (copies []copyPart, manuals []manualPart, buff []byte) { // NB: if maxPartSize < 2*minPartSize, splitting large copies apart isn't solvable. 
S3's limits are plenty far enough apart that this isn't a problem in production, but we could violate this in tests. d.PanicIfTrue(maxPartSize < 2*minPartSize) buffSize := uint64(len(plan.mergedIndex)) i := 0 for ; i < len(plan.sources); i++ { sws := plan.sources[i] if sws.dataLen < minPartSize { // since plan.sources is sorted in descending chunk-data-length order, we know that sws and all members after it are too small to copy. break } if sws.dataLen <= maxPartSize { copies = append(copies, copyPart{sws.source.hash().String(), 0, int64(sws.dataLen)}) continue } // Now, we need to break the data into some number of parts such that for all parts minPartSize <= size(part) <= maxPartSize. This code tries to split the part evenly, such that all new parts satisfy the previous inequality. This gets tricky around edge cases. Consider min = 5b and max = 10b and a data length of 101b. You need to send 11 parts, but you can't just send 10 parts of 10 bytes and 1 part of 1 byte -- the last is too small. You also can't send 10 parts of 9 bytes each and 1 part of 11 bytes, because the last is too big. You have to distribute the extra bytes across all the parts so that all of them fall into the proper size range. lens := splitOnMaxSize(sws.dataLen, maxPartSize) var srcStart int64 for _, length := range lens { copies = append(copies, copyPart{sws.source.hash().String(), srcStart, length}) srcStart += length } } var offset int64 for ; i < len(plan.sources); i++ { sws := plan.sources[i] manuals = append(manuals, manualPart{sws.source.reader(), offset, offset + int64(sws.dataLen)}) offset += int64(sws.dataLen) buffSize += sws.dataLen } buff = make([]byte, buffSize) copy(buff[buffSize-uint64(len(plan.mergedIndex)):], plan.mergedIndex) return } // Splits |dataLen| into the maximum number of roughly-equal part sizes such that each is <= maxPartSize. 
func splitOnMaxSize(dataLen, maxPartSize uint64) []int64 { numParts := dataLen / maxPartSize if dataLen%maxPartSize > 0 { numParts++ } baseSize := int64(dataLen / numParts) extraBytes := dataLen % numParts sizes := make([]int64, numParts) for i := range sizes { sizes[i] = baseSize if extraBytes > 0 { sizes[i]++ extraBytes-- } } return sizes } func (s3p awsTablePersister) uploadPartCopy(src string, srcStart, srcEnd int64, key, uploadID string, partNum int64) (etag string, err error) { res, err := s3p.s3.UploadPartCopy(&s3.UploadPartCopyInput{ // TODO: Use url.PathEscape() once we're on go 1.8 CopySource: aws.String(url.QueryEscape(s3p.bucket + "/" + src)), CopySourceRange: aws.String(s3RangeHeader(srcStart, srcEnd)), Bucket: aws.String(s3p.bucket), Key: aws.String(key), PartNumber: aws.Int64(int64(partNum)), UploadId: aws.String(uploadID), }) if err == nil { etag = *res.CopyPartResult.ETag } return } func (s3p awsTablePersister) uploadPart(data []byte, key, uploadID string, partNum int64) (etag string, err error) { res, err := s3p.s3.UploadPart(&s3.UploadPartInput{ Bucket: aws.String(s3p.bucket), Key: aws.String(key), PartNumber: aws.Int64(int64(partNum)), UploadId: aws.String(uploadID), Body: bytes.NewReader(data), }) if err == nil { etag = *res.ETag } return } ================================================ FILE: go/nbs/aws_table_persister_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"io"
	"math/rand"
	"sync"
	"testing"

	"github.com/attic-labs/noms/go/util/sizecache"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/stretchr/testify/assert"
)

// TestAWSTablePersisterPersist exercises awsTablePersister.Persist (and Open)
// against fake S3 and DynamoDB services, using a memTable filled with
// testChunks.
func TestAWSTablePersisterPersist(t *testing.T) {
	// calcPartSize derives a part-size target by dividing the maximum table size for |rdr| by |maxPartNum|.
	calcPartSize := func(rdr chunkReader, maxPartNum uint64) uint64 {
		return maxTableSize(uint64(rdr.count()), rdr.uncompressedLen()) / maxPartNum
	}

	mt := newMemTable(testMemTableSize)
	for _, c := range testChunks {
		assert.True(t, mt.addChunk(computeAddr(c), c))
	}

	t.Run("PersistToS3", func(t *testing.T) {
		// Part target is a third of the max table size; the persisted table must be readable from S3 and its index must land in the index cache.
		t.Run("InMultipleParts", func(t *testing.T) {
			assert := assert.New(t)
			s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
			ic := newIndexCache(1024)
			limits := awsLimits{partTarget: calcPartSize(mt, 3)}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits, indexCache: ic}

			src := s3p.Persist(mt, nil, &Stats{})
			assert.NotNil(ic.get(src.hash()))

			if assert.True(src.count() > 0) {
				if r := s3svc.readerForTable(src.hash()); assert.NotNil(r) {
					assertChunksInReader(testChunks, r, assert)
				}
			}
		})

		t.Run("CacheTable", func(t *testing.T) {
			s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
			limits := awsLimits{partTarget: calcPartSize(mt, 3)}
			tc := &waitOnStoreTableCache{readers: map[addr]io.ReaderAt{}}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits, tc: tc}

			// Persist and wait until tc.store() has completed
			tc.storeWG.Add(1)
			src := s3p.Persist(mt, nil, &Stats{})
			tc.storeWG.Wait()

			// Now, open the table that should have been cached by the above Persist() and read out all the chunks. All the reads should be serviced from tc.
			rdr := s3p.Open(src.hash(), src.count(), &Stats{})
			baseline := s3svc.getCount
			ch := make(chan extractRecord)
			go func() { defer close(ch); rdr.extract(ch) }()
			for range ch {
			}
			// The fake S3 get counter must not have moved while extracting.
			assert.Zero(t, s3svc.getCount-baseline)
		})

		// Part target equals the max table size.
		t.Run("InSinglePart", func(t *testing.T) {
			assert := assert.New(t)
			s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
			limits := awsLimits{partTarget: calcPartSize(mt, 1)}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits}

			src := s3p.Persist(mt, nil, &Stats{})
			if assert.True(src.count() > 0) {
				if r := s3svc.readerForTable(src.hash()); assert.NotNil(r) {
					assertChunksInReader(testChunks, r, assert)
				}
			}
		})

		// Every chunk in |mt| is also in |existingTable|; the result must be empty and nothing may be written to the fake S3.
		t.Run("NoNewChunks", func(t *testing.T) {
			assert := assert.New(t)
			mt := newMemTable(testMemTableSize)
			existingTable := newMemTable(testMemTableSize)

			for _, c := range testChunks {
				assert.True(mt.addChunk(computeAddr(c), c))
				assert.True(existingTable.addChunk(computeAddr(c), c))
			}

			s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
			limits := awsLimits{partTarget: 1 << 10}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits}

			src := s3p.Persist(mt, existingTable, &Stats{})
			assert.True(src.count() == 0)

			_, present := s3svc.data[src.hash().String()]
			assert.False(present)
		})

		// failingFakeS3 lets exactly one UploadPart succeed and fails the rest; Persist is expected to panic.
		t.Run("Abort", func(t *testing.T) {
			assert := assert.New(t)
			s3svc := &failingFakeS3{makeFakeS3(t), sync.Mutex{}, 1}
			ddb := makeFakeDTS(makeFakeDDB(t), nil)
			limits := awsLimits{partTarget: calcPartSize(mt, 4)}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits}

			assert.Panics(func() { s3p.Persist(mt, nil, &Stats{}) })
		})
	})

	t.Run("PersistToDynamo", func(t *testing.T) {
		// With itemMax and chunkMax comfortably above the table's needs, the table must be readable from the fake DynamoDB.
		t.Run("Success", func(t *testing.T) {
			assert := assert.New(t)

			ddb := makeFakeDDB(t)
			s3svc, dts := makeFakeS3(t), makeFakeDTS(ddb, nil)
			limits := awsLimits{itemMax: maxDynamoItemSize, chunkMax: 2 * mt.count()}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: dts, limits: limits}

			src := s3p.Persist(mt, nil, &Stats{})
			if assert.True(src.count() > 0) {
				if r := ddb.readerForTable(src.hash()); assert.NotNil(r) {
					assertChunksInReader(testChunks, r, assert)
				}
			}
		})

		// Table data is placed directly into the fake DynamoDB; Open must leave a copy of it in the size cache |tc|.
		t.Run("CacheOnOpen", func(t *testing.T) {
			assert := assert.New(t)

			tc := sizecache.New(maxDynamoItemSize)
			ddb := makeFakeDDB(t)
			s3svc, dts := makeFakeS3(t), makeFakeDTS(ddb, tc)
			limits := awsLimits{itemMax: maxDynamoItemSize, chunkMax: 2 * mt.count()}

			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: dts, limits: limits}

			tableData, name := buildTable(testChunks)
			ddb.putData(fmtTableName(name), tableData)

			src := s3p.Open(name, uint32(len(testChunks)), &Stats{})
			if assert.True(src.count() > 0) {
				if r := ddb.readerForTable(src.hash()); assert.NotNil(r) {
					assertChunksInReader(testChunks, r, assert)
				}
				if data, present := tc.Get(name); assert.True(present) {
					assert.Equal(tableData, data.([]byte))
				}
			}
		})

		// chunkMax of 1: the table must be absent from DynamoDB and present in S3 instead.
		t.Run("FailTooManyChunks", func(t *testing.T) {
			assert := assert.New(t)

			ddb := makeFakeDDB(t)
			s3svc, dts := makeFakeS3(t), makeFakeDTS(ddb, nil)
			limits := awsLimits{itemMax: maxDynamoItemSize, chunkMax: 1, partTarget: calcPartSize(mt, 1)}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: dts, limits: limits}

			src := s3p.Persist(mt, nil, &Stats{})
			if assert.True(src.count() > 0) {
				if r := ddb.readerForTable(src.hash()); assert.Nil(r) {
					if r = s3svc.readerForTable(src.hash()); assert.NotNil(r) {
						assertChunksInReader(testChunks, r, assert)
					}
				}
			}
		})

		// itemMax of 0: the table must be absent from DynamoDB and present in S3 instead.
		t.Run("FailItemTooBig", func(t *testing.T) {
			assert := assert.New(t)

			ddb := makeFakeDDB(t)
			s3svc, dts := makeFakeS3(t), makeFakeDTS(ddb, nil)
			limits := awsLimits{itemMax: 0, chunkMax: 2 * mt.count(), partTarget: calcPartSize(mt, 1)}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: dts, limits: limits}

			src := s3p.Persist(mt, nil, &Stats{})
			if assert.True(src.count() > 0) {
				if r := ddb.readerForTable(src.hash()); assert.Nil(r) {
					if r = s3svc.readerForTable(src.hash()); assert.NotNil(r) {
						assertChunksInReader(testChunks, r, assert)
					}
				}
			}
		})
	})
}

func
makeFakeDTS(ddb ddbsvc, tc *sizecache.SizeCache) *ddbTableStore {
	// Wraps |ddb| in a ddbTableStore for the table named "table", with |tc| as its (possibly nil) cache.
	return &ddbTableStore{ddb, "table", nil, tc}
}

// waitOnStoreTableCache is a test table cache that records every stored
// reader in |readers| and signals |storeWG| from store(), so a test can block
// until a store has happened.
type waitOnStoreTableCache struct {
	readers map[addr]io.ReaderAt
	mu      sync.RWMutex
	storeWG sync.WaitGroup
}

// checkout returns the reader stored for |h|, or nil if there is none.
func (mtc *waitOnStoreTableCache) checkout(h addr) io.ReaderAt {
	mtc.mu.RLock()
	defer mtc.mu.RUnlock()
	return mtc.readers[h]
}

// checkin is a no-op.
func (mtc *waitOnStoreTableCache) checkin(h addr) {}

// store records |data| under |h| and marks one unit of |storeWG| as done.
// |data| must also implement io.ReaderAt; |size| is ignored.
func (mtc *waitOnStoreTableCache) store(h addr, data io.Reader, size uint64) {
	defer mtc.storeWG.Done()
	mtc.mu.Lock()
	defer mtc.mu.Unlock()
	mtc.readers[h] = data.(io.ReaderAt)
}

// failingFakeS3 is a fakeS3 whose UploadPart succeeds only |numSuccesses|
// times and returns an error on every call after that.
type failingFakeS3 struct {
	*fakeS3
	mu           sync.Mutex
	numSuccesses int
}

func (m *failingFakeS3) UploadPart(input *s3.UploadPartInput) (*s3.UploadPartOutput, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.numSuccesses > 0 {
		m.numSuccesses--
		return m.fakeS3.UploadPart(input)
	}
	return nil, mockAWSError("MalformedXML")
}

// TestAWSTablePersisterDividePlan checks that dividePlan turns the two
// sufficiently large sources into copy parts whose sizes all lie within
// [minPartSize, maxPartSize] and cover each source's chunk data exactly, and
// that the too-small source becomes the single manual part.
func TestAWSTablePersisterDividePlan(t *testing.T) {
	assert := assert.New(t)
	minPartSize, maxPartSize := uint64(16), uint64(32)
	tooSmall := bytesToChunkSource([]byte("a"))
	justRight := bytesToChunkSource([]byte("123456789"), []byte("abcdefghi"))
	bigUns := [][]byte{make([]byte, maxPartSize-1), make([]byte, maxPartSize-1)}
	for _, b := range bigUns {
		rand.Read(b)
	}
	tooBig := bytesToChunkSource(bigUns...)

	sources := chunkSources{justRight, tooBig, tooSmall}
	plan := planConjoin(sources, &Stats{})
	copies, manuals, _ := dividePlan(plan, minPartSize, maxPartSize)

	// Sum the copied bytes per source table while checking each part's size.
	perTableDataSize := map[string]int64{}
	for _, c := range copies {
		assert.True(minPartSize <= uint64(c.srcLen))
		assert.True(uint64(c.srcLen) <= maxPartSize)
		totalSize := perTableDataSize[c.name]
		totalSize += c.srcLen
		perTableDataSize[c.name] = totalSize
	}
	assert.Len(perTableDataSize, 2)
	assert.Contains(perTableDataSize, justRight.hash().String())
	assert.Contains(perTableDataSize, tooBig.hash().String())
	assert.EqualValues(calcChunkDataLen(justRight.index()), perTableDataSize[justRight.hash().String()])
	assert.EqualValues(calcChunkDataLen(tooBig.index()), perTableDataSize[tooBig.hash().String()])

	assert.Len(manuals, 1)
	assert.EqualValues(calcChunkDataLen(tooSmall.index()), manuals[0].dstEnd-manuals[0].dstStart)
}

// TestAWSTablePersisterCalcPartSizes checks that splitOnMaxSize yields parts
// that each lie within [min, max] and that sum to the requested length.
func TestAWSTablePersisterCalcPartSizes(t *testing.T) {
	assert := assert.New(t)
	min, max := uint64(8*1<<10), uint64(1+(16*1<<10))

	testPartSizes := func(dataLen uint64) {
		lengths := splitOnMaxSize(dataLen, max)
		var sum int64
		for _, l := range lengths {
			assert.True(uint64(l) >= min)
			assert.True(uint64(l) <= max)
			sum += l
		}
		assert.EqualValues(dataLen, sum)
	}

	testPartSizes(1 << 20)
	testPartSizes(max + 1)
	testPartSizes(10*max - 1)
	testPartSizes(max + max/2)
}

// TestAWSTablePersisterConjoinAll persists tables of various sizes to a fake
// S3 and checks that ConjoinAll produces a table containing all of their
// chunks and registers its index in the shared index cache |ic|.
func TestAWSTablePersisterConjoinAll(t *testing.T) {
	targetPartSize := uint64(1024)
	minPartSize, maxPartSize := targetPartSize, 5*targetPartSize
	maxItemSize, maxChunkCount := int(targetPartSize/2), uint32(4)

	ic := newIndexCache(1024)
	rl := make(chan struct{}, 8)
	defer close(rl)

	newPersister := func(s3svc s3svc, ddb *ddbTableStore) awsTablePersister {
		return awsTablePersister{s3svc, "bucket", rl, nil, ddb, awsLimits{targetPartSize, minPartSize, maxPartSize, maxItemSize, maxChunkCount}, ic}
	}

	// Generate small chunks (a fifth of minPartSize each) until their combined chunk-data length exceeds minPartSize.
	smallChunks := [][]byte{}
	rnd := rand.New(rand.NewSource(0))
	for smallChunkTotal := uint64(0); smallChunkTotal <= uint64(minPartSize); {
		small := make([]byte, minPartSize/5)
		rnd.Read(small)
		src := bytesToChunkSource(small)
		smallChunks = append(smallChunks, small)
		smallChunkTotal += calcChunkDataLen(src.index())
	}

	t.Run("Small", func(t *testing.T) {
		// makeSources persists each chunk as its own single-chunk table.
		makeSources := func(s3p awsTablePersister, chunks [][]byte) (sources chunkSources) {
			for i := 0; i < len(chunks); i++ {
				mt := newMemTable(uint64(2 * targetPartSize))
				mt.addChunk(computeAddr(chunks[i]), chunks[i])
				sources = append(sources, s3p.Persist(mt, nil, &Stats{}))
			}
			return
		}

		t.Run("TotalUnderMinSize", func(t *testing.T) {
			assert := assert.New(t)
			s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
			s3p := newPersister(s3svc, ddb)

			// Drop the last small chunk, i.e. the one that pushed the running total past minPartSize.
			chunks := smallChunks[:len(smallChunks)-1]
			sources := makeSources(s3p, chunks)
			src := s3p.ConjoinAll(sources, &Stats{})
			assert.NotNil(ic.get(src.hash()))

			if assert.True(src.count() > 0) {
				if r := s3svc.readerForTable(src.hash()); assert.NotNil(r) {
					assertChunksInReader(chunks, r, assert)
				}
			}
		})

		t.Run("TotalOverMinSize", func(t *testing.T) {
			assert := assert.New(t)
			s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
			s3p := newPersister(s3svc, ddb)

			sources := makeSources(s3p, smallChunks)
			src := s3p.ConjoinAll(sources, &Stats{})
			assert.NotNil(ic.get(src.hash()))

			if assert.True(src.count() > 0) {
				if r := s3svc.readerForTable(src.hash()); assert.NotNil(r) {
					assertChunksInReader(smallChunks, r, assert)
				}
			}
		})
	})

	// Two sets of two random chunks, each chunk one byte short of maxPartSize.
	bigUns1 := [][]byte{make([]byte, maxPartSize-1), make([]byte, maxPartSize-1)}
	bigUns2 := [][]byte{make([]byte, maxPartSize-1), make([]byte, maxPartSize-1)}
	for _, bu := range [][][]byte{bigUns1, bigUns2} {
		for _, b := range bu {
			rand.Read(b)
		}
	}

	t.Run("AllOverMax", func(t *testing.T) {
		assert := assert.New(t)
		s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
		s3p := newPersister(s3svc, ddb)

		// Make 2 chunk sources that each have >maxPartSize chunk data
		sources := make(chunkSources, 2)
		for i, bu := range [][][]byte{bigUns1, bigUns2} {
			mt := newMemTable(uint64(2 * maxPartSize))
			for _, b := range bu {
				mt.addChunk(computeAddr(b), b)
			}
			sources[i] = s3p.Persist(mt, nil, &Stats{})
		}
		src := s3p.ConjoinAll(sources, &Stats{})
		assert.NotNil(ic.get(src.hash()))

		if assert.True(src.count() > 0) {
			if r := s3svc.readerForTable(src.hash()); assert.NotNil(r) {
				assertChunksInReader(bigUns1, r, assert)
				assertChunksInReader(bigUns2, r, assert)
			}
		}
	})

	t.Run("SomeOverMax", func(t *testing.T) {
		assert := assert.New(t)
		s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
		s3p := newPersister(s3svc, ddb)

		// Add one chunk source that has >maxPartSize data
		mtb := newMemTable(uint64(2 * maxPartSize))
		for _, b := range bigUns1 {
			mtb.addChunk(computeAddr(b), b)
		}

		// Follow up with a chunk source where minPartSize < data size < maxPartSize
		medChunks := make([][]byte, 2)
		mt := newMemTable(uint64(2 * maxPartSize))
		for i := range medChunks {
			medChunks[i] = make([]byte, minPartSize+1)
			rand.Read(medChunks[i])
			mt.addChunk(computeAddr(medChunks[i]), medChunks[i])
		}
		sources := chunkSources{s3p.Persist(mt, nil, &Stats{}), s3p.Persist(mtb, nil, &Stats{})}

		src := s3p.ConjoinAll(sources, &Stats{})
		assert.NotNil(ic.get(src.hash()))

		if assert.True(src.count() > 0) {
			if r := s3svc.readerForTable(src.hash()); assert.NotNil(r) {
				assertChunksInReader(bigUns1, r, assert)
				assertChunksInReader(medChunks, r, assert)
			}
		}
	})

	t.Run("Mix", func(t *testing.T) {
		assert := assert.New(t)
		s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
		s3p := newPersister(s3svc, ddb)

		// Start with small tables. Since total > minPartSize, will require more than one part to upload.
		sources := make(chunkSources, len(smallChunks))
		for i := 0; i < len(smallChunks); i++ {
			mt := newMemTable(uint64(2 * targetPartSize))
			mt.addChunk(computeAddr(smallChunks[i]), smallChunks[i])
			sources[i] = s3p.Persist(mt, nil, &Stats{})
		}

		// Now, add a table with big chunks that will require more than one upload copy part.
		mt := newMemTable(uint64(2 * maxPartSize))
		for _, b := range bigUns1 {
			mt.addChunk(computeAddr(b), b)
		}
		sources = append(sources, s3p.Persist(mt, nil, &Stats{}))

		// Last, some tables that should be directly upload-copyable
		medChunks := make([][]byte, 2)
		mt = newMemTable(uint64(2 * maxPartSize))
		for i := range medChunks {
			medChunks[i] = make([]byte, minPartSize+1)
			rand.Read(medChunks[i])
			mt.addChunk(computeAddr(medChunks[i]), medChunks[i])
		}
		sources = append(sources, s3p.Persist(mt, nil, &Stats{}))

		src := s3p.ConjoinAll(sources, &Stats{})
		assert.NotNil(ic.get(src.hash()))

		if assert.True(src.count() > 0) {
			if r := s3svc.readerForTable(src.hash()); assert.NotNil(r) {
				assertChunksInReader(smallChunks, r, assert)
				assertChunksInReader(bigUns1, r, assert)
				assertChunksInReader(medChunks, r, assert)
			}
		}
	})
}

// bytesToChunkSource writes each element of |bs| as a chunk into an in-memory
// table and returns a chunkSource that reads from that table.
func bytesToChunkSource(bs ...[]byte) chunkSource {
	sum := 0
	for _, b := range bs {
		sum += len(b)
	}
	maxSize := maxTableSize(uint64(len(bs)), uint64(sum))
	buff := make([]byte, maxSize)
	tw := newTableWriter(buff, nil)
	for _, b := range bs {
		tw.addChunk(computeAddr(b), b)
	}
	tableSize, name := tw.finish()
	// Only the first |tableSize| bytes of the buffer hold the finished table.
	data := buff[:tableSize]
	rdr := newTableReader(parseTableIndex(data), tableReaderAtFromBytes(data), fileBlockSize)
	return chunkSourceAdapter{rdr, name}
}

================================================
FILE: go/nbs/block_store_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"bytes"
	"crypto/rand"
	"io/ioutil"
	"os"
	"path/filepath"
	"sort"
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/constants"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/suite"
)

// testMemTableSize is the mem table size (256 bytes) used by the stores and mem tables in these tests.
const testMemTableSize = 1 << 8

func TestBlockStoreSuite(t *testing.T) {
	suite.Run(t, &BlockStoreSuite{})
}

// BlockStoreSuite runs each test against a NomsBlockStore created by
// NewLocalStore in a per-test temporary directory.
type BlockStoreSuite struct {
	suite.Suite
	dir        string
	store      *NomsBlockStore
	putCountFn func() int
}

// SetupTest creates a fresh temporary directory and opens a local store in it.
func (suite *BlockStoreSuite) SetupTest() {
	var err error
	suite.dir, err = ioutil.TempDir("", "")
	suite.NoError(err)
	suite.store = NewLocalStore(suite.dir, testMemTableSize)
	suite.putCountFn = func() int {
		return int(suite.store.putCount)
	}
}

// TearDownTest closes the store and removes the temporary directory.
func (suite *BlockStoreSuite) TearDownTest() {
	suite.store.Close()
	os.RemoveAll(suite.dir)
}

// TestChunkStoreMissingDir expects NewLocalStore to panic when the directory does not exist.
func (suite *BlockStoreSuite) TestChunkStoreMissingDir() {
	newDir := filepath.Join(suite.dir, "does-not-exist")
	suite.Panics(func() { NewLocalStore(newDir, testMemTableSize) })
}

// TestChunkStoreNotDir expects NewLocalStore to panic when the path names a regular file.
func (suite *BlockStoreSuite) TestChunkStoreNotDir() {
	existingFile := filepath.Join(suite.dir, "path-exists-but-is-a-file")
	// NOTE(review): the *os.File and error returned by os.Create are discarded here.
	os.Create(existingFile)
	suite.Panics(func() { NewLocalStore(existingFile, testMemTableSize) })
}

func (suite *BlockStoreSuite) TestChunkStorePut() {
	input := []byte("abc")
	c := chunks.NewChunk(input)
	suite.store.Put(c)
	h := c.Hash()

	// See http://www.di-mgt.com.au/sha_testvectors.html
	suite.Equal("rmnjb8cjc5tblj21ed4qs821649eduie", h.String())

	suite.store.Commit(h, suite.store.Root()) // Commit writes

	// And reading it via the API should work...
	assertInputInStore(input, h, suite.store, suite.Assert())
	if suite.putCountFn != nil {
		suite.Equal(1, suite.putCountFn())
	}

	// Re-writing the same data should cause a second put
	c = chunks.NewChunk(input)
	suite.store.Put(c)
	suite.Equal(h, c.Hash())
	assertInputInStore(input, h, suite.store, suite.Assert())
	suite.store.Commit(h, suite.store.Root()) // Commit writes

	if suite.putCountFn != nil {
		suite.Equal(2, suite.putCountFn())
	}
}

func (suite *BlockStoreSuite) TestChunkStorePutMany() {
	input1, input2 := []byte("abc"), []byte("def")
	c1, c2 := chunks.NewChunk(input1), chunks.NewChunk(input2)
	suite.store.Put(c1)
	suite.store.Put(c2)

	suite.store.Commit(c1.Hash(), suite.store.Root()) // Commit writes

	// And reading it via the API should work...
	assertInputInStore(input1, c1.Hash(), suite.store, suite.Assert())
	assertInputInStore(input2, c2.Hash(), suite.store, suite.Assert())
	if suite.putCountFn != nil {
		suite.Equal(2, suite.putCountFn())
	}
}

// TestChunkStoreStatsSummary checks that the summary mentions the committed root hash and is not the literal "Unsupported".
func (suite *BlockStoreSuite) TestChunkStoreStatsSummary() {
	input1, input2 := []byte("abc"), []byte("def")
	c1, c2 := chunks.NewChunk(input1), chunks.NewChunk(input2)
	suite.store.Put(c1)
	suite.store.Put(c2)

	suite.store.Commit(c1.Hash(), suite.store.Root()) // Commit writes

	summary := suite.store.StatsSummary()
	suite.Contains(summary, c1.Hash().String())
	suite.NotEqual("Unsupported", summary)
}

// TestChunkStorePutMoreThanMemTable puts two chunks that are each larger than half of testMemTableSize and expects the store to end up with two tables.
func (suite *BlockStoreSuite) TestChunkStorePutMoreThanMemTable() {
	input1, input2 := make([]byte, testMemTableSize/2+1), make([]byte, testMemTableSize/2+1)
	rand.Read(input1)
	rand.Read(input2)
	c1, c2 := chunks.NewChunk(input1), chunks.NewChunk(input2)
	suite.store.Put(c1)
	suite.store.Put(c2)

	suite.store.Commit(c1.Hash(), suite.store.Root()) // Commit writes

	// And reading it via the API should work...
	assertInputInStore(input1, c1.Hash(), suite.store, suite.Assert())
	assertInputInStore(input2, c2.Hash(), suite.store, suite.Assert())
	if suite.putCountFn != nil {
		suite.Equal(2, suite.putCountFn())
	}
	suite.Len(suite.store.tables.ToSpecs(), 2)
}

func (suite *BlockStoreSuite) TestChunkStoreGetMany() {
	inputs := [][]byte{make([]byte, testMemTableSize/2+1), make([]byte, testMemTableSize/2+1), []byte("abc")}
	rand.Read(inputs[0])
	rand.Read(inputs[1])
	chnx := make([]chunks.Chunk, len(inputs))
	for i, data := range inputs {
		chnx[i] = chunks.NewChunk(data)
		suite.store.Put(chnx[i])
	}
	suite.store.Commit(chnx[0].Hash(), suite.store.Root()) // Commit writes

	hashes := make(hash.HashSlice, len(chnx))
	for i, c := range chnx {
		hashes[i] = c.Hash()
	}

	// The channel is buffered for every requested hash, so GetMany can finish before anything is received.
	chunkChan := make(chan *chunks.Chunk, len(hashes))
	suite.store.GetMany(hashes.HashSet(), chunkChan)
	close(chunkChan)

	found := make(hash.HashSlice, 0)
	for c := range chunkChan {
		found = append(found, c.Hash())
	}

	// Compare as sorted slices; the order chunks arrive in is not asserted.
	sort.Sort(found)
	sort.Sort(hashes)
	suite.True(found.Equals(hashes))
}

// TestChunkStoreHasMany checks that HasMany returns exactly the requested hashes that are absent from the store.
func (suite *BlockStoreSuite) TestChunkStoreHasMany() {
	chnx := []chunks.Chunk{
		chunks.NewChunk([]byte("abc")),
		chunks.NewChunk([]byte("def")),
	}
	for _, c := range chnx {
		suite.store.Put(c)
	}
	suite.store.Commit(chnx[0].Hash(), suite.store.Root()) // Commit writes
	notPresent := chunks.NewChunk([]byte("ghi")).Hash()

	hashes := hash.NewHashSet(chnx[0].Hash(), chnx[1].Hash(), notPresent)
	absent := suite.store.HasMany(hashes)

	suite.Len(absent, 1)
	for _, c := range chnx {
		suite.False(absent.Has(c.Hash()), "%s present in %v", c.Hash(), absent)
	}
	suite.True(absent.Has(notPresent))
}

// TestChunkStoreExtractChunks checks that extractChunks yields the chunks in the order they were Put.
func (suite *BlockStoreSuite) TestChunkStoreExtractChunks() {
	input1, input2 := make([]byte, testMemTableSize/2+1), make([]byte, testMemTableSize/2+1)
	rand.Read(input1)
	rand.Read(input2)
	chnx := []chunks.Chunk{chunks.NewChunk(input1), chunks.NewChunk(input2)}
	for _, c := range chnx {
		suite.store.Put(c)
	}

	chunkChan := make(chan *chunks.Chunk)
	go func() { suite.store.extractChunks(chunkChan); close(chunkChan) }()
	i := 0
	for c := range chunkChan {
		suite.Equal(chnx[i].Data(), c.Data())
		suite.Equal(chnx[i].Hash(), c.Hash())
		i++
	}
}

func (suite *BlockStoreSuite) TestChunkStoreFlushOptimisticLockFail() {
	input1, input2 := []byte("abc"), []byte("def")
	c1, c2 := chunks.NewChunk(input1), chunks.NewChunk(input2)
	root := suite.store.Root()

	// A second store opened on the same directory plays the concurrent writer.
	interloper := NewLocalStore(suite.dir, testMemTableSize)
	interloper.Put(c1)
	suite.True(interloper.Commit(interloper.Root(), interloper.Root()))

	suite.store.Put(c2)
	suite.True(suite.store.Commit(suite.store.Root(), suite.store.Root()))

	// Reading c2 via the API should work...
	assertInputInStore(input2, c2.Hash(), suite.store, suite.Assert())
	// And so should reading c1 via the API
	assertInputInStore(input1, c1.Hash(), suite.store, suite.Assert())

	suite.True(interloper.Commit(c1.Hash(), interloper.Root())) // Commit root

	// Updating from stale root should fail...
	suite.False(suite.store.Commit(c2.Hash(), root))
	// ...but new root should succeed
	suite.True(suite.store.Commit(c2.Hash(), suite.store.Root()))
}

func (suite *BlockStoreSuite) TestChunkStoreRebaseOnNoOpFlush() {
	input1 := []byte("abc")
	c1 := chunks.NewChunk(input1)

	interloper := NewLocalStore(suite.dir, testMemTableSize)
	interloper.Put(c1)
	suite.True(interloper.Commit(c1.Hash(), interloper.Root()))

	suite.False(suite.store.Has(c1.Hash()))
	suite.Equal(hash.Hash{}, suite.store.Root())
	// Should Rebase, even though there's no work to do.
	suite.True(suite.store.Commit(suite.store.Root(), suite.store.Root()))

	// Reading c1 via the API should work
	assertInputInStore(input1, c1.Hash(), suite.store, suite.Assert())
	suite.True(suite.store.Has(c1.Hash()))
}

func (suite *BlockStoreSuite) TestChunkStorePutWithRebase() {
	input1, input2 := []byte("abc"), []byte("def")
	c1, c2 := chunks.NewChunk(input1), chunks.NewChunk(input2)
	root := suite.store.Root()

	interloper := NewLocalStore(suite.dir, testMemTableSize)
	interloper.Put(c1)
	suite.True(interloper.Commit(interloper.Root(), interloper.Root()))

	suite.store.Put(c2)

	// Reading c2 via the API should work pre-rebase
	assertInputInStore(input2, c2.Hash(), suite.store, suite.Assert())
	// Shouldn't have c1 yet.
	suite.False(suite.store.Has(c1.Hash()))

	suite.store.Rebase()

	// Reading c2 via the API should work post-rebase
	assertInputInStore(input2, c2.Hash(), suite.store, suite.Assert())
	// And so should reading c1 via the API
	assertInputInStore(input1, c1.Hash(), suite.store, suite.Assert())

	// Commit interloper root
	suite.True(interloper.Commit(c1.Hash(), interloper.Root()))

	// suite.store should still have its initial root
	suite.EqualValues(root, suite.store.Root())
	suite.store.Rebase()

	// Rebase grabbed the new root, so updating should now succeed!
	suite.True(suite.store.Commit(c2.Hash(), suite.store.Root()))

	// Interloper shouldn't see c2 yet....
	suite.False(interloper.Has(c2.Hash()))
	interloper.Rebase()
	// ...but post-rebase it must
	assertInputInStore(input2, c2.Hash(), interloper, suite.Assert())
}

// TestBlockStoreConjoinOnCommit drives Commit on stores built with a fake
// manifest, fake table persister and fakeConjoiner, and checks that the newly
// put chunk and all pre-existing chunks stay reachable.
func TestBlockStoreConjoinOnCommit(t *testing.T) {
	stats := &Stats{}
	// assertContainAll asserts that |store| has every chunk found in |srcs|.
	assertContainAll := func(t *testing.T, store chunks.ChunkStore, srcs ...chunkSource) {
		rdrs := make(chunkReaderGroup, len(srcs))
		for i, src := range srcs {
			rdrs[i] = src
		}
		chunkChan := make(chan extractRecord, rdrs.count())
		rdrs.extract(chunkChan)
		close(chunkChan)

		for rec := range chunkChan {
			assert.True(t, store.Has(hash.Hash(rec.a)))
		}
	}

	makeManifestManager := func(m manifest) manifestManager {
		return manifestManager{m, newManifestCache(0), newManifestLocks()}
	}

	newChunk := chunks.NewChunk([]byte("gnu"))

	// The fakeConjoiner has no canned responses, so it never asks for a conjoin.
	t.Run("NoConjoin", func(t *testing.T) {
		mm := makeManifestManager(&fakeManifest{})
		p := newFakeTablePersister()
		c := &fakeConjoiner{}

		smallTableStore := newNomsBlockStore(mm, p, c, testMemTableSize)

		root := smallTableStore.Root()
		smallTableStore.Put(newChunk)
		assert.True(t, smallTableStore.Commit(newChunk.Hash(), root))
		assert.True(t, smallTableStore.Has(newChunk.Hash()))
	})

	// makeCanned really conjoins |conjoinees| through |p| and packages the resulting spec, followed by |keepers|, as a canned response.
	makeCanned := func(conjoinees, keepers []tableSpec, p tablePersister) cannedConjoin {
		srcs := chunkSources{}
		for _, sp := range conjoinees {
			srcs = append(srcs, p.Open(sp.name, sp.chunkCount, nil))
		}
		conjoined := p.ConjoinAll(srcs, stats)
		cannedSpecs := []tableSpec{{conjoined.hash(), conjoined.count()}}
		return cannedConjoin{true, append(cannedSpecs, keepers...)}
	}

	t.Run("ConjoinSuccess", func(t *testing.T) {
		fm := &fakeManifest{}
		p := newFakeTablePersister()

		srcs := makeTestSrcs([]uint32{1, 1, 3, 7}, p)
		upstream := toSpecs(srcs)
		fm.set(constants.NomsVersion, computeAddr([]byte{0xbe}), hash.Of([]byte{0xef}), upstream)
		c := &fakeConjoiner{
			[]cannedConjoin{makeCanned(upstream[:2], upstream[2:], p)},
		}

		smallTableStore := newNomsBlockStore(makeManifestManager(fm), p, c, testMemTableSize)

		root := smallTableStore.Root()
		smallTableStore.Put(newChunk)
		assert.True(t, smallTableStore.Commit(newChunk.Hash(), root))
		assert.True(t, smallTableStore.Has(newChunk.Hash()))
		assertContainAll(t, smallTableStore, srcs...)
	})

	// Two canned responses, both with should == true, so a conjoin is requested twice.
	t.Run("ConjoinRetry", func(t *testing.T) {
		fm := &fakeManifest{}
		p := newFakeTablePersister()

		srcs := makeTestSrcs([]uint32{1, 1, 3, 7, 13}, p)
		upstream := toSpecs(srcs)
		fm.set(constants.NomsVersion, computeAddr([]byte{0xbe}), hash.Of([]byte{0xef}), upstream)
		c := &fakeConjoiner{
			[]cannedConjoin{
				makeCanned(upstream[:2], upstream[2:], p),
				makeCanned(upstream[:4], upstream[4:], p),
			},
		}

		smallTableStore := newNomsBlockStore(makeManifestManager(fm), p, c, testMemTableSize)

		root := smallTableStore.Root()
		smallTableStore.Put(newChunk)
		assert.True(t, smallTableStore.Commit(newChunk.Hash(), root))
		assert.True(t, smallTableStore.Has(newChunk.Hash()))
		assertContainAll(t, smallTableStore, srcs...)
	})
}

// cannedConjoin is one scripted response for fakeConjoiner: whether a conjoin
// should be reported as required, and the table specs to install when asked.
type cannedConjoin struct {
	should bool
	specs  []tableSpec // Must name tables that are already persisted
}

// fakeConjoiner replays its |canned| responses in order.
type fakeConjoiner struct {
	canned []cannedConjoin
}

// ConjoinRequired reports the |should| flag of the next canned response, or false when none remain.
func (fc *fakeConjoiner) ConjoinRequired(ts tableSet) bool {
	if len(fc.canned) == 0 {
		return false
	}
	return fc.canned[0].should
}

// Conjoin consumes the next canned response and updates the manifest to its specs, keeping |upstream|'s root. It panics if no canned response remains or if the update does not land.
func (fc *fakeConjoiner) Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents {
	d.PanicIfTrue(len(fc.canned) == 0)
	canned := fc.canned[0]
	fc.canned = fc.canned[1:]

	newContents := manifestContents{
		vers:  constants.NomsVersion,
		root:  upstream.root,
		specs: canned.specs,
		lock:  generateLockHash(upstream.root, canned.specs),
	}
	upstream = mm.Update(upstream.lock, newContents, stats, nil)
	d.PanicIfFalse(upstream.lock == newContents.lock)
	return upstream
}

// assertInputInStore asserts that |s| returns a non-empty chunk for |h| whose data equals |input|.
func assertInputInStore(input []byte, h hash.Hash, s chunks.ChunkStore, assert *assert.Assertions) {
	c := s.Get(h)
	assert.False(c.IsEmpty(), "Shouldn't get empty chunk for %s", h.String())
	assert.Zero(bytes.Compare(input, c.Data()), "%s != %s", string(input), string(c.Data()))
}

// TestChunkStoreGetNonExisting checks that Get returns the empty chunk for a hash that was never stored.
func (suite *BlockStoreSuite) TestChunkStoreGetNonExisting() {
	h :=
hash.Parse("11111111111111111111111111111111") c := suite.store.Get(h) suite.True(c.IsEmpty()) } ================================================ FILE: go/nbs/cache.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "io/ioutil" "os" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" ) const ( defaultCacheMemTableSize uint64 = 1 << 27 // 128MiB ) func NewCache() *NomsBlockCache { dir, err := ioutil.TempDir("", "") d.PanicIfError(err) store := NewLocalStore(dir, defaultCacheMemTableSize) d.Chk.NoError(err, "opening put cache in %s", dir) return &NomsBlockCache{store, dir} } // NomsBlockCache holds Chunks, allowing them to be retrieved by hash or enumerated in hash order. type NomsBlockCache struct { chunks *NomsBlockStore dbDir string } // Insert stores c in the cache. func (nbc *NomsBlockCache) Insert(c chunks.Chunk) { d.PanicIfFalse(nbc.chunks.addChunk(addr(c.Hash()), c.Data())) } // Has checks if the chunk referenced by hash is in the cache. func (nbc *NomsBlockCache) Has(hash hash.Hash) bool { return nbc.chunks.Has(hash) } // HasMany returns a set containing the members of hashes present in the // cache. func (nbc *NomsBlockCache) HasMany(hashes hash.HashSet) hash.HashSet { return nbc.chunks.HasMany(hashes) } // Get retrieves the chunk referenced by hash. If the chunk is not present, // Get returns the empty Chunk. func (nbc *NomsBlockCache) Get(hash hash.Hash) chunks.Chunk { return nbc.chunks.Get(hash) } // GetMany gets the Chunks with |hashes| from the store. On return, // |foundChunks| will have been fully sent all chunks which have been // found. Any non-present chunks will silently be ignored. 
func (nbc *NomsBlockCache) GetMany(hashes hash.HashSet, foundChunks chan *chunks.Chunk) {
	// Pure delegation to the backing store; this method does not close |foundChunks|.
	nbc.chunks.GetMany(hashes, foundChunks)
}

// ExtractChunks writes the entire contents of the cache to chunkChan. The
// chunks are extracted in insertion order.
func (nbc *NomsBlockCache) ExtractChunks(chunkChan chan *chunks.Chunk) {
	nbc.chunks.extractChunks(chunkChan)
}

// Count returns the number of items in the cache.
func (nbc *NomsBlockCache) Count() uint32 {
	return nbc.chunks.Count()
}

// Destroy drops the cache and deletes any backing storage.
// The backing store is closed first and its Close result is checked with
// d.Chk.NoError; the returned error is the result of removing the cache
// directory.
func (nbc *NomsBlockCache) Destroy() error {
	d.Chk.NoError(nbc.chunks.Close())
	return os.RemoveAll(nbc.dbDir)
}

================================================
FILE: go/nbs/conjoiner.go
================================================
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"sort"
	"sync"
	"time"

	"github.com/attic-labs/noms/go/constants"
	"github.com/attic-labs/noms/go/d"
)

// conjoiner decides when tables should be conjoined and carries the
// conjoin out.
type conjoiner interface {
	// ConjoinRequired tells the caller whether or not it's time to request a
	// Conjoin, based upon the contents of |ts| and the conjoiner
	// implementation's policy.
	ConjoinRequired(ts tableSet) bool

	// Conjoin attempts to use |p| to conjoin some number of tables referenced
	// by |upstream|, allowing it to update |mm| with a new, smaller, set of tables
	// that references precisely the same set of chunks. Conjoin() may not
	// actually conjoin any upstream tables, usually because some out-of-
	// process actor has already landed a conjoin of its own. Callers must
	// handle this, likely by rebasing against upstream and re-evaluating the
	// situation.
Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents
}

// inlineConjoiner is a conjoiner that does its work synchronously in the
// caller, and asks for a conjoin once a table set holds more than
// |maxTables| tables.
type inlineConjoiner struct {
	maxTables int
}

// ConjoinRequired reports whether |ts| holds more than c.maxTables tables.
func (c inlineConjoiner) ConjoinRequired(ts tableSet) bool {
	return ts.Size() > c.maxTables
}

// Conjoin runs conjoin() directly, in the calling goroutine.
func (c inlineConjoiner) Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents {
	return conjoin(upstream, mm, p, stats)
}

// conjoin conjoins some of the tables named in |upstream| (once) and then
// tries to land the resulting, smaller table set via |mm|. If the manifest
// moved underneath it, it retries for as long as every conjoined table is
// still referenced upstream, and otherwise gives up. Either way the returned
// value is the latest manifest contents observed from |mm|.
func conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents {
	var conjoined tableSpec
	var conjoinees, keepers []tableSpec

	for {
		// The expensive conjoin happens on the first pass only; retries re-use its result.
		if conjoinees == nil {
			conjoined, conjoinees, keepers = conjoinTables(p, upstream.specs, stats)
		}

		// New table set: the conjoined table first, then everything that was kept.
		specs := append(make([]tableSpec, 0, len(keepers)+1), conjoined)
		specs = append(specs, keepers...)

		newContents := manifestContents{
			vers:  constants.NomsVersion,
			root:  upstream.root,
			lock:  generateLockHash(upstream.root, specs),
			specs: specs,
		}
		upstream = mm.Update(upstream.lock, newContents, stats, nil)

		if newContents.lock == upstream.lock {
			return upstream // Success!
		}
		// Optimistic lock failure. Someone else moved the root, the set of tables, or both out from under us.
		// If we can re-use the conjoin we already performed, we want to try again. Currently, we will only do so if ALL conjoinees are still present upstream. If we can't re-use...then someone else almost certainly landed a conjoin upstream. In this case, bail and let clients ask again if they think they still can't proceed.
		conjoineeSet := map[addr]struct{}{}
		upstreamNames := map[addr]struct{}{}
		for _, spec := range upstream.specs {
			upstreamNames[spec.name] = struct{}{}
		}
		for _, c := range conjoinees {
			if _, present := upstreamNames[c.name]; !present {
				return upstream // Bail!
			}
			conjoineeSet[c.name] = struct{}{}
		}

		// Filter conjoinees out of upstream.specs to generate new set of keepers
		keepers = make([]tableSpec, 0, len(upstream.specs)-len(conjoinees))
		for _, spec := range upstream.specs {
			if _, present := conjoineeSet[spec.name]; !present {
				keepers = append(keepers, spec)
			}
		}
	}
}

// conjoinTables opens every table in |upstream|, picks the ones to conjoin
// with chooseConjoinees, and conjoins them with |p|. It returns the spec of
// the newly conjoined table, the specs of the tables that went into it, and
// the specs of the tables left untouched. Latency and size samples for the
// conjoin are recorded in |stats|.
func conjoinTables(p tablePersister, upstream []tableSpec, stats *Stats) (conjoined tableSpec, conjoinees, keepers []tableSpec) {
	// Open all the upstream tables concurrently
	sources := make(chunkSources, len(upstream))
	wg := sync.WaitGroup{}
	for i, spec := range upstream {
		wg.Add(1)
		// Index and spec are passed as arguments so that each goroutine works on its own copy.
		go func(idx int, spec tableSpec) {
			defer wg.Done()
			sources[idx] = p.Open(spec.name, spec.chunkCount, stats)
		}(i, spec)
	}
	wg.Wait()

	t1 := time.Now()

	toConjoin, toKeep := chooseConjoinees(sources)
	conjoinedSrc := p.ConjoinAll(toConjoin, stats)

	stats.ConjoinLatency.SampleTimeSince(t1)
	stats.TablesPerConjoin.SampleLen(len(toConjoin))
	stats.ChunksPerConjoin.Sample(uint64(conjoinedSrc.count()))

	return tableSpec{conjoinedSrc.hash(), conjoinedSrc.count()}, toSpecs(toConjoin), toSpecs(toKeep)
}

// Current approach is to choose the smallest N tables which, when removed and replaced with the conjoinment, will leave the conjoinment as the smallest table.
func chooseConjoinees(upstream chunkSources) (toConjoin, toKeep chunkSources) { sortedUpstream := make(chunkSources, len(upstream)) copy(sortedUpstream, upstream) sort.Sort(chunkSourcesByAscendingCount(sortedUpstream)) partition := 2 sum := sortedUpstream[0].count() + sortedUpstream[1].count() for partition < len(sortedUpstream) && sum > sortedUpstream[partition].count() { sum += sortedUpstream[partition].count() partition++ } return sortedUpstream[:partition], sortedUpstream[partition:] } func toSpecs(srcs chunkSources) []tableSpec { specs := make([]tableSpec, len(srcs)) for i, src := range srcs { d.PanicIfFalse(src.count() > 0) specs[i] = tableSpec{src.hash(), src.count()} } return specs } ================================================ FILE: go/nbs/conjoiner_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "encoding/binary" "sort" "testing" "github.com/attic-labs/noms/go/constants" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) type tableSpecsByAscendingCount []tableSpec func (ts tableSpecsByAscendingCount) Len() int { return len(ts) } func (ts tableSpecsByAscendingCount) Less(i, j int) bool { tsI, tsJ := ts[i], ts[j] if tsI.chunkCount == tsJ.chunkCount { return bytes.Compare(tsI.name[:], tsJ.name[:]) < 0 } return tsI.chunkCount < tsJ.chunkCount } func (ts tableSpecsByAscendingCount) Swap(i, j int) { ts[i], ts[j] = ts[j], ts[i] } func makeTestSrcs(tableSizes []uint32, p tablePersister) (srcs chunkSources) { count := uint32(0) nextChunk := func() (chunk []byte) { chunk = make([]byte, 4) binary.BigEndian.PutUint32(chunk, count) count++ return chunk } for _, s := range tableSizes { mt := newMemTable(testMemTableSize) for i := uint32(0); i < s; i++ { c := nextChunk() mt.addChunk(computeAddr(c), c) } srcs = append(srcs, p.Persist(mt, nil, &Stats{})) } 
return } func TestConjoin(t *testing.T) { // Makes a tableSet with len(tableSizes) upstream tables containing tableSizes[N] unique chunks makeTestTableSpecs := func(tableSizes []uint32, p tablePersister) (specs []tableSpec) { for _, src := range makeTestSrcs(tableSizes, p) { specs = append(specs, tableSpec{src.hash(), src.count()}) } return } // Returns the chunk counts of the tables in ts.compacted & ts.upstream in ascending order getSortedSizes := func(specs []tableSpec) (sorted []uint32) { all := append([]tableSpec{}, specs...) sort.Sort(tableSpecsByAscendingCount(all)) for _, ts := range all { sorted = append(sorted, ts.chunkCount) } return } assertContainAll := func(t *testing.T, p tablePersister, expect, actual []tableSpec) { open := func(specs []tableSpec) (srcs chunkReaderGroup) { for _, sp := range specs { srcs = append(srcs, p.Open(sp.name, sp.chunkCount, nil)) } return } expectSrcs, actualSrcs := open(expect), open(actual) chunkChan := make(chan extractRecord, expectSrcs.count()) expectSrcs.extract(chunkChan) close(chunkChan) for rec := range chunkChan { assert.True(t, actualSrcs.has(rec.a)) } } setup := func(lock addr, root hash.Hash, sizes []uint32) (fm *fakeManifest, p tablePersister, upstream manifestContents) { p = newFakeTablePersister() fm = &fakeManifest{} fm.set(constants.NomsVersion, lock, root, makeTestTableSpecs(sizes, p)) _, upstream = fm.ParseIfExists(nil, nil) return } tc := []struct { name string precompact []uint32 postcompact []uint32 }{ {"uniform", []uint32{1, 1, 1, 1, 1}, []uint32{5}}, {"all but last", []uint32{1, 1, 1, 1, 5}, []uint32{4, 5}}, {"all", []uint32{5, 5, 5}, []uint32{15}}, {"first four", []uint32{5, 6, 10, 11, 35, 64}, []uint32{32, 35, 64}}, {"log, first two", []uint32{1, 2, 4, 8, 16, 32, 64}, []uint32{3, 4, 8, 16, 32, 64}}, {"log, all", []uint32{2, 3, 4, 8, 16, 32, 64}, []uint32{129}}, } stats := &Stats{} startLock, startRoot := computeAddr([]byte("lock")), hash.Of([]byte("root")) t.Run("Success", func(t *testing.T) { // 
Compact some tables, no one interrupts for _, c := range tc { t.Run(c.name, func(t *testing.T) { fm, p, upstream := setup(startLock, startRoot, c.precompact) conjoin(upstream, fm, p, stats) exists, newUpstream := fm.ParseIfExists(stats, nil) assert.True(t, exists) assert.Equal(t, c.postcompact, getSortedSizes(newUpstream.specs)) assertContainAll(t, p, upstream.specs, newUpstream.specs) }) } }) t.Run("Retry", func(t *testing.T) { // Compact some tables, interloper slips in a new table makeExtra := func(p tablePersister) tableSpec { mt := newMemTable(testMemTableSize) data := []byte{0xde, 0xad} mt.addChunk(computeAddr(data), data) src := p.Persist(mt, nil, &Stats{}) return tableSpec{src.hash(), src.count()} } for _, c := range tc { t.Run(c.name, func(t *testing.T) { fm, p, upstream := setup(startLock, startRoot, c.precompact) newTable := makeExtra(p) u := updatePreemptManifest{fm, func() { specs := append([]tableSpec{}, upstream.specs...) fm.set(constants.NomsVersion, computeAddr([]byte("lock2")), startRoot, append(specs, newTable)) }} conjoin(upstream, u, p, stats) exists, newUpstream := fm.ParseIfExists(stats, nil) assert.True(t, exists) assert.Equal(t, append([]uint32{1}, c.postcompact...), getSortedSizes(newUpstream.specs)) assertContainAll(t, p, append(upstream.specs, newTable), newUpstream.specs) }) } }) t.Run("TablesDroppedUpstream", func(t *testing.T) { // Interloper drops some compactees for _, c := range tc { t.Run(c.name, func(t *testing.T) { fm, p, upstream := setup(startLock, startRoot, c.precompact) u := updatePreemptManifest{fm, func() { fm.set(constants.NomsVersion, computeAddr([]byte("lock2")), startRoot, upstream.specs[1:]) }} conjoin(upstream, u, p, stats) exists, newUpstream := fm.ParseIfExists(stats, nil) assert.True(t, exists) assert.Equal(t, c.precompact[1:], getSortedSizes(newUpstream.specs)) }) } }) } type updatePreemptManifest struct { manifest preUpdate func() } func (u updatePreemptManifest) Update(lastLock addr, newContents 
manifestContents, stats *Stats, writeHook func()) manifestContents { if u.preUpdate != nil { u.preUpdate() } return u.manifest.Update(lastLock, newContents, stats, writeHook) } ================================================ FILE: go/nbs/dynamo_fake_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "testing" "github.com/attic-labs/noms/go/constants" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/dynamodb" "github.com/stretchr/testify/assert" ) type fakeDDB struct { data map[string]interface{} t *testing.T numPuts, numGets int } type record struct { lock, root []byte vers, specs string } func makeFakeDDB(t *testing.T) *fakeDDB { return &fakeDDB{ data: map[string]interface{}{}, t: t, } } func (m *fakeDDB) readerForTable(name addr) chunkReader { if i, present := m.data[fmtTableName(name)]; present { buff, ok := i.([]byte) assert.True(m.t, ok) return newTableReader(parseTableIndex(buff), tableReaderAtFromBytes(buff), fileBlockSize) } return nil } func (m *fakeDDB) GetItem(input *dynamodb.GetItemInput) (*dynamodb.GetItemOutput, error) { key := input.Key[dbAttr].S assert.NotNil(m.t, key, "key should have been a String: %+v", input.Key[dbAttr]) item := map[string]*dynamodb.AttributeValue{} if e, present := m.data[*key]; present { item[dbAttr] = &dynamodb.AttributeValue{S: key} switch e := e.(type) { case record: item[nbsVersAttr] = &dynamodb.AttributeValue{S: aws.String(StorageVersion)} item[versAttr] = &dynamodb.AttributeValue{S: aws.String(e.vers)} item[rootAttr] = &dynamodb.AttributeValue{B: e.root} item[lockAttr] = &dynamodb.AttributeValue{B: e.lock} if e.specs != "" { item[tableSpecsAttr] = &dynamodb.AttributeValue{S: aws.String(e.specs)} } case []byte: item[dataAttr] = &dynamodb.AttributeValue{B: e} } } m.numGets++ return &dynamodb.GetItemOutput{Item: item}, 
nil } func (m *fakeDDB) putRecord(k string, l, r []byte, v string, s string) { m.data[k] = record{l, r, v, s} } func (m *fakeDDB) putData(k string, d []byte) { m.data[k] = d } func (m *fakeDDB) PutItem(input *dynamodb.PutItemInput) (*dynamodb.PutItemOutput, error) { assert.NotNil(m.t, input.Item[dbAttr], "%s should have been present", dbAttr) assert.NotNil(m.t, input.Item[dbAttr].S, "key should have been a String: %+v", input.Item[dbAttr]) key := *input.Item[dbAttr].S if input.Item[dataAttr] != nil { assert.NotNil(m.t, input.Item[dataAttr].B, "data should have been a blob: %+v", input.Item[dataAttr]) m.putData(key, input.Item[dataAttr].B) return &dynamodb.PutItemOutput{}, nil } assert.NotNil(m.t, input.Item[nbsVersAttr], "%s should have been present", nbsVersAttr) assert.NotNil(m.t, input.Item[nbsVersAttr].S, "nbsVers should have been a String: %+v", input.Item[nbsVersAttr]) assert.Equal(m.t, StorageVersion, *input.Item[nbsVersAttr].S) assert.NotNil(m.t, input.Item[versAttr], "%s should have been present", versAttr) assert.NotNil(m.t, input.Item[versAttr].S, "nbsVers should have been a String: %+v", input.Item[versAttr]) assert.Equal(m.t, constants.NomsVersion, *input.Item[versAttr].S) assert.NotNil(m.t, input.Item[lockAttr], "%s should have been present", lockAttr) assert.NotNil(m.t, input.Item[lockAttr].B, "lock should have been a blob: %+v", input.Item[lockAttr]) lock := input.Item[lockAttr].B assert.NotNil(m.t, input.Item[rootAttr], "%s should have been present", rootAttr) assert.NotNil(m.t, input.Item[rootAttr].B, "root should have been a blob: %+v", input.Item[rootAttr]) root := input.Item[rootAttr].B specs := "" if attr, present := input.Item[tableSpecsAttr]; present { assert.NotNil(m.t, attr.S, "specs should have been a String: %+v", input.Item[tableSpecsAttr]) specs = *attr.S } mustNotExist := *(input.ConditionExpression) == valueNotExistsOrEqualsExpression current, present := m.data[key] if mustNotExist && present { return nil, 
mockAWSError("ConditionalCheckFailedException") } else if !mustNotExist && !checkCondition(current.(record), input.ExpressionAttributeValues) { return nil, mockAWSError("ConditionalCheckFailedException") } m.putRecord(key, lock, root, constants.NomsVersion, specs) m.numPuts++ return &dynamodb.PutItemOutput{}, nil } func checkCondition(current record, expressionAttrVals map[string]*dynamodb.AttributeValue) bool { return current.vers == *expressionAttrVals[":vers"].S && bytes.Equal(current.lock, expressionAttrVals[":prev"].B) } ================================================ FILE: go/nbs/dynamo_manifest.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "fmt" "strings" "time" "github.com/attic-labs/noms/go/constants" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/dynamodb" ) const ( dbAttr = "db" lockAttr = "lck" // 'lock' is a reserved word in dynamo rootAttr = "root" versAttr = "vers" nbsVersAttr = "nbsVers" tableSpecsAttr = "specs" ) var ( valueEqualsExpression = fmt.Sprintf("(%s = :prev) and (%s = :vers)", lockAttr, versAttr) valueNotExistsOrEqualsExpression = fmt.Sprintf("attribute_not_exists("+lockAttr+") or %s", valueEqualsExpression) ) type ddbsvc interface { GetItem(input *dynamodb.GetItemInput) (*dynamodb.GetItemOutput, error) PutItem(input *dynamodb.PutItemInput) (*dynamodb.PutItemOutput, error) } // dynamoManifest assumes the existence of a DynamoDB table whose primary partition key is in String format and named `db`. 
type dynamoManifest struct { table, db string ddbsvc ddbsvc } func newDynamoManifest(table, namespace string, ddb ddbsvc) manifest { d.PanicIfTrue(table == "") d.PanicIfTrue(namespace == "") return dynamoManifest{table, namespace, ddb} } func (dm dynamoManifest) Name() string { return dm.table + dm.db } func (dm dynamoManifest) ParseIfExists(stats *Stats, readHook func()) (exists bool, contents manifestContents) { t1 := time.Now() defer func() { stats.ReadManifestLatency.SampleTimeSince(t1) }() result, err := dm.ddbsvc.GetItem(&dynamodb.GetItemInput{ ConsistentRead: aws.Bool(true), // This doubles the cost :-( TableName: aws.String(dm.table), Key: map[string]*dynamodb.AttributeValue{ dbAttr: {S: aws.String(dm.db)}, }, }) d.PanicIfError(err) // !exists(dbAttr) => unitialized store if len(result.Item) > 0 { valid, hasSpecs := validateManifest(result.Item) if !valid { d.Panic("Malformed manifest for %s: %+v", dm.db, result.Item) } exists = true contents.vers = *result.Item[versAttr].S contents.root = hash.New(result.Item[rootAttr].B) copy(contents.lock[:], result.Item[lockAttr].B) if hasSpecs { contents.specs = parseSpecs(strings.Split(*result.Item[tableSpecsAttr].S, ":")) } } return } func validateManifest(item map[string]*dynamodb.AttributeValue) (valid, hasSpecs bool) { if item[nbsVersAttr] != nil && item[nbsVersAttr].S != nil && StorageVersion == *item[nbsVersAttr].S && item[versAttr] != nil && item[versAttr].S != nil && item[lockAttr] != nil && item[lockAttr].B != nil && item[rootAttr] != nil && item[rootAttr].B != nil { if len(item) == 6 && item[tableSpecsAttr] != nil && item[tableSpecsAttr].S != nil { return true, true } return len(item) == 5, false } return false, false } func (dm dynamoManifest) Update(lastLock addr, newContents manifestContents, stats *Stats, writeHook func()) manifestContents { t1 := time.Now() defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }() putArgs := dynamodb.PutItemInput{ TableName: aws.String(dm.table), Item: 
map[string]*dynamodb.AttributeValue{ dbAttr: {S: aws.String(dm.db)}, nbsVersAttr: {S: aws.String(StorageVersion)}, versAttr: {S: aws.String(newContents.vers)}, rootAttr: {B: newContents.root[:]}, lockAttr: {B: newContents.lock[:]}, }, } if len(newContents.specs) > 0 { tableInfo := make([]string, 2*len(newContents.specs)) formatSpecs(newContents.specs, tableInfo) putArgs.Item[tableSpecsAttr] = &dynamodb.AttributeValue{S: aws.String(strings.Join(tableInfo, ":"))} } expr := valueEqualsExpression if lastLock == (addr{}) { expr = valueNotExistsOrEqualsExpression } putArgs.ConditionExpression = aws.String(expr) putArgs.ExpressionAttributeValues = map[string]*dynamodb.AttributeValue{ ":prev": {B: lastLock[:]}, ":vers": {S: aws.String(newContents.vers)}, } _, ddberr := dm.ddbsvc.PutItem(&putArgs) if ddberr != nil { if errIsConditionalCheckFailed(ddberr) { exists, upstream := dm.ParseIfExists(stats, nil) d.Chk.True(exists) d.Chk.True(upstream.vers == constants.NomsVersion) return upstream } // TODO handle other aws errors? d.PanicIfError(ddberr) } return newContents } func errIsConditionalCheckFailed(err error) bool { awsErr, ok := err.(awserr.Error) return ok && awsErr.Code() == "ConditionalCheckFailedException" } ================================================ FILE: go/nbs/dynamo_manifest_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "testing" "github.com/attic-labs/noms/go/constants" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) const ( table = "testTable" db = "testDB" ) func makeDynamoManifestFake(t *testing.T) (mm manifest, ddb *fakeDDB) { ddb = makeFakeDDB(t) mm = newDynamoManifest(table, db, ddb) return } func TestDynamoManifestParseIfExists(t *testing.T) { assert := assert.New(t) mm, ddb := makeDynamoManifestFake(t) stats := &Stats{} exists, _ := mm.ParseIfExists(stats, nil) assert.False(exists) // Simulate another process writing a manifest (with an old Noms version). newLock := computeAddr([]byte("locker")) newRoot := hash.Of([]byte("new root")) tableName := hash.Of([]byte("table1")) ddb.putRecord(db, newLock[:], newRoot[:], "0", tableName.String()+":"+"0") // ParseIfExists should now reflect the manifest written above. exists, contents := mm.ParseIfExists(stats, nil) assert.True(exists) assert.Equal("0", contents.vers) assert.Equal(newLock, contents.lock) assert.Equal(newRoot, contents.root) if assert.Len(contents.specs, 1) { assert.Equal(tableName.String(), contents.specs[0].name.String()) assert.Equal(uint32(0), contents.specs[0].chunkCount) } } func makeContents(lock, root string, specs []tableSpec) manifestContents { return manifestContents{constants.NomsVersion, computeAddr([]byte(lock)), hash.Of([]byte(root)), specs} } func TestDynamoManifestUpdateWontClobberOldVersion(t *testing.T) { assert := assert.New(t) mm, ddb := makeDynamoManifestFake(t) stats := &Stats{} // Simulate another process having already put old Noms data in dir/. 
lock := computeAddr([]byte("locker")) badRoot := hash.Of([]byte("bad root")) ddb.putRecord(db, lock[:], badRoot[:], "0", "") assert.Panics(func() { mm.Update(lock, manifestContents{vers: constants.NomsVersion}, stats, nil) }) } func TestDynamoManifestUpdate(t *testing.T) { assert := assert.New(t) mm, ddb := makeDynamoManifestFake(t) stats := &Stats{} // First, test winning the race against another process. contents := makeContents("locker", "nuroot", []tableSpec{{computeAddr([]byte("a")), 3}}) upstream := mm.Update(addr{}, contents, stats, func() { // This should fail to get the lock, and therefore _not_ clobber the manifest. So the Update should succeed. lock := computeAddr([]byte("nolock")) newRoot2 := hash.Of([]byte("noroot")) ddb.putRecord(db, lock[:], newRoot2[:], constants.NomsVersion, "") }) assert.Equal(contents.lock, upstream.lock) assert.Equal(contents.root, upstream.root) assert.Equal(contents.specs, upstream.specs) // Now, test the case where the optimistic lock fails, and someone else updated the root since last we checked. 
rejected := makeContents("locker 2", "new root 2", nil) upstream = mm.Update(addr{}, rejected, stats, nil) assert.Equal(contents.lock, upstream.lock) assert.Equal(contents.root, upstream.root) assert.Equal(contents.specs, upstream.specs) upstream = mm.Update(upstream.lock, rejected, stats, nil) assert.Equal(rejected.lock, upstream.lock) assert.Equal(rejected.root, upstream.root) assert.Empty(upstream.specs) // Now, test the case where the optimistic lock fails because someone else updated only the tables since last we checked jerkLock := computeAddr([]byte("jerk")) tableName := computeAddr([]byte("table1")) ddb.putRecord(db, jerkLock[:], upstream.root[:], constants.NomsVersion, tableName.String()+":1") newContents3 := makeContents("locker 3", "new root 3", nil) upstream = mm.Update(upstream.lock, newContents3, stats, nil) assert.Equal(jerkLock, upstream.lock) assert.Equal(rejected.root, upstream.root) assert.Equal([]tableSpec{{tableName, 1}}, upstream.specs) } func TestDynamoManifestCaching(t *testing.T) { assert := assert.New(t) mm, ddb := makeDynamoManifestFake(t) stats := &Stats{} // ParseIfExists should hit persistent storage no matter what reads := ddb.numGets exists, _ := mm.ParseIfExists(stats, nil) assert.False(exists) assert.Equal(reads+1, ddb.numGets) lock, root := computeAddr([]byte("lock")), hash.Of([]byte("root")) ddb.putRecord(db, lock[:], root[:], constants.NomsVersion, "") reads = ddb.numGets exists, _ = mm.ParseIfExists(stats, nil) assert.True(exists) assert.Equal(reads+1, ddb.numGets) // When failing the optimistic lock, we should hit persistent storage. reads = ddb.numGets contents := makeContents("lock2", "nuroot", []tableSpec{{computeAddr([]byte("a")), 3}}) upstream := mm.Update(addr{}, contents, stats, nil) assert.NotEqual(contents.lock, upstream.lock) assert.Equal(reads+1, ddb.numGets) // Successful update should NOT hit persistent storage. 
reads = ddb.numGets upstream = mm.Update(upstream.lock, contents, stats, nil) assert.Equal(contents.lock, upstream.lock) assert.Equal(reads, ddb.numGets) } func TestDynamoManifestUpdateEmpty(t *testing.T) { assert := assert.New(t) mm, _ := makeDynamoManifestFake(t) stats := &Stats{} contents := manifestContents{vers: constants.NomsVersion, lock: computeAddr([]byte{0x01})} upstream := mm.Update(addr{}, contents, stats, nil) assert.Equal(contents.lock, upstream.lock) assert.True(upstream.root.IsEmpty()) assert.Empty(upstream.specs) } ================================================ FILE: go/nbs/dynamo_table_reader.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "fmt" "io" "log" "time" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/sizecache" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/dynamodb" ) const ( dataAttr = "data" tablePrefix = "*" // I want to use NBS table names as keys when they are written to DynamoDB, but a bare table name is a legal Noms Database name as well. To avoid collisions, dynamoTableReader prepends this prefix (which is not a legal character in a Noms Database name). ) // dynamoTableReaderAt assumes the existence of a DynamoDB table whose primary partition key is in String format and named `db`. 
type dynamoTableReaderAt struct { ddb *ddbTableStore h addr } type tableNotInDynamoErr struct { nbs, dynamo string } func (t tableNotInDynamoErr) Error() string { return fmt.Sprintf("NBS table %s not present in DynamoDB table %s", t.nbs, t.dynamo) } func (dtra *dynamoTableReaderAt) ReadAtWithStats(p []byte, off int64, stats *Stats) (n int, err error) { data, err := dtra.ddb.ReadTable(dtra.h, stats) d.PanicIfError(err) n = copy(p, data[off:]) if n < len(p) { err = io.ErrUnexpectedEOF } return } type ddbTableStore struct { ddb ddbsvc table string readRl chan struct{} cache *sizecache.SizeCache // TODO: merge this with tableCache as part of BUG 3601 } func (dts *ddbTableStore) ReadTable(name addr, stats *Stats) (data []byte, err error) { t1 := time.Now() if dts.cache != nil { if i, present := dts.cache.Get(name); present { data = i.([]byte) defer func() { stats.MemBytesPerRead.Sample(uint64(len(data))) stats.MemReadLatency.SampleTimeSince(t1) }() return data, nil } } data, err = dts.readTable(name) if data != nil { defer func() { stats.DynamoBytesPerRead.Sample(uint64(len(data))) stats.DynamoReadLatency.SampleTimeSince(t1) }() } if dts.cache != nil && err == nil { dts.cache.Add(name, uint64(len(data)), data) } return data, err } func (dts *ddbTableStore) readTable(name addr) (data []byte, err error) { try := func(input *dynamodb.GetItemInput) (data []byte, err error) { if dts.readRl != nil { dts.readRl <- struct{}{} defer func() { <-dts.readRl }() } result, rerr := dts.ddb.GetItem(input) if rerr != nil { return nil, rerr } else if len(result.Item) == 0 { return nil, tableNotInDynamoErr{name.String(), dts.table} } else if result.Item[dataAttr] == nil || result.Item[dataAttr].B == nil { return nil, fmt.Errorf("NBS table %s in DynamoDB table %s is malformed", name, dts.table) } return result.Item[dataAttr].B, nil } input := dynamodb.GetItemInput{ TableName: aws.String(dts.table), Key: map[string]*dynamodb.AttributeValue{ dbAttr: {S: aws.String(fmtTableName(name))}, }, } 
data, err = try(&input) if _, isNotFound := err.(tableNotInDynamoErr); isNotFound { log.Printf("Eventually consistent read for %s failed; trying fully-consistent", name) input.ConsistentRead = aws.Bool(true) return try(&input) } return data, err } func fmtTableName(name addr) string { return tablePrefix + name.String() } func (dts *ddbTableStore) Write(name addr, data []byte) error { _, err := dts.ddb.PutItem(&dynamodb.PutItemInput{ TableName: aws.String(dts.table), Item: map[string]*dynamodb.AttributeValue{ dbAttr: {S: aws.String(fmtTableName(name))}, dataAttr: {B: data}, }, }) if dts.cache != nil && err == nil { dts.cache.Add(name, uint64(len(data)), data) } return err } ================================================ FILE: go/nbs/dynamo_table_reader_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "testing" "github.com/attic-labs/noms/go/util/sizecache" "github.com/aws/aws-sdk-go/service/dynamodb" "github.com/stretchr/testify/assert" ) func TestDynamoTableReaderAt(t *testing.T) { ddb := makeFakeDDB(t) chunks := [][]byte{ []byte("hello2"), []byte("goodbye2"), []byte("badbye2"), } tableData, h := buildTable(chunks) ddb.putData(fmtTableName(h), tableData) t.Run("ddbTableStore", func(t *testing.T) { t.Run("ReadTable", func(t *testing.T) { test := func(dts *ddbTableStore) { assert := assert.New(t) data, err := dts.ReadTable(h, &Stats{}) assert.NoError(err) assert.Equal(tableData, data) data, err = dts.ReadTable(computeAddr([]byte{}), &Stats{}) assert.Error(err) assert.IsType(tableNotInDynamoErr{}, err) assert.Nil(data) } t.Run("EventuallyConsistentSuccess", func(t *testing.T) { test(&ddbTableStore{ddb, "table", nil, nil}) }) t.Run("EventuallyConsistentFailure", func(t *testing.T) { test(&ddbTableStore{&eventuallyConsistentDDB{ddb}, "table", nil, nil}) }) t.Run("WithCache", func(t 
*testing.T) { tc := sizecache.New(uint64(2 * len(tableData))) dts := &ddbTableStore{ddb, "table", nil, tc} test(dts) // Table should have been cached on read baseline := ddb.numGets _, err := dts.ReadTable(h, &Stats{}) assert.NoError(t, err) assert.Zero(t, ddb.numGets-baseline) }) }) t.Run("WriteTable", func(t *testing.T) { t.Run("WithoutCache", func(t *testing.T) { assert := assert.New(t) dts := &ddbTableStore{makeFakeDDB(t), "table", nil, nil} assert.NoError(dts.Write(h, tableData)) data, err := dts.ReadTable(h, &Stats{}) assert.NoError(err) assert.Equal(tableData, data) }) t.Run("WithCache", func(t *testing.T) { assert := assert.New(t) tc := sizecache.New(uint64(2 * len(tableData))) dts := &ddbTableStore{makeFakeDDB(t), "table", nil, tc} assert.NoError(dts.Write(h, tableData)) // Table should have been cached on write baseline := ddb.numGets data, err := dts.ReadTable(h, &Stats{}) assert.NoError(err) assert.Equal(tableData, data) assert.Zero(ddb.numGets - baseline) }) }) }) t.Run("ReadAtWithCache", func(t *testing.T) { assert := assert.New(t) stats := &Stats{} tc := sizecache.New(uint64(2 * len(tableData))) tra := &dynamoTableReaderAt{&ddbTableStore{ddb, "table", nil, tc}, h} // First, read when table is not yet cached scratch := make([]byte, len(tableData)/4) baseline := ddb.numGets _, err := tra.ReadAtWithStats(scratch, 0, stats) assert.NoError(err) assert.True(ddb.numGets > baseline) // Table should have been cached on read so read again, a different slice this time baseline = ddb.numGets _, err = tra.ReadAtWithStats(scratch, int64(len(scratch)), stats) assert.NoError(err) assert.Zero(ddb.numGets - baseline) }) } type eventuallyConsistentDDB struct { ddbsvc } func (ec *eventuallyConsistentDDB) GetItem(input *dynamodb.GetItemInput) (*dynamodb.GetItemOutput, error) { if input.ConsistentRead != nil && *(input.ConsistentRead) { return ec.ddbsvc.GetItem(input) } return &dynamodb.GetItemOutput{}, nil } ================================================ FILE: 
go/nbs/factory.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "fmt" "os" "path" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/sizecache" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/dynamodb" "github.com/aws/aws-sdk-go/service/s3" ) const ( defaultAWSReadLimit = 1024 awsMaxTables = 128 defaultSmallTableCacheSize = 1 << 28 // 256MB ) // AWSStoreFactory vends NomsBlockStores built on top of DynamoDB and S3. type AWSStoreFactory struct { ddb ddbsvc persister tablePersister table string conjoiner conjoiner manifestLocks *manifestLocks manifestCache *manifestCache } // NewAWSStoreFactory returns a ChunkStore factory that vends NomsBlockStore // instances that store manifests in the named DynamoDB table, and chunk data // in the named S3 bucket. All connections to AWS services share |sess|. 
func NewAWSStoreFactory(sess *session.Session, table, bucket string, maxOpenFiles int, indexCacheSize, tableCacheSize uint64, tableCacheDir string) chunks.Factory { var indexCache *indexCache if indexCacheSize > 0 { indexCache = newIndexCache(indexCacheSize) } var tc *fsTableCache if tableCacheSize > 0 { tc = newFSTableCache(tableCacheDir, tableCacheSize, maxOpenFiles) } ddb := dynamodb.New(sess) readRateLimiter := make(chan struct{}, defaultAWSReadLimit) return &AWSStoreFactory{ ddb: ddb, persister: &awsTablePersister{ s3.New(sess), bucket, readRateLimiter, tc, &ddbTableStore{ddb, table, readRateLimiter, sizecache.New(defaultSmallTableCacheSize)}, awsLimits{defaultS3PartSize, minS3PartSize, maxS3PartSize, maxDynamoItemSize, maxDynamoChunks}, indexCache, }, table: table, conjoiner: inlineConjoiner{awsMaxTables}, manifestLocks: newManifestLocks(), manifestCache: newManifestCache(defaultManifestCacheSize), } } func (asf *AWSStoreFactory) CreateStore(ns string) chunks.ChunkStore { mm := manifestManager{newDynamoManifest(asf.table, ns, asf.ddb), asf.manifestCache, asf.manifestLocks} return newNomsBlockStore(mm, asf.persister, asf.conjoiner, defaultMemTableSize) } func (asf *AWSStoreFactory) CreateStoreFromCache(ns string) chunks.ChunkStore { mm := manifestManager{newDynamoManifest(asf.table, ns, asf.ddb), asf.manifestCache, asf.manifestLocks} contents, _, present := asf.manifestCache.Get(mm.Name()) if present { return newNomsBlockStoreWithContents(mm, contents, asf.persister, asf.conjoiner, defaultMemTableSize) } return nil } func (asf *AWSStoreFactory) Shutter() { } type LocalStoreFactory struct { dir string fc *fdCache indexCache *indexCache conjoiner conjoiner manifestLocks *manifestLocks manifestCache *manifestCache } func checkDir(dir string) error { stat, err := os.Stat(dir) if err != nil { return err } if !stat.IsDir() { return fmt.Errorf("Path is not a directory: %s", dir) } return nil } func NewLocalStoreFactory(dir string, indexCacheSize uint64, maxOpenFiles 
int) chunks.Factory { err := checkDir(dir) d.PanicIfError(err) var indexCache *indexCache if indexCacheSize > 0 { indexCache = newIndexCache(indexCacheSize) } return &LocalStoreFactory{ dir: dir, fc: newFDCache(maxOpenFiles), indexCache: indexCache, conjoiner: inlineConjoiner{defaultMaxTables}, manifestLocks: newManifestLocks(), manifestCache: newManifestCache(defaultManifestCacheSize), } } func (lsf *LocalStoreFactory) CreateStore(ns string) chunks.ChunkStore { path := path.Join(lsf.dir, ns) d.PanicIfError(os.MkdirAll(path, 0777)) mm := manifestManager{fileManifest{path}, lsf.manifestCache, lsf.manifestLocks} p := newFSTablePersister(path, lsf.fc, lsf.indexCache) return newNomsBlockStore(mm, p, lsf.conjoiner, defaultMemTableSize) } func (lsf *LocalStoreFactory) CreateStoreFromCache(ns string) chunks.ChunkStore { path := path.Join(lsf.dir, ns) mm := manifestManager{fileManifest{path}, lsf.manifestCache, lsf.manifestLocks} contents, _, present := lsf.manifestCache.Get(mm.Name()) if present { _, err := os.Stat(path) d.PanicIfTrue(os.IsNotExist(err)) p := newFSTablePersister(path, lsf.fc, lsf.indexCache) return newNomsBlockStoreWithContents(mm, contents, p, lsf.conjoiner, defaultMemTableSize) } return nil } func (lsf *LocalStoreFactory) Shutter() { lsf.fc.Drop() } ================================================ FILE: go/nbs/factory_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "os" "path/filepath" "strings" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/constants" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) func TestLocalStoreFactory(t *testing.T) { assert := assert.New(t) dir := makeTempDir(t) defer os.RemoveAll(dir) f := NewLocalStoreFactory(dir, 0, 8) stats := &Stats{} dbName := "db" store := f.CreateStore(dbName) c := chunks.NewChunk([]byte{0xff}) store.Put(c) assert.True(store.Commit(c.Hash(), hash.Hash{})) dbDir := filepath.Join(dir, dbName) exists, contents := fileManifest{dbDir}.ParseIfExists(stats, nil) assert.True(exists) assert.Len(contents.specs, 1) _, err := os.Stat(filepath.Join(dbDir, contents.specs[0].name.String())) assert.NoError(err) // Simulate another process writing a manifest. lock := computeAddr([]byte("locker")) newRoot := hash.Of([]byte("new root")) err = clobberManifest(dbDir, strings.Join([]string{StorageVersion, constants.NomsVersion, lock.String(), newRoot.String(), contents.specs[0].name.String(), "1"}, ":")) assert.NoError(err) cached := f.CreateStoreFromCache(dbName) assert.Equal(c.Hash(), cached.Root()) } ================================================ FILE: go/nbs/fd_cache.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "os" "sort" "sync" "github.com/attic-labs/noms/go/d" ) func newFDCache(targetSize int) *fdCache { return &fdCache{targetSize: targetSize, cache: map[string]fdCacheEntry{}} } // fdCache ref-counts open file descriptors, but doesn't keep a hard cap on // the number of open files. 
Once the cache's target size is exceeded, opening // a new file causes the cache to try to get the cache back to the target size // by closing fds with zero refs. If there aren't enough such fds, fdCache // gives up and tries again next time a caller refs a file. type fdCache struct { targetSize int mu sync.Mutex cache map[string]fdCacheEntry } type fdCacheEntry struct { refCount uint32 f *os.File } // RefFile returns an opened *os.File for the file at |path|, or an error // indicating why the file could not be opened. If the cache already had an // entry for |path|, RefFile increments its refcount and returns the cached // pointer. If not, it opens the file and caches the pointer for others to // use. If RefFile returns an error, it's guaranteed that no refCounts were // changed, so it's an error to make a subsequent call to UnrefFile(). // This is intended for clients that hold fds for extremely short periods. func (fc *fdCache) RefFile(path string) (f *os.File, err error) { refFile := func() *os.File { if ce, present := fc.cache[path]; present { ce.refCount++ fc.cache[path] = ce return ce.f } return nil } f = func() *os.File { fc.mu.Lock() defer fc.mu.Unlock() return refFile() }() if f != nil { return f, nil } // Very much want this to be outside the lock, but the downside is that multiple callers may get here concurrently. That means we need to deal with the raciness below. f, err = os.Open(path) if err != nil { return nil, err } fc.mu.Lock() defer fc.mu.Unlock() if cached := refFile(); cached != nil { // Someone beat us to it, so close f and return cached fd f.Close() return cached, nil } // I won the race! fc.cache[path] = fdCacheEntry{f: f, refCount: 1} return f, nil } // UnrefFile reduces the refcount of the entry at |path|. If the cache is over // |fc.targetSize|, UnrefFile makes a best effort to shrink the cache by dumping // entries with a zero refcount. 
If there aren't enough zero refcount entries // to drop to get the cache back to |fc.targetSize|, the cache will remain // over |fc.targetSize| until the next call to UnrefFile(). func (fc *fdCache) UnrefFile(path string) { fc.mu.Lock() defer fc.mu.Unlock() if ce, present := fc.cache[path]; present { ce.refCount-- fc.cache[path] = ce } if len(fc.cache) > fc.targetSize { // Sadly, we can't remove items from a map while iterating, so we'll record the stuff we want to drop and then do it after needed := len(fc.cache) - fc.targetSize toDrop := make([]string, 0, needed) for p, ce := range fc.cache { if ce.refCount != 0 { continue } toDrop = append(toDrop, p) err := ce.f.Close() d.PanicIfError(err) needed-- if needed == 0 { break } } for _, p := range toDrop { delete(fc.cache, p) } } } // Drop dumps the entire cache and closes all currently open files. func (fc *fdCache) Drop() { fc.mu.Lock() defer fc.mu.Unlock() for _, ce := range fc.cache { ce.f.Close() } fc.cache = map[string]fdCacheEntry{} } // reportEntries is meant for testing. func (fc *fdCache) reportEntries() sort.StringSlice { fc.mu.Lock() defer fc.mu.Unlock() ret := make(sort.StringSlice, 0, len(fc.cache)) for p := range fc.cache { ret = append(ret, p) } sort.Sort(ret) return ret } ================================================ FILE: go/nbs/fd_cache_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"sort"
	"sync"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestFDCache exercises fdCache against three small files written to a temp
// directory: concurrent opens of one path, ref-counting without eviction, and
// eviction once the cache is over its target size.
func TestFDCache(t *testing.T) {
	dir := makeTempDir(t)
	defer os.RemoveAll(dir)

	paths := [3]string{}
	for i := range paths {
		name := fmt.Sprintf("file%d", i)
		paths[i] = filepath.Join(dir, name)
		err := ioutil.WriteFile(paths[i], []byte(name), 0644)
		assert.NoError(t, err)
	}

	// refNoError refs |p| and asserts that doing so succeeded.
	refNoError := func(fc *fdCache, p string, assert *assert.Assertions) *os.File {
		f, err := fc.RefFile(p)
		assert.NoError(err)
		assert.NotNil(f)
		return f
	}

	t.Run("ConcurrentOpen", func(t *testing.T) {
		assert := assert.New(t)
		concurrency := 3
		fc := newFDCache(3)
		defer fc.Drop()

		// All goroutines block on |trigger| so that they call RefFile at
		// roughly the same time.
		trigger := make(chan struct{})
		wg := sync.WaitGroup{}
		for i := 0; i < concurrency; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				<-trigger
				fc.RefFile(paths[0])
			}()
		}
		close(trigger)
		wg.Wait()

		// However the opens interleaved, there must be exactly one entry
		// carrying one ref per caller.
		present := fc.reportEntries()
		if assert.Len(present, 1) {
			ce := fc.cache[present[0]]
			assert.EqualValues(concurrency, ce.refCount)
		}
	})

	t.Run("NoEvictions", func(t *testing.T) {
		assert := assert.New(t)
		fc := newFDCache(2)
		defer fc.Drop()

		f := refNoError(fc, paths[0], assert)
		f2 := refNoError(fc, paths[1], assert)
		assert.NotEqual(f, f2)

		dup := refNoError(fc, paths[0], assert)
		assert.Equal(f, dup)
	})

	t.Run("Evictions", func(t *testing.T) {
		assert := assert.New(t)
		fc := newFDCache(1)
		defer fc.Drop()

		f0 := refNoError(fc, paths[0], assert)
		f1 := refNoError(fc, paths[1], assert)
		assert.NotEqual(f0, f1)

		// f0 wasn't evicted, because that doesn't happen until UnrefFile()
		dup := refNoError(fc, paths[0], assert)
		assert.Equal(f0, dup)

		expected := sort.StringSlice(paths[:2])
		sort.Sort(expected)
		assert.EqualValues(expected, fc.reportEntries())

		// Unreffing f1 now should evict it
		fc.UnrefFile(paths[1])
		assert.EqualValues(paths[:1], fc.reportEntries())

		// Bring f1 back so we can test multiple evictions in a row
		f1 = refNoError(fc, paths[1], assert)
		assert.NotEqual(f0, f1)

		// After adding f2, we should be able to evict both f0 and f1
		f2 := refNoError(fc, paths[2], assert)
		assert.NotEqual(f0, f2)
		assert.NotEqual(f1, f2)

		// paths[0] was reffed twice above, so it takes two unrefs to reach zero.
		fc.UnrefFile(paths[0])
		fc.UnrefFile(paths[0])
		fc.UnrefFile(paths[1])

		assert.EqualValues(paths[2:], fc.reportEntries())
	})
}

================================================
FILE: go/nbs/file_manifest.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"time"

	"golang.org/x/sys/unix"

	"github.com/attic-labs/noms/go/constants"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
)

// Names of the manifest and lock files that live inside a store's directory.
const (
	manifestFileName = "manifest"
	lockFileName     = "LOCK"
)

// fileManifest provides access to a NomsBlockStore manifest stored on disk in |dir|. The format
// is currently human readable:
//
// |-- String --|-- String --|-------- String --------|-------- String --------|-- String --|- String --|...|-- String --|- String --|
// | nbs version:Noms version:Base32-encoded lock hash:Base32-encoded root hash:table 1 hash:table 1 cnt:...:table N hash:table N cnt|
type fileManifest struct {
	dir string
}

// Name returns the directory holding the manifest.
func (fm fileManifest) Name() string {
	return fm.dir
}

// ParseIfExists looks for a LOCK and manifest file in fm.dir. If it finds
// them, it takes the lock, parses the manifest and returns its contents,
// setting |exists| to true. If not, it sets |exists| to false and returns. In
// that case, the other return values are undefined. If |readHook| is non-nil,
// it will be executed while ParseIfExists() holds the manifest file lock.
// This is to allow for race condition testing.
func (fm fileManifest) ParseIfExists(stats *Stats, readHook func()) (exists bool, contents manifestContents) { t1 := time.Now() defer func() { stats.ReadManifestLatency.SampleTimeSince(t1) }() // !exists(lockFileName) => unitialized store if l := openIfExists(filepath.Join(fm.dir, lockFileName)); l != nil { var f io.ReadCloser func() { d.PanicIfError(unix.Flock(int(l.Fd()), unix.LOCK_EX)) defer checkClose(l) // releases the flock() if readHook != nil { readHook() } f = openIfExists(filepath.Join(fm.dir, manifestFileName)) }() if f != nil { defer checkClose(f) exists = true contents = parseManifest(f) } } return } // Returns nil if path does not exist func openIfExists(path string) *os.File { f, err := os.Open(path) if os.IsNotExist(err) { return nil } d.PanicIfError(err) return f } func parseManifest(r io.Reader) manifestContents { manifest, err := ioutil.ReadAll(r) d.PanicIfError(err) slices := strings.Split(string(manifest), ":") if len(slices) < 4 || len(slices)%2 == 1 { d.Chk.Fail("Malformed manifest: " + string(manifest)) } d.PanicIfFalse(StorageVersion == string(slices[0])) return manifestContents{ vers: slices[1], lock: ParseAddr([]byte(slices[2])), root: hash.Parse(slices[3]), specs: parseSpecs(slices[4:]), } } func (fm fileManifest) Update(lastLock addr, newContents manifestContents, stats *Stats, writeHook func()) manifestContents { t1 := time.Now() defer func() { stats.WriteManifestLatency.SampleTimeSince(t1) }() // Write a temporary manifest file, to be renamed over manifestFileName upon success. // The closure here ensures this file is closed before moving on. 
tempManifestPath := func() string { temp, err := ioutil.TempFile(fm.dir, "nbs_manifest_") d.PanicIfError(err) defer checkClose(temp) writeManifest(temp, newContents) return temp.Name() }() defer os.Remove(tempManifestPath) // If we rename below, this will be a no-op // Take manifest file lock defer checkClose(flock(filepath.Join(fm.dir, lockFileName))) // closing releases the lock // writeHook is for testing, allowing other code to slip in and try to do stuff while we hold the lock. if writeHook != nil { writeHook() } // Read current manifest (if it exists). The closure ensures that the file is closed before moving on, so we can rename over it later if need be. manifestPath := filepath.Join(fm.dir, manifestFileName) upstream := func() manifestContents { if f := openIfExists(manifestPath); f != nil { defer checkClose(f) upstream := parseManifest(f) d.PanicIfFalse(constants.NomsVersion == upstream.vers) return upstream } d.Chk.True(lastLock == addr{}) return manifestContents{} }() if lastLock != upstream.lock { return upstream } rerr := os.Rename(tempManifestPath, manifestPath) d.PanicIfError(rerr) return newContents } func writeManifest(temp io.Writer, contents manifestContents) { strs := make([]string, 2*len(contents.specs)+4) strs[0], strs[1], strs[2], strs[3] = StorageVersion, contents.vers, contents.lock.String(), contents.root.String() tableInfo := strs[4:] formatSpecs(contents.specs, tableInfo) _, err := io.WriteString(temp, strings.Join(strs, ":")) d.PanicIfError(err) } func checkClose(c io.Closer) { d.PanicIfError(c.Close()) } func flock(lockFilePath string) io.Closer { l, err := os.Create(lockFilePath) d.PanicIfError(err) d.PanicIfError(unix.Flock(int(l.Fd()), unix.LOCK_EX)) return l } ================================================ FILE: go/nbs/file_manifest_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"io/ioutil"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"testing"

	"github.com/attic-labs/noms/go/constants"
	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
)

// makeFileManifestTempDir returns a fileManifest rooted at a fresh temp
// directory. Callers are responsible for removing the directory.
func makeFileManifestTempDir(t *testing.T) fileManifest {
	dir, err := ioutil.TempDir("", "")
	assert.NoError(t, err)
	return fileManifest{dir: dir} //, cache: newManifestCache(defaultManifestCacheSize)}
}

// TestFileManifestLoadIfExists checks that ParseIfExists reports no manifest
// in an empty directory and returns the written contents once one exists.
func TestFileManifestLoadIfExists(t *testing.T) {
	assert := assert.New(t)
	fm := makeFileManifestTempDir(t)
	defer os.RemoveAll(fm.dir)
	stats := &Stats{}

	exists, upstream := fm.ParseIfExists(stats, nil)
	assert.False(exists)

	// Simulate another process writing a manifest (with an old Noms version).
	jerk := computeAddr([]byte("jerk"))
	newRoot := hash.Of([]byte("new root"))
	tableName := hash.Of([]byte("table1"))
	err := clobberManifest(fm.dir, strings.Join([]string{StorageVersion, "0", jerk.String(), newRoot.String(), tableName.String(), "0"}, ":"))
	assert.NoError(err)

	// ParseIfExists should now reflect the manifest written above.
	exists, upstream = fm.ParseIfExists(stats, nil)
	assert.True(exists)
	assert.Equal("0", upstream.vers)
	assert.Equal(jerk, upstream.lock)
	assert.Equal(newRoot, upstream.root)
	if assert.Len(upstream.specs, 1) {
		assert.Equal(tableName.String(), upstream.specs[0].name.String())
		assert.Equal(uint32(0), upstream.specs[0].chunkCount)
	}
}

// TestFileManifestLoadIfExistsHoldsLock checks that a concurrent clobber
// attempt made from the read hook does not change what ParseIfExists returns.
func TestFileManifestLoadIfExistsHoldsLock(t *testing.T) {
	assert := assert.New(t)
	fm := makeFileManifestTempDir(t)
	defer os.RemoveAll(fm.dir)
	stats := &Stats{}

	// Simulate another process writing a manifest.
	lock := computeAddr([]byte("locker"))
	newRoot := hash.Of([]byte("new root"))
	tableName := hash.Of([]byte("table1"))
	err := clobberManifest(fm.dir, strings.Join([]string{StorageVersion, constants.NomsVersion, lock.String(), newRoot.String(), tableName.String(), "0"}, ":"))
	assert.NoError(err)

	// ParseIfExists should now reflect the manifest written above.
	exists, upstream := fm.ParseIfExists(stats, func() {
		// This should fail to get the lock, and therefore _not_ clobber the manifest.
		lock := computeAddr([]byte("newlock"))
		badRoot := hash.Of([]byte("bad root"))
		b, err := tryClobberManifest(fm.dir, strings.Join([]string{StorageVersion, "0", lock.String(), badRoot.String(), tableName.String(), "0"}, ":"))
		assert.NoError(err, string(b))
	})

	assert.True(exists)
	assert.Equal(constants.NomsVersion, upstream.vers)
	assert.Equal(newRoot, upstream.root)
	if assert.Len(upstream.specs, 1) {
		assert.Equal(tableName.String(), upstream.specs[0].name.String())
		assert.Equal(uint32(0), upstream.specs[0].chunkCount)
	}
}

// TestFileManifestUpdateWontClobberOldVersion checks that Update panics when
// the manifest on disk carries a different Noms version.
func TestFileManifestUpdateWontClobberOldVersion(t *testing.T) {
	assert := assert.New(t)
	fm := makeFileManifestTempDir(t)
	defer os.RemoveAll(fm.dir)
	stats := &Stats{}

	// Simulate another process having already put old Noms data in dir/.
	err := clobberManifest(fm.dir, strings.Join([]string{StorageVersion, "0", addr{}.String(), hash.Hash{}.String()}, ":"))
	assert.NoError(err)

	assert.Panics(func() { fm.Update(addr{}, manifestContents{}, stats, nil) })
}

// TestFileManifestUpdateEmpty checks Update and ParseIfExists on manifests
// that have a lock but no root and no table specs.
func TestFileManifestUpdateEmpty(t *testing.T) {
	assert := assert.New(t)
	fm := makeFileManifestTempDir(t)
	defer os.RemoveAll(fm.dir)
	stats := &Stats{}

	l := computeAddr([]byte{0x01})
	upstream := fm.Update(addr{}, manifestContents{vers: constants.NomsVersion, lock: l}, stats, nil)
	assert.Equal(l, upstream.lock)
	assert.True(upstream.root.IsEmpty())
	assert.Empty(upstream.specs)

	fm2 := fileManifest{fm.dir} // Open existent, but empty manifest
	exists, upstream := fm2.ParseIfExists(stats, nil)
	assert.True(exists)
	assert.Equal(l, upstream.lock)
	assert.True(upstream.root.IsEmpty())
	assert.Empty(upstream.specs)

	l2 := computeAddr([]byte{0x02})
	upstream = fm2.Update(l, manifestContents{vers: constants.NomsVersion, lock: l2}, stats, nil)
	assert.Equal(l2, upstream.lock)
	assert.True(upstream.root.IsEmpty())
	assert.Empty(upstream.specs)
}

// TestFileManifestUpdate covers Update winning the race against a concurrent
// writer, and Update losing the optimistic lock because the root or the table
// list changed underneath it.
func TestFileManifestUpdate(t *testing.T) {
	assert := assert.New(t)
	fm := makeFileManifestTempDir(t)
	defer os.RemoveAll(fm.dir)
	stats := &Stats{}

	// First, test winning the race against another process.
	contents := manifestContents{
		vers:  constants.NomsVersion,
		lock:  computeAddr([]byte("locker")),
		root:  hash.Of([]byte("new root")),
		specs: []tableSpec{{computeAddr([]byte("a")), 3}},
	}
	upstream := fm.Update(addr{}, contents, stats, func() {
		// This should fail to get the lock, and therefore _not_ clobber the manifest. So the Update should succeed.
		lock := computeAddr([]byte("nolock"))
		newRoot2 := hash.Of([]byte("noroot"))
		b, err := tryClobberManifest(fm.dir, strings.Join([]string{StorageVersion, constants.NomsVersion, lock.String(), newRoot2.String()}, ":"))
		assert.NoError(err, string(b))
	})
	assert.Equal(contents.lock, upstream.lock)
	assert.Equal(contents.root, upstream.root)
	assert.Equal(contents.specs, upstream.specs)

	// Now, test the case where the optimistic lock fails, and someone else updated the root since last we checked.
	contents2 := manifestContents{lock: computeAddr([]byte("locker 2")), root: hash.Of([]byte("new root 2"))}
	upstream = fm.Update(addr{}, contents2, stats, nil)
	assert.Equal(contents.lock, upstream.lock)
	assert.Equal(contents.root, upstream.root)
	assert.Equal(contents.specs, upstream.specs)
	// Retrying with the lock just returned by the failed Update should succeed.
	upstream = fm.Update(upstream.lock, contents2, stats, nil)
	assert.Equal(contents2.lock, upstream.lock)
	assert.Equal(contents2.root, upstream.root)
	assert.Empty(upstream.specs)

	// Now, test the case where the optimistic lock fails because someone else updated only the tables since last we checked
	jerkLock := computeAddr([]byte("jerk"))
	tableName := computeAddr([]byte("table1"))
	err := clobberManifest(fm.dir, strings.Join([]string{StorageVersion, constants.NomsVersion, jerkLock.String(), contents2.root.String(), tableName.String(), "1"}, ":"))
	assert.NoError(err)

	contents3 := manifestContents{lock: computeAddr([]byte("locker 3")), root: hash.Of([]byte("new root 3"))}
	upstream = fm.Update(upstream.lock, contents3, stats, nil)
	assert.Equal(jerkLock, upstream.lock)
	assert.Equal(contents2.root, upstream.root)
	assert.Equal([]tableSpec{{tableName, 1}}, upstream.specs)
}

// tryClobberManifest simulates another process trying to access dir/manifestFileName concurrently. To avoid deadlock, it does a non-blocking lock of dir/lockFileName. If it can get the lock, it clobbers the manifest.
func tryClobberManifest(dir, contents string) ([]byte, error) { return runClobber(dir, contents) } // clobberManifest simulates another process writing dir/manifestFileName concurrently. It ignores the lock file, so it's up to the caller to ensure correctness. func clobberManifest(dir, contents string) error { if err := ioutil.WriteFile(filepath.Join(dir, lockFileName), nil, 0666); err != nil { return err } return ioutil.WriteFile(filepath.Join(dir, manifestFileName), []byte(contents), 0666) } func runClobber(dir, contents string) ([]byte, error) { _, filename, _, _ := runtime.Caller(1) clobber := filepath.Join(filepath.Dir(filename), "test/manifest_clobber.go") mkPath := func(f string) string { return filepath.Join(dir, f) } c := exec.Command("go", "run", clobber, mkPath(lockFileName), mkPath(manifestFileName), contents) return c.CombinedOutput() } ================================================ FILE: go/nbs/file_table_persister.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "io" "io/ioutil" "os" "path/filepath" "github.com/attic-labs/noms/go/d" ) const tempTablePrefix = "nbs_table_" func newFSTablePersister(dir string, fc *fdCache, indexCache *indexCache) tablePersister { d.PanicIfTrue(fc == nil) return &fsTablePersister{dir, fc, indexCache} } type fsTablePersister struct { dir string fc *fdCache indexCache *indexCache } func (ftp *fsTablePersister) Open(name addr, chunkCount uint32, stats *Stats) chunkSource { return newMmapTableReader(ftp.dir, name, chunkCount, ftp.indexCache, ftp.fc) } func (ftp *fsTablePersister) Persist(mt *memTable, haver chunkReader, stats *Stats) chunkSource { name, data, chunkCount := mt.write(haver, stats) return ftp.persistTable(name, data, chunkCount, stats) } func (ftp *fsTablePersister) persistTable(name addr, data []byte, chunkCount uint32, stats *Stats) chunkSource { if chunkCount == 0 { return emptyChunkSource{} } tempName := func() string { temp, err := ioutil.TempFile(ftp.dir, tempTablePrefix) d.PanicIfError(err) defer checkClose(temp) io.Copy(temp, bytes.NewReader(data)) index := parseTableIndex(data) if ftp.indexCache != nil { ftp.indexCache.lockEntry(name) defer ftp.indexCache.unlockEntry(name) ftp.indexCache.put(name, index) } return temp.Name() }() err := os.Rename(tempName, filepath.Join(ftp.dir, name.String())) d.PanicIfError(err) return ftp.Open(name, chunkCount, stats) } func (ftp *fsTablePersister) ConjoinAll(sources chunkSources, stats *Stats) chunkSource { plan := planConjoin(sources, stats) if plan.chunkCount == 0 { return emptyChunkSource{} } name := nameFromSuffixes(plan.suffixes()) tempName := func() string { temp, err := ioutil.TempFile(ftp.dir, tempTablePrefix) d.PanicIfError(err) defer checkClose(temp) for _, sws := range plan.sources { r := sws.source.reader() n, err := io.CopyN(temp, r, int64(sws.dataLen)) d.PanicIfError(err) d.PanicIfFalse(uint64(n) == 
sws.dataLen) } _, err = temp.Write(plan.mergedIndex) d.PanicIfError(err) index := parseTableIndex(plan.mergedIndex) if ftp.indexCache != nil { ftp.indexCache.put(name, index) } return temp.Name() }() err := os.Rename(tempName, filepath.Join(ftp.dir, name.String())) d.PanicIfError(err) return ftp.Open(name, plan.chunkCount, stats) } ================================================ FILE: go/nbs/file_table_persister_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "crypto/rand" "fmt" "io/ioutil" "os" "path/filepath" "sort" "testing" "github.com/stretchr/testify/assert" ) func TestFSTableCacheOnOpen(t *testing.T) { assert := assert.New(t) dir := makeTempDir(t) defer os.RemoveAll(dir) names := []addr{} cacheSize := 2 fc := newFDCache(cacheSize) defer fc.Drop() fts := newFSTablePersister(dir, fc, nil) // Create some tables manually, load them into the cache, and then blow them away func() { for i := 0; i < cacheSize; i++ { name, err := writeTableData(dir, []byte{byte(i)}) assert.NoError(err) names = append(names, name) } for _, name := range names { fts.Open(name, 1, nil) } removeTables(dir, names...) 
}() // Tables should still be cached, even though they're gone from disk for i, name := range names { src := fts.Open(name, 1, nil) h := computeAddr([]byte{byte(i)}) assert.True(src.has(h)) } // Kick a table out of the cache name, err := writeTableData(dir, []byte{0xff}) assert.NoError(err) fts.Open(name, 1, nil) present := fc.reportEntries() // Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size assert.Len(present, cacheSize) } func makeTempDir(t *testing.T) string { dir, err := ioutil.TempDir("", "") assert.NoError(t, err) return dir } func writeTableData(dir string, chunx ...[]byte) (name addr, err error) { var tableData []byte tableData, name = buildTable(chunx) err = ioutil.WriteFile(filepath.Join(dir, name.String()), tableData, 0666) return } func removeTables(dir string, names ...addr) error { for _, name := range names { if err := os.Remove(filepath.Join(dir, name.String())); err != nil { return err } } return nil } func contains(s sort.StringSlice, e string) bool { for _, c := range s { if c == e { return true } } return false } func TestFSTablePersisterPersist(t *testing.T) { assert := assert.New(t) dir := makeTempDir(t) defer os.RemoveAll(dir) fc := newFDCache(defaultMaxTables) defer fc.Drop() fts := newFSTablePersister(dir, fc, nil) src, err := persistTableData(fts, testChunks...) 
assert.NoError(err) if assert.True(src.count() > 0) { buff, err := ioutil.ReadFile(filepath.Join(dir, src.hash().String())) assert.NoError(err) tr := newTableReader(parseTableIndex(buff), tableReaderAtFromBytes(buff), fileBlockSize) assertChunksInReader(testChunks, tr, assert) } } func persistTableData(p tablePersister, chunx ...[]byte) (src chunkSource, err error) { mt := newMemTable(testMemTableSize) for _, c := range chunx { if !mt.addChunk(computeAddr(c), c) { return nil, fmt.Errorf("memTable too full to add %s", computeAddr(c)) } } return p.Persist(mt, nil, &Stats{}), nil } func TestFSTablePersisterPersistNoData(t *testing.T) { assert := assert.New(t) mt := newMemTable(testMemTableSize) existingTable := newMemTable(testMemTableSize) for _, c := range testChunks { assert.True(mt.addChunk(computeAddr(c), c)) assert.True(existingTable.addChunk(computeAddr(c), c)) } dir := makeTempDir(t) defer os.RemoveAll(dir) fc := newFDCache(defaultMaxTables) defer fc.Drop() fts := newFSTablePersister(dir, fc, nil) src := fts.Persist(mt, existingTable, &Stats{}) assert.True(src.count() == 0) _, err := os.Stat(filepath.Join(dir, src.hash().String())) assert.True(os.IsNotExist(err), "%v", err) } func TestFSTablePersisterCacheOnPersist(t *testing.T) { assert := assert.New(t) dir := makeTempDir(t) fc := newFDCache(1) defer fc.Drop() fts := newFSTablePersister(dir, fc, nil) defer os.RemoveAll(dir) var name addr func() { src, err := persistTableData(fts, testChunks...) 
assert.NoError(err) name = src.hash() removeTables(dir, name) }() // Table should still be cached, even though it's gone from disk src := fts.Open(name, uint32(len(testChunks)), nil) assertChunksInReader(testChunks, src, assert) // Evict |name| from cache src, err := persistTableData(fts, []byte{0xff}) assert.NoError(err) present := fc.reportEntries() // Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size assert.Len(present, 1) } func TestFSTablePersisterConjoinAll(t *testing.T) { assert := assert.New(t) assert.True(len(testChunks) > 1, "Whoops, this test isn't meaningful") sources := make(chunkSources, len(testChunks)) dir := makeTempDir(t) defer os.RemoveAll(dir) fc := newFDCache(len(sources)) defer fc.Drop() fts := newFSTablePersister(dir, fc, nil) for i, c := range testChunks { randChunk := make([]byte, (i+1)*13) _, err := rand.Read(randChunk) assert.NoError(err) name, err := writeTableData(dir, c, randChunk) assert.NoError(err) sources[i] = fts.Open(name, 2, nil) } src := fts.ConjoinAll(sources, &Stats{}) if assert.True(src.count() > 0) { buff, err := ioutil.ReadFile(filepath.Join(dir, src.hash().String())) assert.NoError(err) tr := newTableReader(parseTableIndex(buff), tableReaderAtFromBytes(buff), fileBlockSize) assertChunksInReader(testChunks, tr, assert) } present := fc.reportEntries() // Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size assert.Len(present, len(sources)) } func TestFSTablePersisterConjoinAllDups(t *testing.T) { assert := assert.New(t) dir := makeTempDir(t) defer os.RemoveAll(dir) fc := newFDCache(defaultMaxTables) defer fc.Drop() fts := newFSTablePersister(dir, fc, nil) reps := 3 sources := make(chunkSources, reps) for i := 0; i < reps; i++ { mt := newMemTable(1 << 10) for _, c := range testChunks { mt.addChunk(computeAddr(c), c) } sources[i] = fts.Persist(mt, nil, &Stats{}) } src := fts.ConjoinAll(sources, 
&Stats{}) if assert.True(src.count() > 0) { buff, err := ioutil.ReadFile(filepath.Join(dir, src.hash().String())) assert.NoError(err) tr := newTableReader(parseTableIndex(buff), tableReaderAtFromBytes(buff), fileBlockSize) assertChunksInReader(testChunks, tr, assert) assert.EqualValues(reps*len(testChunks), tr.count()) } } ================================================ FILE: go/nbs/fs_table_cache.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "errors" "io" "io/ioutil" "os" "path/filepath" "strings" "sync" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/sizecache" ) type tableCache interface { checkout(h addr) io.ReaderAt checkin(h addr) store(h addr, data io.Reader, size uint64) } type fsTableCache struct { dir string cache *sizecache.SizeCache fd *fdCache } func newFSTableCache(dir string, cacheSize uint64, maxOpenFds int) *fsTableCache { ftc := &fsTableCache{dir: dir, fd: newFDCache(maxOpenFds)} ftc.cache = sizecache.NewWithExpireCallback(cacheSize, func(elm interface{}) { ftc.expire(elm.(addr)) }) ftc.init(maxOpenFds) return ftc } func (ftc *fsTableCache) init(concurrency int) { type finfo struct { path string h addr size uint64 } infos := make(chan finfo) errc := make(chan error, 1) go func() { isTableFile := func(info os.FileInfo) bool { return info.Mode().IsRegular() && ValidateAddr(info.Name()) } isTempTableFile := func(info os.FileInfo) bool { return info.Mode().IsRegular() && strings.HasPrefix(info.Name(), tempTablePrefix) } defer close(errc) defer close(infos) // No select needed for this send, since errc is buffered. 
errc <- filepath.Walk(ftc.dir, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if path == ftc.dir { return nil } if isTempTableFile(info) { os.Remove(path) return nil } if !isTableFile(info) { return errors.New(path + " is not a table file; cache dir must contain only table files") } infos <- finfo{path, ParseAddr([]byte(info.Name())), uint64(info.Size())} return nil }) }() wg := sync.WaitGroup{} wg.Add(concurrency) for i := 0; i < concurrency; i++ { go func() { defer wg.Done() for info := range infos { ftc.cache.Add(info.h, info.size, true) ftc.fd.RefFile(info.path) ftc.fd.UnrefFile(info.path) } }() } wg.Wait() d.PanicIfError(<-errc) } func (ftc *fsTableCache) checkout(h addr) io.ReaderAt { if _, ok := ftc.cache.Get(h); !ok { return nil } if fd, err := ftc.fd.RefFile(filepath.Join(ftc.dir, h.String())); err == nil { return fd } return nil } func (ftc *fsTableCache) checkin(h addr) { ftc.fd.UnrefFile(filepath.Join(ftc.dir, h.String())) } func (ftc *fsTableCache) store(h addr, data io.Reader, size uint64) { path := filepath.Join(ftc.dir, h.String()) tempName := func() string { temp, err := ioutil.TempFile(ftc.dir, tempTablePrefix) d.PanicIfError(err) defer checkClose(temp) io.Copy(temp, data) return temp.Name() }() err := os.Rename(tempName, path) d.PanicIfError(err) ftc.cache.Add(h, size, true) ftc.fd.RefFile(path) // Prime the file in the fd cache ftc.fd.UnrefFile(path) } func (ftc *fsTableCache) expire(h addr) { err := os.Remove(filepath.Join(ftc.dir, h.String())) d.PanicIfError(err) } ================================================ FILE: go/nbs/fs_table_cache_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"bytes"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"sort"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestFSTableCache exercises fsTableCache: LRU expiration of stored tables
// and the behavior of newFSTableCache over pre-populated directories.
func TestFSTableCache(t *testing.T) {
	datas := [][]byte{[]byte("hello"), []byte("world"), []byte("goodbye")}
	// Shortest first, so that datas[0] is the entry expected to be expired below.
	sort.SliceStable(datas, func(i, j int) bool { return len(datas[i]) < len(datas[j]) })

	t.Run("ExpireLRU", func(t *testing.T) {
		t.Parallel()
		dir := makeTempDir(t)
		defer os.RemoveAll(dir)

		// Size the cache to hold everything except datas[0].
		sum := 0
		for _, s := range datas[1:] {
			sum += len(s)
		}

		tc := newFSTableCache(dir, uint64(sum), len(datas))
		for _, d := range datas {
			tc.store(computeAddr(d), bytes.NewReader(d), uint64(len(d)))
		}

		// The first-stored table must be gone from both the cache and the disk.
		expiredName := computeAddr(datas[0])
		assert.Nil(t, tc.checkout(expiredName))
		_, fserr := os.Stat(filepath.Join(dir, expiredName.String()))
		assert.True(t, os.IsNotExist(fserr))

		for _, d := range datas[1:] {
			name := computeAddr(d)
			r := tc.checkout(name)
			assert.NotNil(t, r)
			assertDataInReaderAt(t, d, r)
			_, fserr := os.Stat(filepath.Join(dir, name.String()))
			assert.False(t, os.IsNotExist(fserr))
		}
	})

	t.Run("Init", func(t *testing.T) {
		t.Run("Success", func(t *testing.T) {
			t.Parallel()
			dir := makeTempDir(t)
			defer os.RemoveAll(dir)
			assert := assert.New(t)

			// Pre-populate the directory with empty files named like tables.
			names := []addr{}
			for i := byte(0); i < 4; i++ {
				name := computeAddr([]byte{i})
				assert.NoError(ioutil.WriteFile(filepath.Join(dir, name.String()), nil, 0666))
				names = append(names, name)
			}

			var ftc *fsTableCache
			assert.NotPanics(func() { ftc = newFSTableCache(dir, 1024, 4) })
			assert.NotNil(ftc)

			for _, name := range names {
				assert.NotNil(ftc.checkout(name))
			}
		})

		t.Run("BadFile", func(t *testing.T) {
			t.Parallel()
			dir := makeTempDir(t)
			defer os.RemoveAll(dir)

			// A file whose name is not a valid table address must be rejected.
			assert.NoError(t, ioutil.WriteFile(filepath.Join(dir, "boo"), nil, 0666))
			assert.Panics(t, func() { newFSTableCache(dir, 1024, 4) })
		})

		t.Run("ClearTempFile", func(t *testing.T) {
			t.Parallel()
			dir := makeTempDir(t)
			defer os.RemoveAll(dir)

			// Leftover temp table files are removed rather than rejected.
			tempFile := filepath.Join(dir, tempTablePrefix+"boo")
			assert.NoError(t, ioutil.WriteFile(tempFile, nil, 0666))
			assert.NotPanics(t, func() { newFSTableCache(dir, 1024, 4) })
			_, fserr := os.Stat(tempFile)
			assert.True(t, os.IsNotExist(fserr))
		})

		t.Run("Dir", func(t *testing.T) {
			t.Parallel()
			dir := makeTempDir(t)
			defer os.RemoveAll(dir)

			// Sub-directories are not allowed in the cache dir.
			assert.NoError(t, os.Mkdir(filepath.Join(dir, "sub"), 0777))
			assert.Panics(t, func() { newFSTableCache(dir, 1024, 4) })
		})
	})
}

// assertDataInReaderAt asserts that reading len(data) bytes from offset 0 of
// |r| succeeds and yields exactly |data|.
func assertDataInReaderAt(t *testing.T, data []byte, r io.ReaderAt) {
	p := make([]byte, len(data))
	n, err := r.ReadAt(p, 0)
	assert.NoError(t, err)
	assert.Equal(t, len(data), n)
	assert.Equal(t, data, p)
}

================================================
FILE: go/nbs/manifest.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"crypto/sha512"
	"strconv"
	"sync"
	"time"

	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
)

type manifest interface {
	// Name returns a stable, unique identifier for the store this manifest describes.
	Name() string

	// ParseIfExists extracts and returns values from a NomsBlockStore
	// manifest, if one exists. Concrete implementations are responsible for
	// defining how to find and parse the desired manifest, e.g. a
	// particularly-named file in a given directory. Implementations are also
	// responsible for managing whatever concurrency guarantees they require
	// for correctness. If the manifest exists, |exists| is set to true and
	// manifest data is returned, including the version of the Noms data in
	// the store, the root hash.Hash of the store, and a tableSpec
	// describing every table that comprises the store.
	// If the manifest doesn't exist, |exists| is set to false and the other
	// return values are undefined. The |readHook| parameter allows race
	// condition testing. If it is non-nil, it will be invoked while the
	// implementation is guaranteeing exclusive access to the manifest.
	ParseIfExists(stats *Stats, readHook func()) (exists bool, contents manifestContents)

	manifestUpdater
}

type manifestUpdater interface {
	// Update optimistically tries to write a new manifest containing
	// |newContents|. If |lastLock| matches the lock hash in the currently
	// persisted manifest (logically, the lock that would be returned by
	// ParseIfExists), then Update succeeds and subsequent calls to both
	// Update and ParseIfExists will reflect a manifest containing
	// |newContents|. If not, Update fails. Regardless, the returned
	// manifestContents will reflect the current state of the world. Callers
	// should check that the returned root == the proposed root and, if not,
	// merge any desired new table information with the contents of the
	// returned []tableSpec before trying again.
	// Concrete implementations are responsible for ensuring that concurrent
	// Update calls (and ParseIfExists calls) are correct.
	// If writeHook is non-nil, it will be invoked while the implementation is
	// guaranteeing exclusive access to the manifest. This allows for testing
	// of race conditions.
	Update(lastLock addr, newContents manifestContents, stats *Stats, writeHook func()) manifestContents
}

// manifestContents is the parsed payload of a manifest: the data version, the
// lock hash, the store root, and one tableSpec per table in the store.
type manifestContents struct {
	vers  string
	lock  addr
	root  hash.Hash
	specs []tableSpec
}

// size returns the number of bytes this value is accounted for in the
// manifest cache: the version string, the lock, the root, and a name plus a
// chunk count for every table spec.
func (mc manifestContents) size() (size uint64) {
	size += uint64(len(mc.vers)) + addrSize + hash.ByteLen
	for _, sp := range mc.specs {
		size += uint64(len(sp.name)) + uint32Size // for sp.chunkCount
	}
	return
}

// newManifestLocks returns a manifestLocks with empty lock sets that share a
// single condition variable.
func newManifestLocks() *manifestLocks {
	return &manifestLocks{map[string]struct{}{}, map[string]struct{}{}, sync.NewCond(&sync.Mutex{})}
}

// manifestLocks provides per-name exclusive locks in two independent sets,
// one for updating and one for fetching. Both sets are guarded by, and
// waiters are woken through, |cond|.
type manifestLocks struct {
	updating map[string]struct{}
	fetching map[string]struct{}
	cond     *sync.Cond
}

// lockForFetch blocks until the fetch lock for |db| has been acquired.
func (ml *manifestLocks) lockForFetch(db string) {
	lockByName(db, ml.cond, ml.fetching)
}

// unlockForFetch releases the fetch lock for |db|; panics if it is not held.
func (ml *manifestLocks) unlockForFetch(db string) {
	unlockByName(db, ml.cond, ml.fetching)
}

// lockForUpdate blocks until the update lock for |db| has been acquired.
func (ml *manifestLocks) lockForUpdate(db string) {
	lockByName(db, ml.cond, ml.updating)
}

// unlockForUpdate releases the update lock for |db|; panics if it is not held.
func (ml *manifestLocks) unlockForUpdate(db string) {
	unlockByName(db, ml.cond, ml.updating)
}

// lockByName blocks until |db| is absent from |locks|, then inserts it.
// |locks| is only touched while holding c.L.
func lockByName(db string, c *sync.Cond, locks map[string]struct{}) {
	c.L.Lock()
	defer c.L.Unlock()

	for {
		if _, inProgress := locks[db]; !inProgress {
			locks[db] = struct{}{}
			break
		}
		// Releases c.L while waiting and re-acquires it before returning.
		c.Wait()
	}
}

// unlockByName removes |db| from |locks| and wakes every waiter on |c|.
// Panics if |db| was not locked.
func unlockByName(db string, c *sync.Cond, locks map[string]struct{}) {
	c.L.Lock()
	defer c.L.Unlock()

	_, ok := locks[db]
	d.PanicIfFalse(ok)
	delete(locks, db)

	c.Broadcast()
}

// manifestManager wraps a manifest with a cache of recently seen contents and
// with the named locks used to serialize fetches and updates.
type manifestManager struct {
	m     manifest
	cache *manifestCache
	locks *manifestLocks
}

// lockOutFetch acquires the fetch lock for this manager's manifest.
func (mm manifestManager) lockOutFetch() {
	mm.locks.lockForFetch(mm.Name())
}

// allowFetch releases the fetch lock for this manager's manifest.
func (mm manifestManager) allowFetch() {
	mm.locks.unlockForFetch(mm.Name())
}

// LockForUpdate acquires the update lock for this manager's manifest.
func (mm manifestManager) LockForUpdate() {
	mm.locks.lockForUpdate(mm.Name())
}

// UnlockForUpdate releases the update lock for this manager's manifest.
func (mm manifestManager) UnlockForUpdate() {
	mm.locks.unlockForUpdate(mm.Name())
}

// updateWillFail consults only the cache. If a cached manifest exists and its
// lock differs from |lastLock|, it returns that manifest and doomed == true;
// otherwise it returns the zero manifestContents and false.
func (mm manifestManager) updateWillFail(lastLock addr) (cached manifestContents, doomed bool) {
	if upstream, _, hit := mm.cache.Get(mm.Name()); hit {
		if lastLock != upstream.lock {
			doomed, cached = true, upstream
		}
	}
	return
}

// Fetch returns the current manifest contents. It holds the fetch lock for
// the duration of the call. If, once the lock is held, the cache contains an
// entry stamped after this call began, that entry is returned without
// re-reading the manifest; otherwise the manifest is parsed and the result is
// cached with a timestamp taken just before the parse.
func (mm manifestManager) Fetch(stats *Stats) (exists bool, contents manifestContents) {
	entryTime := time.Now()

	mm.lockOutFetch()
	defer mm.allowFetch()

	cached, t, hit := mm.cache.Get(mm.Name())

	if hit && t.After(entryTime) {
		// Cache contains a manifest which is newer than entry time.
		return true, cached
	}

	t = time.Now()
	exists, contents = mm.m.ParseIfExists(stats, nil)
	mm.cache.Put(mm.Name(), contents, t)
	return
}

// Callers MUST protect uses of Update with Lock/UnlockForUpdate.
// Update does not call Lock/UnlockForUpdate() on its own because it is
// intended to be used in a larger critical section along with updateWillFail.
func (mm manifestManager) Update(lastLock addr, newContents manifestContents, stats *Stats, writeHook func()) manifestContents {
	// Short-circuit: if the cache already shows a different lock, return the
	// cached contents without touching the underlying manifest.
	if upstream, _, hit := mm.cache.Get(mm.Name()); hit {
		if lastLock != upstream.lock {
			return upstream
		}
	}
	// The timestamp is taken before the write is initiated, as manifestCache.Put requires.
	t := time.Now()

	mm.lockOutFetch()
	defer mm.allowFetch()
	contents := mm.m.Update(lastLock, newContents, stats, writeHook)
	mm.cache.Put(mm.Name(), contents, t)
	return contents
}

// Name returns the name of the wrapped manifest.
func (mm manifestManager) Name() string {
	return mm.m.Name()
}

// tableSpec names a table and records how many chunks it holds.
type tableSpec struct {
	name       addr
	chunkCount uint32
}

// parseSpecs converts |tableInfo|, a flat list of alternating table names and
// base-10 chunk counts, into tableSpecs. A trailing unpaired element is
// ignored. Panics if a chunk count does not parse as a 32-bit unsigned value.
func parseSpecs(tableInfo []string) []tableSpec {
	specs := make([]tableSpec, len(tableInfo)/2)
	for i := range specs {
		specs[i].name = ParseAddr([]byte(tableInfo[2*i]))
		c, err := strconv.ParseUint(tableInfo[2*i+1], 10, 32)
		d.PanicIfError(err)
		specs[i].chunkCount = uint32(c)
	}
	return specs
}

// formatSpecs is the inverse of parseSpecs: it writes the name and base-10
// chunk count of each spec into |tableInfo|, which must have exactly
// 2*len(specs) elements.
func formatSpecs(specs []tableSpec, tableInfo []string) {
	d.Chk.True(len(tableInfo) == 2*len(specs))
	for i, t := range specs {
		tableInfo[2*i] = t.name.String()
		tableInfo[2*i+1] = strconv.FormatUint(uint64(t.chunkCount), 10)
	}
}

// generateLockHash returns a hash of root and the names of all the tables in
// specs, which should be included in all persisted manifests. When a client
// attempts to update a manifest, it must check the lock hash in the currently
// persisted manifest against the lock hash it saw last time it loaded the
// contents of a manifest. If they do not match, the client must not update
// the persisted manifest.
func generateLockHash(root hash.Hash, specs []tableSpec) (lock addr) {
	blockHash := sha512.New()
	blockHash.Write(root[:])
	for _, spec := range specs {
		blockHash.Write(spec.name[:])
	}
	var h []byte
	h = blockHash.Sum(h) // Appends hash to h
	// copy stops at len(lock), so only the leading bytes of the digest are kept.
	copy(lock[:], h)
	return
}

================================================
FILE: go/nbs/manifest_cache.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"container/list"
	"time"

	"sync"

	"github.com/attic-labs/noms/go/d"
)

// newManifestCache returns an empty manifestCache that holds entries whose
// sizes total at most |maxSize|.
func newManifestCache(maxSize uint64) *manifestCache {
	return &manifestCache{
		maxSize: maxSize,
		cache:   map[string]manifestCacheEntry{},
		mu:      &sync.Mutex{},
	}
}

// manifestCacheEntry is one cached manifest: its position in the LRU list,
// its contents, and the timestamp supplied to Put.
type manifestCacheEntry struct {
	lruEntry *list.Element
	contents manifestContents
	t        time.Time
}

// manifestCache is a size-bounded LRU cache of manifestContents keyed by
// name. The front of |lru| is the least recently used key.
type manifestCache struct {
	totalSize uint64
	maxSize   uint64
	mu        *sync.Mutex
	lru       list.List
	cache     map[string]manifestCacheEntry
}

// Get() searches the cache for an entry. If it exists, it moves its lru entry
// to the back of the queue and returns (contents, t, true). Otherwise, it
// returns zero values and false.
func (mc *manifestCache) Get(db string) (contents manifestContents, t time.Time, present bool) {
	mc.mu.Lock()
	defer mc.mu.Unlock()

	if entry, ok := mc.entry(db); ok {
		contents, t, present = entry.contents, entry.t, true
	}
	return
}

// entry() checks if the value is in the cache. If not in the cache, it returns an
// empty manifestCacheEntry and false. If it is in the cache, it moves it to
// the back of lru and returns the entry and true. Callers must hold mc.mu.
func (mc *manifestCache) entry(key string) (manifestCacheEntry, bool) {
	entry, ok := mc.cache[key]
	if !ok {
		return manifestCacheEntry{}, false
	}
	mc.lru.MoveToBack(entry.lruEntry)
	return entry, true
}

// Put inserts |contents| into the cache with the key |db|, replacing any
// currently cached value. Put() will add this element to the cache at the
// back of the queue as long as its size does not exceed maxSize. If the
// addition of this entry causes the size of the cache to exceed maxSize, the
// necessary entries at the front of the queue will be deleted in order to
// keep the total cache size at or below maxSize. |t| must be *prior* to
// initiating the call which read/wrote |contents|.
func (mc *manifestCache) Put(db string, contents manifestContents, t time.Time) {
	mc.mu.Lock()
	defer mc.mu.Unlock()

	// Drop any existing entry first, even if the replacement turns out to be
	// too large to cache.
	if entry, ok := mc.entry(db); ok {
		mc.totalSize -= entry.contents.size()
		mc.lru.Remove(entry.lruEntry)
		delete(mc.cache, db)
	}

	if contents.size() <= mc.maxSize {
		newEl := mc.lru.PushBack(db)
		ce := manifestCacheEntry{lruEntry: newEl, contents: contents, t: t}
		mc.cache[db] = ce
		mc.totalSize += ce.contents.size()
		// Evict from the least-recently-used end until the cache fits again.
		for el := mc.lru.Front(); el != nil && mc.totalSize > mc.maxSize; {
			key1 := el.Value.(string)
			ce, ok := mc.cache[key1]
			if !ok {
				d.Panic("manifestCache is missing expected value")
			}
			// Capture the successor before removing |el| from the list.
			next := el.Next()
			delete(mc.cache, key1)
			mc.totalSize -= ce.contents.size()
			mc.lru.Remove(el)
			el = next
		}
	}
}

================================================
FILE: go/nbs/manifest_cache_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "testing" "time" "github.com/stretchr/testify/assert" ) func TestSizeCache(t *testing.T) { defSize := manifestContents{}.size() t.Run("GetAndPut", func(t *testing.T) { assert := assert.New(t) c := newManifestCache(2 * defSize) t1 := time.Now() dbA, contentsA := "dbA", manifestContents{lock: computeAddr([]byte("lockA"))} dbB, contentsB := "dbB", manifestContents{lock: computeAddr([]byte("lockB"))} c.Put(dbA, contentsA, t1) c.Put(dbB, contentsB, t1) cont, _, present := c.Get(dbA) assert.True(present) assert.Equal(contentsA, cont) cont, _, present = c.Get(dbB) assert.True(present) assert.Equal(contentsB, cont) }) t.Run("PutDropsLRU", func(t *testing.T) { assert := assert.New(t) capacity := uint64(5) c := newManifestCache(capacity * defSize) keys := []string{"db1", "db2", "db3", "db4", "db5", "db6", "db7", "db8", "db9"} for i, v := range keys { c.Put(v, manifestContents{}, time.Now()) expected := uint64(i + 1) if expected >= capacity { expected = capacity } assert.Equal(expected*defSize, c.totalSize) } lru := len(keys) - int(capacity) for _, db := range keys[:lru] { _, _, present := c.Get(db) assert.False(present) } for _, db := range keys[lru:] { _, _, present := c.Get(db) assert.True(present) } // Bump |keys[lru]| to the back of the queue, making |keys[lru+1]| the next one to be dropped _, _, ok := c.Get(keys[lru]) assert.True(ok) lru++ c.Put("novel", manifestContents{}, time.Now()) _, _, ok = c.Get(keys[lru]) assert.False(ok) // |keys[lru]| is gone, so |keys[lru+1]| is next lru++ // Putting a bigger value will dump multiple existing entries c.Put("big", manifestContents{vers: "big version"}, time.Now()) _, _, ok = c.Get(keys[lru]) assert.False(ok) lru++ _, _, ok = c.Get(keys[lru]) assert.False(ok) lru++ // Make sure expected stuff is still in the cache for i := lru; i < len(keys); i++ { _, _, ok := c.Get(keys[i]) assert.True(ok) } for _, key := range 
[]string{"novel", "big"} { _, _, ok := c.Get(key) assert.True(ok) } }) t.Run("TooLargeValue", func(t *testing.T) { c := newManifestCache(16) c.Put("db", manifestContents{}, time.Now()) _, _, ok := c.Get("db") assert.False(t, ok) }) t.Run("ZeroSizeCache", func(t *testing.T) { c := newManifestCache(0) c.Put("db", manifestContents{}, time.Now()) _, _, ok := c.Get("db") assert.False(t, ok) }) } ================================================ FILE: go/nbs/mem_table.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "sort" "sync" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/hash" ) type memTable struct { chunks map[addr][]byte order []hasRecord // Must maintain the invariant that these are sorted by rec.order maxData, totalData uint64 snapper snappyEncoder } func newMemTable(memTableSize uint64) *memTable { return &memTable{chunks: map[addr][]byte{}, maxData: memTableSize} } func (mt *memTable) addChunk(h addr, data []byte) bool { if len(data) == 0 { panic("NBS blocks cannont be zero length") } if _, ok := mt.chunks[h]; ok { return true } dataLen := uint64(len(data)) if mt.totalData+dataLen > mt.maxData { return false } mt.totalData += dataLen mt.chunks[h] = data mt.order = append(mt.order, hasRecord{ &h, h.Prefix(), len(mt.order), false, }) return true } func (mt *memTable) count() uint32 { return uint32(len(mt.order)) } func (mt *memTable) uncompressedLen() uint64 { return mt.totalData } func (mt *memTable) has(h addr) (has bool) { _, has = mt.chunks[h] return } func (mt *memTable) hasMany(addrs []hasRecord) (remaining bool) { for i, addr := range addrs { if addr.has { continue } if mt.has(*addr.a) { addrs[i].has = true } else { remaining = true } } return } func (mt *memTable) get(h addr, stats *Stats) []byte { return mt.chunks[h] } func (mt *memTable) getMany(reqs 
[]getRecord, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats) (remaining bool) { for _, r := range reqs { data := mt.chunks[*r.a] if data != nil { c := chunks.NewChunkWithHash(hash.Hash(*r.a), data) foundChunks <- &c } else { remaining = true } } return } func (mt *memTable) extract(chunks chan<- extractRecord) { for _, hrec := range mt.order { chunks <- extractRecord{a: *hrec.a, data: mt.chunks[*hrec.a]} } return } func (mt *memTable) write(haver chunkReader, stats *Stats) (name addr, data []byte, count uint32) { maxSize := maxTableSize(uint64(len(mt.order)), mt.totalData) buff := make([]byte, maxSize) tw := newTableWriter(buff, mt.snapper) if haver != nil { sort.Sort(hasRecordByPrefix(mt.order)) // hasMany() requires addresses to be sorted. haver.hasMany(mt.order) sort.Sort(hasRecordByOrder(mt.order)) // restore "insertion" order for write } for _, addr := range mt.order { if !addr.has { h := addr.a tw.addChunk(*h, mt.chunks[*h]) count++ } } tableSize, name := tw.finish() if count > 0 { stats.BytesPerPersist.Sample(uint64(tableSize)) stats.CompressedChunkBytesPerPersist.Sample(uint64(tw.totalCompressedData)) stats.UncompressedChunkBytesPerPersist.Sample(uint64(tw.totalUncompressedData)) stats.ChunksPerPersist.Sample(uint64(count)) } return name, buff[:tableSize], count } ================================================ FILE: go/nbs/mem_table_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "sync" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/golang/snappy" "github.com/stretchr/testify/assert" ) func TestMemTableAddHasGetChunk(t *testing.T) { assert := assert.New(t) mt := newMemTable(1024) chunks := [][]byte{ []byte("hello2"), []byte("goodbye2"), []byte("badbye2"), } for _, c := range chunks { assert.True(mt.addChunk(computeAddr(c), c)) } assertChunksInReader(chunks, mt, assert) for _, c := range chunks { assert.Equal(bytes.Compare(c, mt.get(computeAddr(c), &Stats{})), 0) } notPresent := []byte("nope") assert.False(mt.has(computeAddr(notPresent))) assert.Nil(mt.get(computeAddr(notPresent), &Stats{})) } func TestMemTableAddOverflowChunk(t *testing.T) { memTableSize := uint64(1024) assert := assert.New(t) big := make([]byte, memTableSize) little := []byte{0x01} { bigAddr := computeAddr(big) mt := newMemTable(memTableSize) assert.True(mt.addChunk(bigAddr, big)) assert.True(mt.has(bigAddr)) assert.False(mt.addChunk(computeAddr(little), little)) assert.False(mt.has(computeAddr(little))) } { big := big[:memTableSize-1] bigAddr := computeAddr(big) mt := newMemTable(memTableSize) assert.True(mt.addChunk(bigAddr, big)) assert.True(mt.has(bigAddr)) assert.True(mt.addChunk(computeAddr(little), little)) assert.True(mt.has(computeAddr(little))) other := []byte("o") assert.False(mt.addChunk(computeAddr(other), other)) assert.False(mt.has(computeAddr(other))) } } func TestMemTableWrite(t *testing.T) { assert := assert.New(t) mt := newMemTable(1024) chunks := [][]byte{ []byte("hello2"), []byte("goodbye2"), []byte("badbye2"), } for _, c := range chunks { assert.True(mt.addChunk(computeAddr(c), c)) } td1, _ := buildTable(chunks[1:2]) td2, _ := buildTable(chunks[2:]) tr1 := newTableReader(parseTableIndex(td1), tableReaderAtFromBytes(td1), fileBlockSize) tr2 := newTableReader(parseTableIndex(td2), tableReaderAtFromBytes(td2), 
fileBlockSize) assert.True(tr1.has(computeAddr(chunks[1]))) assert.True(tr2.has(computeAddr(chunks[2]))) _, data, count := mt.write(chunkReaderGroup{tr1, tr2}, &Stats{}) assert.Equal(uint32(1), count) outReader := newTableReader(parseTableIndex(data), tableReaderAtFromBytes(data), fileBlockSize) assert.True(outReader.has(computeAddr(chunks[0]))) assert.False(outReader.has(computeAddr(chunks[1]))) assert.False(outReader.has(computeAddr(chunks[2]))) } type tableReaderAtAdapter struct { *bytes.Reader } func tableReaderAtFromBytes(b []byte) tableReaderAt { return tableReaderAtAdapter{bytes.NewReader(b)} } func (adapter tableReaderAtAdapter) ReadAtWithStats(p []byte, off int64, stats *Stats) (n int, err error) { return adapter.ReadAt(p, off) } func TestMemTableSnappyWriteOutOfLine(t *testing.T) { assert := assert.New(t) mt := newMemTable(1024) chunks := [][]byte{ []byte("hello2"), []byte("goodbye2"), []byte("badbye2"), } for _, c := range chunks { assert.True(mt.addChunk(computeAddr(c), c)) } mt.snapper = &outOfLineSnappy{[]bool{false, true, false}} // chunks[1] should trigger a panic assert.Panics(func() { mt.write(nil, &Stats{}) }) } type outOfLineSnappy struct { policy []bool } func (o *outOfLineSnappy) Encode(dst, src []byte) []byte { outOfLine := false if len(o.policy) > 0 { outOfLine = o.policy[0] o.policy = o.policy[1:] } if outOfLine { return snappy.Encode(nil, src) } return snappy.Encode(dst, src) } type chunkReaderGroup []chunkReader func (crg chunkReaderGroup) has(h addr) bool { for _, haver := range crg { if haver.has(h) { return true } } return false } func (crg chunkReaderGroup) get(h addr, stats *Stats) []byte { for _, haver := range crg { if data := haver.get(h, stats); data != nil { return data } } return nil } func (crg chunkReaderGroup) hasMany(addrs []hasRecord) (remaining bool) { for _, haver := range crg { if !haver.hasMany(addrs) { return false } } return true } func (crg chunkReaderGroup) getMany(reqs []getRecord, foundChunks chan *chunks.Chunk, 
wg *sync.WaitGroup, stats *Stats) (remaining bool) { for _, haver := range crg { if !haver.getMany(reqs, foundChunks, wg, stats) { return false } } return true } func (crg chunkReaderGroup) count() (count uint32) { for _, haver := range crg { count += haver.count() } return } func (crg chunkReaderGroup) uncompressedLen() (data uint64) { for _, haver := range crg { data += haver.uncompressedLen() } return } func (crg chunkReaderGroup) extract(chunks chan<- extractRecord) { for _, haver := range crg { haver.extract(chunks) } } ================================================ FILE: go/nbs/mmap_table_reader.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "io" "math" "os" "path/filepath" "strconv" "time" "golang.org/x/sys/unix" "github.com/attic-labs/noms/go/d" ) type mmapTableReader struct { tableReader fc *fdCache h addr } const ( fileBlockSize = 1 << 12 ) var ( pageSize = int64(os.Getpagesize()) maxInt = int64(math.MaxInt64) ) func init() { if strconv.IntSize == 32 { maxInt = math.MaxInt32 } } func newMmapTableReader(dir string, h addr, chunkCount uint32, indexCache *indexCache, fc *fdCache) chunkSource { path := filepath.Join(dir, h.String()) var index tableIndex found := false if indexCache != nil { indexCache.lockEntry(h) defer indexCache.unlockEntry(h) index, found = indexCache.get(h) } if !found { f, err := fc.RefFile(path) d.PanicIfError(err) defer fc.UnrefFile(path) fi, err := f.Stat() d.PanicIfError(err) d.PanicIfTrue(fi.Size() < 0) // index. Mmap won't take an offset that's not page-aligned, so find the nearest page boundary preceding the index. indexOffset := fi.Size() - int64(footerSize) - int64(indexSize(chunkCount)) aligned := indexOffset / pageSize * pageSize // Thanks, integer arithmetic! 
d.PanicIfTrue(fi.Size()-aligned > maxInt) buff, err := unix.Mmap(int(f.Fd()), aligned, int(fi.Size()-aligned), unix.PROT_READ, unix.MAP_SHARED) d.PanicIfError(err) index = parseTableIndex(buff[indexOffset-aligned:]) if indexCache != nil { indexCache.put(h, index) } err = unix.Munmap(buff) d.PanicIfError(err) } d.PanicIfFalse(chunkCount == index.chunkCount) return &mmapTableReader{ newTableReader(index, &cacheReaderAt{path, fc}, fileBlockSize), fc, h, } } func (mmtr *mmapTableReader) hash() addr { return mmtr.h } type cacheReaderAt struct { path string fc *fdCache } func (cra *cacheReaderAt) ReadAtWithStats(p []byte, off int64, stats *Stats) (n int, err error) { var r io.ReaderAt t1 := time.Now() if r, err = cra.fc.RefFile(cra.path); err != nil { return } defer func() { stats.FileBytesPerRead.Sample(uint64(len(p))) stats.FileReadLatency.SampleTimeSince(t1) }() defer cra.fc.UnrefFile(cra.path) return r.ReadAt(p, off) } ================================================ FILE: go/nbs/mmap_table_reader_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "io/ioutil" "os" "path/filepath" "testing" "github.com/stretchr/testify/assert" ) func TestMmapTableReader(t *testing.T) { assert := assert.New(t) dir, err := ioutil.TempDir("", "") assert.NoError(err) defer os.RemoveAll(dir) fc := newFDCache(1) defer fc.Drop() chunks := [][]byte{ []byte("hello2"), []byte("goodbye2"), []byte("badbye2"), } tableData, h := buildTable(chunks) err = ioutil.WriteFile(filepath.Join(dir, h.String()), tableData, 0666) assert.NoError(err) trc := newMmapTableReader(dir, h, uint32(len(chunks)), nil, fc) assertChunksInReader(chunks, trc, assert) } ================================================ FILE: go/nbs/persisting_chunk_source.go ================================================ // Copyright 2016 Attic Labs, Inc. 
All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "io" "sync" "time" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" ) func newPersistingChunkSource(mt *memTable, haver chunkReader, p tablePersister, rl chan struct{}, stats *Stats) *persistingChunkSource { t1 := time.Now() ccs := &persistingChunkSource{mt: mt} ccs.wg.Add(1) rl <- struct{}{} go func() { defer ccs.wg.Done() cs := p.Persist(mt, haver, stats) ccs.mu.Lock() defer ccs.mu.Unlock() ccs.cs = cs ccs.mt = nil <-rl if cs.count() > 0 { stats.PersistLatency.SampleTimeSince(t1) } }() return ccs } type persistingChunkSource struct { mu sync.RWMutex mt *memTable wg sync.WaitGroup cs chunkSource } func (ccs *persistingChunkSource) getReader() chunkReader { ccs.mu.RLock() defer ccs.mu.RUnlock() if ccs.mt != nil { return ccs.mt } return ccs.cs } func (ccs *persistingChunkSource) has(h addr) bool { cr := ccs.getReader() d.Chk.True(cr != nil) return cr.has(h) } func (ccs *persistingChunkSource) hasMany(addrs []hasRecord) bool { cr := ccs.getReader() d.Chk.True(cr != nil) return cr.hasMany(addrs) } func (ccs *persistingChunkSource) get(h addr, stats *Stats) []byte { cr := ccs.getReader() d.Chk.True(cr != nil) return cr.get(h, stats) } func (ccs *persistingChunkSource) getMany(reqs []getRecord, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats) bool { cr := ccs.getReader() d.Chk.True(cr != nil) return cr.getMany(reqs, foundChunks, wg, stats) } func (ccs *persistingChunkSource) count() uint32 { ccs.wg.Wait() d.Chk.True(ccs.cs != nil) return ccs.cs.count() } func (ccs *persistingChunkSource) uncompressedLen() uint64 { ccs.wg.Wait() d.Chk.True(ccs.cs != nil) return ccs.cs.uncompressedLen() } func (ccs *persistingChunkSource) hash() addr { ccs.wg.Wait() d.Chk.True(ccs.cs != nil) return ccs.cs.hash() } func (ccs *persistingChunkSource) index() tableIndex { ccs.wg.Wait() d.Chk.True(ccs.cs != 
nil) return ccs.cs.index() } func (ccs *persistingChunkSource) reader() io.Reader { ccs.wg.Wait() d.Chk.True(ccs.cs != nil) return ccs.cs.reader() } func (ccs *persistingChunkSource) calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool) { ccs.wg.Wait() d.Chk.True(ccs.cs != nil) return ccs.cs.calcReads(reqs, blockSize) } func (ccs *persistingChunkSource) extract(chunks chan<- extractRecord) { ccs.wg.Wait() d.Chk.True(ccs.cs != nil) ccs.cs.extract(chunks) } type emptyChunkSource struct{} func (ecs emptyChunkSource) has(h addr) bool { return false } func (ecs emptyChunkSource) hasMany(addrs []hasRecord) bool { return true } func (ecs emptyChunkSource) get(h addr, stats *Stats) []byte { return nil } func (ecs emptyChunkSource) getMany(reqs []getRecord, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats) bool { return true } func (ecs emptyChunkSource) count() uint32 { return 0 } func (ecs emptyChunkSource) uncompressedLen() uint64 { return 0 } func (ecs emptyChunkSource) hash() addr { return addr{} } func (ecs emptyChunkSource) index() tableIndex { return tableIndex{} } func (ecs emptyChunkSource) reader() io.Reader { return &bytes.Buffer{} } func (ecs emptyChunkSource) calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool) { return 0, true } func (ecs emptyChunkSource) extract(chunks chan<- extractRecord) {} ================================================ FILE: go/nbs/persisting_chunk_source_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "testing" "github.com/stretchr/testify/assert" ) func TestPersistingChunkStoreEmpty(t *testing.T) { mt := newMemTable(testMemTableSize) ccs := newPersistingChunkSource(mt, nil, newFakeTablePersister(), make(chan struct{}, 1), &Stats{}) assert.Equal(t, addr{}, ccs.hash()) assert.Zero(t, ccs.count()) } type pausingFakeTablePersister struct { tablePersister trigger <-chan struct{} } func (ftp pausingFakeTablePersister) Persist(mt *memTable, haver chunkReader, stats *Stats) chunkSource { <-ftp.trigger return ftp.tablePersister.Persist(mt, haver, stats) } func TestPersistingChunkStore(t *testing.T) { assert := assert.New(t) mt := newMemTable(testMemTableSize) for _, c := range testChunks { assert.True(mt.addChunk(computeAddr(c), c)) } trigger := make(chan struct{}) ccs := newPersistingChunkSource(mt, nil, pausingFakeTablePersister{newFakeTablePersister(), trigger}, make(chan struct{}, 1), &Stats{}) assertChunksInReader(testChunks, ccs, assert) assert.EqualValues(mt.count(), ccs.getReader().count()) close(trigger) assert.NotEqual(addr{}, ccs.hash()) assert.EqualValues(len(testChunks), ccs.count()) assertChunksInReader(testChunks, ccs, assert) assert.Nil(ccs.mt) newChunk := []byte("additional") mt.addChunk(computeAddr(newChunk), newChunk) assert.NotEqual(mt.count(), ccs.count()) assert.False(ccs.has(computeAddr(newChunk))) } ================================================ FILE: go/nbs/root_tracker_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "fmt" "sync" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/constants" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) func TestChunkStoreZeroValue(t *testing.T) { assert := assert.New(t) _, _, store := makeStoreWithFakes(t) defer store.Close() // No manifest file gets written until the first call to Commit(). Prior to that, Root() will simply return hash.Hash{}. assert.Equal(hash.Hash{}, store.Root()) assert.Equal(constants.NomsVersion, store.Version()) } func TestChunkStoreVersion(t *testing.T) { assert := assert.New(t) _, _, store := makeStoreWithFakes(t) defer store.Close() assert.Equal(constants.NomsVersion, store.Version()) newRoot := hash.Of([]byte("new root")) if assert.True(store.Commit(newRoot, hash.Hash{})) { assert.Equal(constants.NomsVersion, store.Version()) } } func TestChunkStoreRebase(t *testing.T) { assert := assert.New(t) fm, p, store := makeStoreWithFakes(t) defer store.Close() assert.Equal(hash.Hash{}, store.Root()) assert.Equal(constants.NomsVersion, store.Version()) // Simulate another process writing a manifest behind store's back. 
newRoot, chunks := interloperWrite(fm, p, []byte("new root"), []byte("hello2"), []byte("goodbye2"), []byte("badbye2")) // state in store shouldn't change assert.Equal(hash.Hash{}, store.Root()) assert.Equal(constants.NomsVersion, store.Version()) store.Rebase() // NOW it should assert.Equal(newRoot, store.Root()) assert.Equal(constants.NomsVersion, store.Version()) assertDataInStore(chunks, store, assert) } func TestChunkStoreCommit(t *testing.T) { assert := assert.New(t) _, _, store := makeStoreWithFakes(t) defer store.Close() assert.Equal(hash.Hash{}, store.Root()) newRootChunk := chunks.NewChunk([]byte("new root")) newRoot := newRootChunk.Hash() store.Put(newRootChunk) if assert.True(store.Commit(newRoot, hash.Hash{})) { assert.True(store.Has(newRoot)) assert.Equal(newRoot, store.Root()) } secondRootChunk := chunks.NewChunk([]byte("newer root")) secondRoot := secondRootChunk.Hash() store.Put(secondRootChunk) if assert.True(store.Commit(secondRoot, newRoot)) { assert.Equal(secondRoot, store.Root()) assert.True(store.Has(newRoot)) assert.True(store.Has(secondRoot)) } } func TestChunkStoreManifestAppearsAfterConstruction(t *testing.T) { assert := assert.New(t) fm, p, store := makeStoreWithFakes(t) defer store.Close() assert.Equal(hash.Hash{}, store.Root()) assert.Equal(constants.NomsVersion, store.Version()) // Simulate another process writing a manifest behind store's back. interloperWrite(fm, p, []byte("new root"), []byte("hello2"), []byte("goodbye2"), []byte("badbye2")) // state in store shouldn't change assert.Equal(hash.Hash{}, store.Root()) assert.Equal(constants.NomsVersion, store.Version()) } func TestChunkStoreManifestFirstWriteByOtherProcess(t *testing.T) { assert := assert.New(t) fm := &fakeManifest{} mm := manifestManager{fm, newManifestCache(0), newManifestLocks()} p := newFakeTablePersister() // Simulate another process writing a manifest behind store's back. 
newRoot, chunks := interloperWrite(fm, p, []byte("new root"), []byte("hello2"), []byte("goodbye2"), []byte("badbye2")) store := newNomsBlockStore(mm, p, inlineConjoiner{defaultMaxTables}, defaultMemTableSize) defer store.Close() assert.Equal(newRoot, store.Root()) assert.Equal(constants.NomsVersion, store.Version()) assertDataInStore(chunks, store, assert) } func TestChunkStoreCommitOptimisticLockFail(t *testing.T) { assert := assert.New(t) fm, p, store := makeStoreWithFakes(t) defer store.Close() // Simulate another process writing a manifest behind store's back. newRoot, chunks := interloperWrite(fm, p, []byte("new root"), []byte("hello2"), []byte("goodbye2"), []byte("badbye2")) newRoot2 := hash.Of([]byte("new root 2")) assert.False(store.Commit(newRoot2, hash.Hash{})) assertDataInStore(chunks, store, assert) assert.True(store.Commit(newRoot2, newRoot)) } func TestChunkStoreManifestPreemptiveOptimisticLockFail(t *testing.T) { assert := assert.New(t) fm := &fakeManifest{} mm := manifestManager{fm, newManifestCache(defaultManifestCacheSize), newManifestLocks()} p := newFakeTablePersister() c := inlineConjoiner{defaultMaxTables} store := newNomsBlockStore(mm, p, c, defaultMemTableSize) defer store.Close() // Simulate another goroutine writing a manifest behind store's back. 
interloper := newNomsBlockStore(mm, p, c, defaultMemTableSize) defer interloper.Close() chunk := chunks.NewChunk([]byte("hello")) interloper.Put(chunk) assert.True(interloper.Commit(chunk.Hash(), hash.Hash{})) // Try to land a new chunk in store, which should fail AND not persist the contents of store.mt chunk = chunks.NewChunk([]byte("goodbye")) store.Put(chunk) assert.NotNil(store.mt) assert.False(store.Commit(chunk.Hash(), hash.Hash{})) assert.NotNil(store.mt) assert.True(store.Commit(chunk.Hash(), store.Root())) assert.Nil(store.mt) assert.Equal(chunk.Hash(), store.Root()) assert.Equal(constants.NomsVersion, store.Version()) } func TestChunkStoreCommitLocksOutFetch(t *testing.T) { assert := assert.New(t) fm := &fakeManifest{name: "foo"} upm := &updatePreemptManifest{manifest: fm} mm := manifestManager{upm, newManifestCache(defaultManifestCacheSize), newManifestLocks()} p := newFakeTablePersister() c := inlineConjoiner{defaultMaxTables} store := newNomsBlockStore(mm, p, c, defaultMemTableSize) defer store.Close() // store.Commit() should lock out calls to mm.Fetch() wg := sync.WaitGroup{} fetched := manifestContents{} upm.preUpdate = func() { wg.Add(1) go func() { defer wg.Done() _, fetched = mm.Fetch(nil) }() } rootChunk := chunks.NewChunk([]byte("new root")) store.Put(rootChunk) assert.True(store.Commit(rootChunk.Hash(), store.Root())) wg.Wait() assert.Equal(store.Root(), fetched.root) } func TestChunkStoreSerializeCommits(t *testing.T) { assert := assert.New(t) fm := &fakeManifest{name: "foo"} upm := &updatePreemptManifest{manifest: fm} mc := newManifestCache(defaultManifestCacheSize) l := newManifestLocks() p := newFakeTablePersister() c := inlineConjoiner{defaultMaxTables} store := newNomsBlockStore(manifestManager{upm, mc, l}, p, c, defaultMemTableSize) defer store.Close() storeChunk := chunks.NewChunk([]byte("store")) interloperChunk := chunks.NewChunk([]byte("interloper")) updateCount := 0 interloper := newNomsBlockStore( manifestManager{ 
updatePreemptManifest{fm, func() { updateCount++ }}, mc, l, }, p, c, defaultMemTableSize) defer interloper.Close() wg := sync.WaitGroup{} upm.preUpdate = func() { wg.Add(1) go func() { defer wg.Done() interloper.Put(interloperChunk) assert.True(interloper.Commit(interloper.Root(), interloper.Root())) }() updateCount++ } store.Put(storeChunk) assert.True(store.Commit(store.Root(), store.Root())) wg.Wait() assert.Equal(2, updateCount) assert.True(interloper.Has(storeChunk.Hash())) assert.True(interloper.Has(interloperChunk.Hash())) } func makeStoreWithFakes(t *testing.T) (fm *fakeManifest, p tablePersister, store *NomsBlockStore) { fm = &fakeManifest{} mm := manifestManager{fm, newManifestCache(0), newManifestLocks()} p = newFakeTablePersister() store = newNomsBlockStore(mm, p, inlineConjoiner{defaultMaxTables}, 0) return } // Simulate another process writing a manifest behind store's back. func interloperWrite(fm *fakeManifest, p tablePersister, rootChunk []byte, chunks ...[]byte) (newRoot hash.Hash, persisted [][]byte) { newLock, newRoot := computeAddr([]byte("locker")), hash.Of(rootChunk) persisted = append(chunks, rootChunk) src := p.Persist(createMemTable(persisted), nil, &Stats{}) fm.set(constants.NomsVersion, newLock, newRoot, []tableSpec{{src.hash(), uint32(len(chunks))}}) return } func createMemTable(chunks [][]byte) *memTable { mt := newMemTable(1 << 10) for _, c := range chunks { mt.addChunk(computeAddr(c), c) } return mt } func assertDataInStore(slices [][]byte, store chunks.ChunkStore, assert *assert.Assertions) { for _, data := range slices { assert.True(store.Has(chunks.NewChunk(data).Hash())) } } // fakeManifest simulates a fileManifest without touching disk. type fakeManifest struct { name string contents manifestContents mu sync.RWMutex } func (fm *fakeManifest) Name() string { return fm.name } // ParseIfExists returns any fake manifest data the caller has injected using // Update() or set(). It treats an empty |fm.lock| as a non-existent manifest. 
func (fm *fakeManifest) ParseIfExists(stats *Stats, readHook func()) (exists bool, contents manifestContents) { fm.mu.RLock() defer fm.mu.RUnlock() if fm.contents.lock != (addr{}) { return true, fm.contents } return false, manifestContents{} } // Update checks whether |lastLock| == |fm.lock| and, if so, updates internal // fake manifest state as per the manifest.Update() contract: |fm.lock| is set // to |newLock|, |fm.root| is set to |newRoot|, and the contents of |specs| // replace |fm.tableSpecs|. If |lastLock| != |fm.lock|, then the update // fails. Regardless of success or failure, the current state is returned. func (fm *fakeManifest) Update(lastLock addr, newContents manifestContents, stats *Stats, writeHook func()) manifestContents { fm.mu.Lock() defer fm.mu.Unlock() if fm.contents.lock == lastLock { fm.contents = manifestContents{newContents.vers, newContents.lock, newContents.root, nil} fm.contents.specs = make([]tableSpec, len(newContents.specs)) copy(fm.contents.specs, newContents.specs) } return fm.contents } func (fm *fakeManifest) set(version string, lock addr, root hash.Hash, specs []tableSpec) { fm.contents = manifestContents{version, lock, root, specs} } func newFakeTableSet() tableSet { return tableSet{p: newFakeTablePersister(), rl: make(chan struct{}, 1)} } func newFakeTablePersister() tablePersister { return fakeTablePersister{map[addr]tableReader{}, &sync.RWMutex{}} } type fakeTablePersister struct { sources map[addr]tableReader mu *sync.RWMutex } func (ftp fakeTablePersister) Persist(mt *memTable, haver chunkReader, stats *Stats) chunkSource { if mt.count() > 0 { name, data, chunkCount := mt.write(haver, stats) if chunkCount > 0 { ftp.mu.Lock() defer ftp.mu.Unlock() ftp.sources[name] = newTableReader(parseTableIndex(data), tableReaderAtFromBytes(data), fileBlockSize) return chunkSourceAdapter{ftp.sources[name], name} } } return emptyChunkSource{} } func (ftp fakeTablePersister) ConjoinAll(sources chunkSources, stats *Stats) chunkSource { 
name, data, chunkCount := compactSourcesToBuffer(sources) if chunkCount > 0 { ftp.mu.Lock() defer ftp.mu.Unlock() ftp.sources[name] = newTableReader(parseTableIndex(data), tableReaderAtFromBytes(data), fileBlockSize) return chunkSourceAdapter{ftp.sources[name], name} } return emptyChunkSource{} } func compactSourcesToBuffer(sources chunkSources) (name addr, data []byte, chunkCount uint32) { totalData := uint64(0) for _, src := range sources { chunkCount += src.count() totalData += src.uncompressedLen() } if chunkCount == 0 { return } maxSize := maxTableSize(uint64(chunkCount), totalData) buff := make([]byte, maxSize) // This can blow up RAM tw := newTableWriter(buff, nil) errString := "" for _, src := range sources { chunks := make(chan extractRecord) go func() { defer close(chunks) defer func() { if r := recover(); r != nil { chunks <- extractRecord{a: src.hash(), err: r} } }() src.extract(chunks) }() for rec := range chunks { if rec.err != nil { errString += fmt.Sprintf("Failed to extract %s:\n %v\n******\n\n", rec.a, rec.err) continue } tw.addChunk(rec.a, rec.data) } } if errString != "" { panic(fmt.Errorf(errString)) } tableSize, name := tw.finish() return name, buff[:tableSize], chunkCount } func (ftp fakeTablePersister) Open(name addr, chunkCount uint32, stats *Stats) chunkSource { ftp.mu.RLock() defer ftp.mu.RUnlock() return chunkSourceAdapter{ftp.sources[name], name} } type chunkSourceAdapter struct { tableReader h addr } func (csa chunkSourceAdapter) hash() addr { return csa.h } func (csa chunkSourceAdapter) index() tableIndex { return csa.tableIndex } ================================================ FILE: go/nbs/s3_fake_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "io" "io/ioutil" "net/url" "strconv" "strings" "sync" "testing" "time" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/s3" "github.com/stretchr/testify/assert" ) type mockAWSError string func (m mockAWSError) Error() string { return string(m) } func (m mockAWSError) Code() string { return string(m) } func (m mockAWSError) Message() string { return string(m) } func (m mockAWSError) OrigErr() error { return nil } func makeFakeS3(t *testing.T) *fakeS3 { return &fakeS3{ assert: assert.New(t), data: map[string][]byte{}, inProgress: map[string]fakeS3Multipart{}, parts: map[string][]byte{}, } } type fakeS3 struct { assert *assert.Assertions mu sync.Mutex data map[string][]byte inProgressCounter int inProgress map[string]fakeS3Multipart // Key -> {UploadId, Etags...} parts map[string][]byte // ETag -> data getCount int } type fakeS3Multipart struct { uploadID string etags []string } func (m *fakeS3) readerForTable(name addr) chunkReader { m.mu.Lock() defer m.mu.Unlock() if buff, present := m.data[name.String()]; present { return newTableReader(parseTableIndex(buff), tableReaderAtFromBytes(buff), s3BlockSize) } return nil } func (m *fakeS3) AbortMultipartUpload(input *s3.AbortMultipartUploadInput) (*s3.AbortMultipartUploadOutput, error) { m.assert.NotNil(input.Bucket, "Bucket is a required field") m.assert.NotNil(input.Key, "Key is a required field") m.assert.NotNil(input.UploadId, "UploadId is a required field") m.mu.Lock() defer m.mu.Unlock() m.assert.Equal(m.inProgress[*input.Key].uploadID, *input.UploadId) for _, etag := range m.inProgress[*input.Key].etags { delete(m.parts, etag) } delete(m.inProgress, *input.Key) return &s3.AbortMultipartUploadOutput{}, nil } func (m *fakeS3) CreateMultipartUpload(input *s3.CreateMultipartUploadInput) 
(*s3.CreateMultipartUploadOutput, error) { m.assert.NotNil(input.Bucket, "Bucket is a required field") m.assert.NotNil(input.Key, "Key is a required field") out := &s3.CreateMultipartUploadOutput{ Bucket: input.Bucket, Key: input.Key, } m.mu.Lock() defer m.mu.Unlock() uploadID := strconv.Itoa(m.inProgressCounter) out.UploadId = aws.String(uploadID) m.inProgress[*input.Key] = fakeS3Multipart{uploadID, nil} m.inProgressCounter++ return out, nil } func (m *fakeS3) UploadPart(input *s3.UploadPartInput) (*s3.UploadPartOutput, error) { m.assert.NotNil(input.Bucket, "Bucket is a required field") m.assert.NotNil(input.Key, "Key is a required field") m.assert.NotNil(input.PartNumber, "PartNumber is a required field") m.assert.NotNil(input.UploadId, "UploadId is a required field") m.assert.NotNil(input.Body, "Body is a required field") data, err := ioutil.ReadAll(input.Body) m.assert.NoError(err) m.mu.Lock() defer m.mu.Unlock() etag := hash.Of(data).String() + time.Now().String() m.parts[etag] = data inProgress, present := m.inProgress[*input.Key] m.assert.True(present) m.assert.Equal(inProgress.uploadID, *input.UploadId) inProgress.etags = append(inProgress.etags, etag) m.inProgress[*input.Key] = inProgress return &s3.UploadPartOutput{ETag: aws.String(etag)}, nil } func (m *fakeS3) UploadPartCopy(input *s3.UploadPartCopyInput) (*s3.UploadPartCopyOutput, error) { m.assert.NotNil(input.Bucket, "Bucket is a required field") m.assert.NotNil(input.Key, "Key is a required field") m.assert.NotNil(input.PartNumber, "PartNumber is a required field") m.assert.NotNil(input.UploadId, "UploadId is a required field") m.assert.NotNil(input.CopySource, "CopySource is a required field") unescaped, err := url.QueryUnescape(*input.CopySource) m.assert.NoError(err) slash := strings.LastIndex(unescaped, "/") m.assert.NotEqual(-1, slash, "Malformed CopySource %s", unescaped) src := unescaped[slash+1:] m.mu.Lock() defer m.mu.Unlock() obj, present := m.data[src] if !present { return nil, 
mockAWSError("NoSuchKey") } if input.CopySourceRange != nil { start, end := parseRange(*input.CopySourceRange, len(obj)) obj = obj[start:end] } etag := hash.Of(obj).String() + time.Now().String() m.parts[etag] = obj inProgress, present := m.inProgress[*input.Key] m.assert.True(present) m.assert.Equal(inProgress.uploadID, *input.UploadId) inProgress.etags = append(inProgress.etags, etag) m.inProgress[*input.Key] = inProgress return &s3.UploadPartCopyOutput{CopyPartResult: &s3.CopyPartResult{ETag: aws.String(etag)}}, nil } func (m *fakeS3) CompleteMultipartUpload(input *s3.CompleteMultipartUploadInput) (*s3.CompleteMultipartUploadOutput, error) { m.assert.NotNil(input.Bucket, "Bucket is a required field") m.assert.NotNil(input.Key, "Key is a required field") m.assert.NotNil(input.UploadId, "UploadId is a required field") m.assert.NotNil(input.MultipartUpload, "MultipartUpload is a required field") m.assert.True(len(input.MultipartUpload.Parts) > 0) m.mu.Lock() defer m.mu.Unlock() m.assert.Equal(m.inProgress[*input.Key].uploadID, *input.UploadId) for idx, part := range input.MultipartUpload.Parts { m.assert.EqualValues(idx+1, *part.PartNumber) // Part numbers are 1-indexed m.data[*input.Key] = append(m.data[*input.Key], m.parts[*part.ETag]...) 
delete(m.parts, *part.ETag) } delete(m.inProgress, *input.Key) return &s3.CompleteMultipartUploadOutput{Bucket: input.Bucket, Key: input.Key}, nil } func (m *fakeS3) GetObject(input *s3.GetObjectInput) (*s3.GetObjectOutput, error) { m.getCount++ m.assert.NotNil(input.Bucket, "Bucket is a required field") m.assert.NotNil(input.Key, "Key is a required field") m.mu.Lock() defer m.mu.Unlock() obj, present := m.data[*input.Key] if !present { return nil, mockAWSError("NoSuchKey") } if input.Range != nil { start, end := parseRange(*input.Range, len(obj)) obj = obj[start:end] } return &s3.GetObjectOutput{ Body: ioutil.NopCloser(bytes.NewReader(obj)), ContentLength: aws.Int64(int64(len(obj))), }, nil } func parseRange(hdr string, total int) (start, end int) { d.PanicIfFalse(len(hdr) > len(s3RangePrefix)) hdr = hdr[len(s3RangePrefix):] d.PanicIfFalse(hdr[0] == '=') hdr = hdr[1:] if hdr[0] == '-' { // negative range fromEnd, err := strconv.Atoi(hdr[1:]) d.PanicIfError(err) return total - fromEnd, total } ends := strings.Split(hdr, "-") d.PanicIfFalse(len(ends) == 2) start, err := strconv.Atoi(ends[0]) d.PanicIfError(err) end, err = strconv.Atoi(ends[1]) d.PanicIfError(err) return start, end + 1 // insanely, the HTTP range header specifies ranges inclusively. } func (m *fakeS3) PutObject(input *s3.PutObjectInput) (*s3.PutObjectOutput, error) { m.assert.NotNil(input.Bucket, "Bucket is a required field") m.assert.NotNil(input.Key, "Key is a required field") buff := &bytes.Buffer{} io.Copy(buff, input.Body) m.mu.Lock() defer m.mu.Unlock() m.data[*input.Key] = buff.Bytes() return &s3.PutObjectOutput{}, nil } ================================================ FILE: go/nbs/s3_table_reader.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "fmt" "io" "net" "os" "time" "golang.org/x/sys/unix" "github.com/attic-labs/noms/go/d" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/s3" "github.com/jpillora/backoff" ) const ( s3RangePrefix = "bytes" s3BlockSize = (1 << 10) * 512 // 512K ) type s3TableReaderAt struct { s3 *s3ObjectReader h addr } type s3svc interface { AbortMultipartUpload(input *s3.AbortMultipartUploadInput) (*s3.AbortMultipartUploadOutput, error) CreateMultipartUpload(input *s3.CreateMultipartUploadInput) (*s3.CreateMultipartUploadOutput, error) UploadPart(input *s3.UploadPartInput) (*s3.UploadPartOutput, error) UploadPartCopy(input *s3.UploadPartCopyInput) (*s3.UploadPartCopyOutput, error) CompleteMultipartUpload(input *s3.CompleteMultipartUploadInput) (*s3.CompleteMultipartUploadOutput, error) GetObject(input *s3.GetObjectInput) (*s3.GetObjectOutput, error) PutObject(input *s3.PutObjectInput) (*s3.PutObjectOutput, error) } func (s3tra *s3TableReaderAt) ReadAtWithStats(p []byte, off int64, stats *Stats) (n int, err error) { return s3tra.s3.ReadAt(s3tra.h, p, off, stats) } // TODO: Bring all the multipart upload and remote-conjoin stuff over here and make this a better analogue to ddbTableStore type s3ObjectReader struct { s3 s3svc bucket string readRl chan struct{} tc tableCache } func (s3or *s3ObjectReader) ReadAt(name addr, p []byte, off int64, stats *Stats) (n int, err error) { t1 := time.Now() if s3or.tc != nil { r := s3or.tc.checkout(name) if r != nil { defer func() { stats.FileBytesPerRead.Sample(uint64(len(p))) stats.FileReadLatency.SampleTimeSince(t1) }() defer s3or.tc.checkin(name) return r.ReadAt(p, off) } } defer func() { stats.S3BytesPerRead.Sample(uint64(len(p))) stats.S3ReadLatency.SampleTimeSince(t1) }() return s3or.readRange(name, p, s3RangeHeader(off, int64(len(p)))) } func s3RangeHeader(off, length int64) string { lastByte := off + length - 
1 // insanely, the HTTP range header specifies ranges inclusively. return fmt.Sprintf("%s=%d-%d", s3RangePrefix, off, lastByte) } func (s3or *s3ObjectReader) ReadFromEnd(name addr, p []byte, stats *Stats) (n int, err error) { // TODO: enable this to use the tableCache. The wrinkle is the tableCache currently just returns a ReaderAt, which doesn't give you the length of the object that backs it, so you can't calculate an offset if all you know is that you want the last N bytes. defer func(t1 time.Time) { stats.S3BytesPerRead.Sample(uint64(len(p))) stats.S3ReadLatency.SampleTimeSince(t1) }(time.Now()) return s3or.readRange(name, p, fmt.Sprintf("%s=-%d", s3RangePrefix, len(p))) } func (s3or *s3ObjectReader) readRange(name addr, p []byte, rangeHeader string) (n int, err error) { read := func() (int, error) { if s3or.readRl != nil { s3or.readRl <- struct{}{} defer func() { <-s3or.readRl }() } input := &s3.GetObjectInput{ Bucket: aws.String(s3or.bucket), Key: aws.String(name.String()), Range: aws.String(rangeHeader), } result, err := s3or.s3.GetObject(input) d.PanicIfError(err) d.PanicIfFalse(*result.ContentLength == int64(len(p))) n, err := io.ReadFull(result.Body, p) if err != nil { fmt.Fprintf(os.Stderr, "Failed ranged read from S3\n%s\nerr type: %T\nerror: %v\n", input.GoString(), err, err) } return n, err } n, err = read() // We hit the point of diminishing returns investigating #3255, so add retries. In conversations with AWS people, it's not surprising to get transient failures when talking to S3, though SDKs are intended to have their own retrying. The issue may be that, in Go, making the S3 request and reading the data are separate operations, and the SDK kind of can't do its own retrying to handle failures in the latter. if isConnReset(err) { // We are backing off here because its possible and likely that the rate of requests to S3 is the underlying issue. 
b := &backoff.Backoff{ Min: 128 * time.Microsecond, Max: 1024 * time.Millisecond, Factor: 2, Jitter: true, } for ; isConnReset(err); n, err = read() { dur := b.Duration() fmt.Fprintf(os.Stderr, "Retrying S3 read in %s\n", dur.String()) time.Sleep(dur) } } return } func isConnReset(err error) bool { nErr, ok := err.(*net.OpError) if !ok { return false } scErr, ok := nErr.Err.(*os.SyscallError) return ok && scErr.Err == unix.ECONNRESET } ================================================ FILE: go/nbs/s3_table_reader_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "io/ioutil" "net" "os" "testing" "golang.org/x/sys/unix" "github.com/aws/aws-sdk-go/service/s3" "github.com/stretchr/testify/assert" ) func TestS3TableReaderAt(t *testing.T) { s3 := makeFakeS3(t) chunks := [][]byte{ []byte("hello2"), []byte("goodbye2"), []byte("badbye2"), } tableData, h := buildTable(chunks) s3.data[h.String()] = tableData t.Run("TolerateFailingReads", func(t *testing.T) { assert := assert.New(t) baseline := s3.getCount tra := &s3TableReaderAt{&s3ObjectReader{makeFlakyS3(s3), "bucket", nil, nil}, h} scratch := make([]byte, len(tableData)) _, err := tra.ReadAtWithStats(scratch, 0, &Stats{}) assert.NoError(err) // constructing the table reader should have resulted in 2 reads assert.Equal(2, s3.getCount-baseline) assert.Equal(tableData, scratch) }) t.Run("WithTableCache", func(t *testing.T) { assert := assert.New(t) dir := makeTempDir(t) defer os.RemoveAll(dir) stats := &Stats{} tc := newFSTableCache(dir, uint64(2*len(tableData)), 4) tra := &s3TableReaderAt{&s3ObjectReader{s3, "bucket", nil, tc}, h} // First, read when table is not yet cached scratch := make([]byte, len(tableData)) baseline := s3.getCount _, err := tra.ReadAtWithStats(scratch, 0, stats) assert.NoError(err) assert.True(s3.getCount > baseline) 
// Cache the table and read again tc.store(h, bytes.NewReader(tableData), uint64(len(tableData))) baseline = s3.getCount _, err = tra.ReadAtWithStats(scratch, 0, stats) assert.NoError(err) assert.Zero(s3.getCount - baseline) }) } type flakyS3 struct { s3svc alreadyFailed map[string]struct{} } func makeFlakyS3(svc s3svc) *flakyS3 { return &flakyS3{svc, map[string]struct{}{}} } func (fs3 *flakyS3) GetObject(input *s3.GetObjectInput) (output *s3.GetObjectOutput, err error) { output, err = fs3.s3svc.GetObject(input) if _, ok := fs3.alreadyFailed[*input.Key]; !ok { fs3.alreadyFailed[*input.Key] = struct{}{} output.Body = ioutil.NopCloser(resettingReader{}) } return } type resettingReader struct{} func (rr resettingReader) Read(p []byte) (n int, err error) { return 0, &net.OpError{Op: "read", Net: "tcp", Err: &os.SyscallError{Syscall: "read", Err: unix.ECONNRESET}} } ================================================ FILE: go/nbs/stats.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "fmt" "github.com/attic-labs/noms/go/metrics" ) type Stats struct { OpenLatency metrics.Histogram CommitLatency metrics.Histogram IndexReadLatency metrics.Histogram IndexBytesPerRead metrics.Histogram GetLatency metrics.Histogram ChunksPerGet metrics.Histogram FileReadLatency metrics.Histogram FileBytesPerRead metrics.Histogram S3ReadLatency metrics.Histogram S3BytesPerRead metrics.Histogram MemReadLatency metrics.Histogram MemBytesPerRead metrics.Histogram DynamoReadLatency metrics.Histogram DynamoBytesPerRead metrics.Histogram HasLatency metrics.Histogram AddressesPerHas metrics.Histogram PutLatency metrics.Histogram PersistLatency metrics.Histogram BytesPerPersist metrics.Histogram ChunksPerPersist metrics.Histogram CompressedChunkBytesPerPersist metrics.Histogram UncompressedChunkBytesPerPersist metrics.Histogram ConjoinLatency metrics.Histogram BytesPerConjoin metrics.Histogram ChunksPerConjoin metrics.Histogram TablesPerConjoin metrics.Histogram ReadManifestLatency metrics.Histogram WriteManifestLatency metrics.Histogram } func NewStats() *Stats { return &Stats{ OpenLatency: metrics.NewTimeHistogram(), CommitLatency: metrics.NewTimeHistogram(), IndexReadLatency: metrics.NewTimeHistogram(), IndexBytesPerRead: metrics.NewByteHistogram(), GetLatency: metrics.NewTimeHistogram(), FileReadLatency: metrics.NewTimeHistogram(), FileBytesPerRead: metrics.NewByteHistogram(), S3ReadLatency: metrics.NewTimeHistogram(), S3BytesPerRead: metrics.NewByteHistogram(), MemReadLatency: metrics.NewTimeHistogram(), MemBytesPerRead: metrics.NewByteHistogram(), DynamoReadLatency: metrics.NewTimeHistogram(), DynamoBytesPerRead: metrics.NewByteHistogram(), HasLatency: metrics.NewTimeHistogram(), PutLatency: metrics.NewTimeHistogram(), PersistLatency: metrics.NewTimeHistogram(), BytesPerPersist: metrics.NewByteHistogram(), CompressedChunkBytesPerPersist: 
metrics.NewByteHistogram(), UncompressedChunkBytesPerPersist: metrics.NewByteHistogram(), ConjoinLatency: metrics.NewTimeHistogram(), BytesPerConjoin: metrics.NewByteHistogram(), ReadManifestLatency: metrics.NewTimeHistogram(), WriteManifestLatency: metrics.NewTimeHistogram(), } } func (s *Stats) Add(other Stats) { s.OpenLatency.Add(other.OpenLatency) s.CommitLatency.Add(other.CommitLatency) s.IndexReadLatency.Add(other.IndexReadLatency) s.IndexBytesPerRead.Add(other.IndexBytesPerRead) s.GetLatency.Add(other.GetLatency) s.ChunksPerGet.Add(other.ChunksPerGet) s.FileReadLatency.Add(other.FileReadLatency) s.FileBytesPerRead.Add(other.FileBytesPerRead) s.S3ReadLatency.Add(other.S3ReadLatency) s.S3BytesPerRead.Add(other.S3BytesPerRead) s.MemReadLatency.Add(other.MemReadLatency) s.MemBytesPerRead.Add(other.MemBytesPerRead) s.DynamoReadLatency.Add(other.DynamoReadLatency) s.DynamoBytesPerRead.Add(other.DynamoBytesPerRead) s.HasLatency.Add(other.HasLatency) s.AddressesPerHas.Add(other.AddressesPerHas) s.PutLatency.Add(other.PutLatency) s.PersistLatency.Add(other.PersistLatency) s.BytesPerPersist.Add(other.BytesPerPersist) s.ChunksPerPersist.Add(other.ChunksPerPersist) s.CompressedChunkBytesPerPersist.Add(other.CompressedChunkBytesPerPersist) s.UncompressedChunkBytesPerPersist.Add(other.UncompressedChunkBytesPerPersist) s.ConjoinLatency.Add(other.ConjoinLatency) s.BytesPerConjoin.Add(other.BytesPerConjoin) s.ChunksPerConjoin.Add(other.ChunksPerConjoin) s.TablesPerConjoin.Add(other.TablesPerConjoin) s.ReadManifestLatency.Add(other.ReadManifestLatency) s.WriteManifestLatency.Add(other.WriteManifestLatency) } func (s Stats) Delta(other Stats) Stats { return Stats{ s.OpenLatency.Delta(other.OpenLatency), s.CommitLatency.Delta(other.CommitLatency), s.IndexReadLatency.Delta(other.IndexReadLatency), s.IndexBytesPerRead.Delta(other.IndexBytesPerRead), s.GetLatency.Delta(other.GetLatency), s.ChunksPerGet.Delta(other.ChunksPerGet), s.FileReadLatency.Delta(other.FileReadLatency), 
s.FileBytesPerRead.Delta(other.FileBytesPerRead), s.S3ReadLatency.Delta(other.S3ReadLatency), s.S3BytesPerRead.Delta(other.S3BytesPerRead), s.MemReadLatency.Delta(other.MemReadLatency), s.MemBytesPerRead.Delta(other.MemBytesPerRead), s.DynamoReadLatency.Delta(other.DynamoReadLatency), s.DynamoBytesPerRead.Delta(other.DynamoBytesPerRead), s.HasLatency.Delta(other.HasLatency), s.AddressesPerHas.Delta(other.AddressesPerHas), s.PutLatency.Delta(other.PutLatency), s.PersistLatency.Delta(other.PersistLatency), s.BytesPerPersist.Delta(other.BytesPerPersist), s.ChunksPerPersist.Delta(other.ChunksPerPersist), s.CompressedChunkBytesPerPersist.Delta(other.CompressedChunkBytesPerPersist), s.UncompressedChunkBytesPerPersist.Delta(other.UncompressedChunkBytesPerPersist), s.ConjoinLatency.Delta(other.ConjoinLatency), s.BytesPerConjoin.Delta(other.BytesPerConjoin), s.ChunksPerConjoin.Delta(other.ChunksPerConjoin), s.TablesPerConjoin.Delta(other.TablesPerConjoin), s.ReadManifestLatency.Delta(other.ReadManifestLatency), s.WriteManifestLatency.Delta(other.WriteManifestLatency), } } func (s Stats) String() string { return fmt.Sprintf(`---NBS Stats--- OpenLatecy: %s CommitLatency: %s IndexReadLatency: %s IndexBytesPerRead: %s GetLatency: %s ChunksPerGet: %s FileReadLatency: %s FileBytesPerRead: %s S3ReadLatency: %s S3BytesPerRead: %s MemReadLatency: %s MemBytesPerRead: %s DynamoReadLatency: %s DynamoBytesPerRead: %s HasLatency: %s AddressesHasGet: %s PutLatency: %s PersistLatency: %s BytesPerPersist: %s ChunksPerPersist: %s CompressedChunkBytesPerPersist: %s UncompressedChunkBytesPerPersist: %s ConjoinLatency: %s BytesPerConjoin: %s ChunksPerConjoin: %s TablesPerConjoin: %s ReadManifestLatency: %s WriteManifestLatency: %s `, s.OpenLatency, s.CommitLatency, s.IndexReadLatency, s.IndexBytesPerRead, s.GetLatency, s.ChunksPerGet, s.FileReadLatency, s.FileBytesPerRead, s.S3ReadLatency, s.S3BytesPerRead, s.MemReadLatency, s.MemBytesPerRead, s.DynamoReadLatency, s.DynamoBytesPerRead, 
s.HasLatency, s.AddressesPerHas, s.PutLatency, s.PersistLatency, s.BytesPerPersist, s.ChunksPerPersist, s.CompressedChunkBytesPerPersist, s.UncompressedChunkBytesPerPersist, s.ConjoinLatency, s.BytesPerConjoin, s.ChunksPerConjoin, s.TablesPerConjoin, s.ReadManifestLatency, s.WriteManifestLatency) } ================================================ FILE: go/nbs/stats_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "io/ioutil" "os" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) func TestStats(t *testing.T) { assert := assert.New(t) stats := func(store *NomsBlockStore) Stats { return store.Stats().(Stats) } dir, err := ioutil.TempDir("", "") assert.NoError(err) store := NewLocalStore(dir, testMemTableSize) assert.EqualValues(1, stats(store).OpenLatency.Samples()) // Opening a new store will still incur some read IO, to discover that the manifest doesn't exist assert.EqualValues(1, stats(store).ReadManifestLatency.Samples()) i1, i2, i3, i4, i5 := []byte("abc"), []byte("def"), []byte("ghi"), []byte("jkl"), []byte("mno") c1, c2, c3, c4, c5 := chunks.NewChunk(i1), chunks.NewChunk(i2), chunks.NewChunk(i3), chunks.NewChunk(i4), chunks.NewChunk(i5) // These just go to mem table, only operation stats store.Put(c1) store.Put(c2) store.Put(c3) assert.Equal(uint64(3), stats(store).PutLatency.Samples()) assert.Equal(uint64(0), stats(store).PersistLatency.Samples()) assert.True(store.Has(c1.Hash())) assert.True(store.Has(c2.Hash())) assert.True(store.Has(c3.Hash())) assert.Equal(uint64(3), stats(store).HasLatency.Samples()) assert.Equal(uint64(3), stats(store).AddressesPerHas.Sum()) assert.False(store.Get(c1.Hash()).IsEmpty()) assert.False(store.Get(c2.Hash()).IsEmpty()) assert.False(store.Get(c3.Hash()).IsEmpty()) 
assert.Equal(uint64(3), stats(store).GetLatency.Samples()) assert.Equal(uint64(0), stats(store).FileReadLatency.Samples()) assert.Equal(uint64(3), stats(store).ChunksPerGet.Sum()) store.Commit(store.Root(), store.Root()) // Commit will update the manifest assert.EqualValues(1, stats(store).WriteManifestLatency.Samples()) assert.EqualValues(1, stats(store).CommitLatency.Samples()) // Now we have write IO assert.Equal(uint64(1), stats(store).PersistLatency.Samples()) assert.Equal(uint64(3), stats(store).ChunksPerPersist.Sum()) assert.Equal(uint64(131), stats(store).BytesPerPersist.Sum()) // Now some gets that will incur read IO store.Get(c1.Hash()) store.Get(c2.Hash()) store.Get(c3.Hash()) assert.Equal(uint64(3), stats(store).FileReadLatency.Samples()) assert.Equal(uint64(27), stats(store).FileBytesPerRead.Sum()) // Try A GetMany chnx := make([]chunks.Chunk, 3) chnx[0] = c1 chnx[1] = c2 chnx[2] = c3 hashes := make(hash.HashSlice, len(chnx)) for i, c := range chnx { hashes[i] = c.Hash() } chunkChan := make(chan *chunks.Chunk, 3) store.GetMany(hashes.HashSet(), chunkChan) assert.Equal(uint64(4), stats(store).FileReadLatency.Samples()) assert.Equal(uint64(54), stats(store).FileBytesPerRead.Sum()) // Force a conjoin store.c = inlineConjoiner{2} store.Put(c4) store.Commit(store.Root(), store.Root()) store.Put(c5) store.Commit(store.Root(), store.Root()) assert.Equal(uint64(1), stats(store).ConjoinLatency.Samples()) // TODO: Once random conjoin hack is out, test other conjoin stats defer store.Close() defer os.RemoveAll(dir) } ================================================ FILE: go/nbs/store.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "fmt" "sort" "sync" "time" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/constants" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" humanize "github.com/dustin/go-humanize" ) // The root of a Noms Chunk Store is stored in a 'manifest', along with the // names of the tables that hold all the chunks in the store. The number of // chunks in each table is also stored in the manifest. const ( // StorageVersion is the version of the on-disk Noms Chunks Store data format. StorageVersion = "4" defaultMemTableSize uint64 = (1 << 20) * 128 // 128MB defaultMaxTables = 192 defaultIndexCacheSize = (1 << 20) * 8 // 8MB defaultManifestCacheSize = 1 << 23 // 8MB preflushChunkCount = 8 ) var ( cacheOnce = sync.Once{} globalIndexCache *indexCache makeManifestManager func(manifest) manifestManager globalFDCache *fdCache ) func makeGlobalCaches() { globalIndexCache = newIndexCache(defaultIndexCacheSize) globalFDCache = newFDCache(defaultMaxTables) manifestCache := newManifestCache(defaultManifestCacheSize) manifestLocks := newManifestLocks() makeManifestManager = func(m manifest) manifestManager { return manifestManager{m, manifestCache, manifestLocks} } } type NomsBlockStore struct { mm manifestManager p tablePersister c conjoiner mu sync.RWMutex // protects the following state mt *memTable tables tableSet upstream manifestContents mtSize uint64 putCount uint64 stats *Stats } func NewAWSStore(table, ns, bucket string, s3 s3svc, ddb ddbsvc, memTableSize uint64) *NomsBlockStore { cacheOnce.Do(makeGlobalCaches) readRateLimiter := make(chan struct{}, 32) p := &awsTablePersister{ s3, bucket, readRateLimiter, nil, &ddbTableStore{ddb, table, readRateLimiter, nil}, awsLimits{defaultS3PartSize, minS3PartSize, maxS3PartSize, maxDynamoItemSize, maxDynamoChunks}, globalIndexCache, } mm := makeManifestManager(newDynamoManifest(table, 
ns, ddb)) return newNomsBlockStore(mm, p, inlineConjoiner{defaultMaxTables}, memTableSize) } func NewLocalStore(dir string, memTableSize uint64) *NomsBlockStore { cacheOnce.Do(makeGlobalCaches) d.PanicIfError(checkDir(dir)) mm := makeManifestManager(fileManifest{dir}) p := newFSTablePersister(dir, globalFDCache, globalIndexCache) return newNomsBlockStore(mm, p, inlineConjoiner{defaultMaxTables}, memTableSize) } func newNomsBlockStore(mm manifestManager, p tablePersister, c conjoiner, memTableSize uint64) *NomsBlockStore { if memTableSize == 0 { memTableSize = defaultMemTableSize } nbs := &NomsBlockStore{ mm: mm, p: p, c: c, tables: newTableSet(p), upstream: manifestContents{vers: constants.NomsVersion}, mtSize: memTableSize, stats: NewStats(), } t1 := time.Now() defer nbs.stats.OpenLatency.SampleTimeSince(t1) if exists, contents := nbs.mm.Fetch(nbs.stats); exists { nbs.upstream = contents nbs.tables = nbs.tables.Rebase(contents.specs, nbs.stats) } return nbs } func newNomsBlockStoreWithContents(mm manifestManager, mc manifestContents, p tablePersister, c conjoiner, memTableSize uint64) *NomsBlockStore { if memTableSize == 0 { memTableSize = defaultMemTableSize } stats := NewStats() return &NomsBlockStore{ mm: mm, p: p, c: c, mtSize: memTableSize, stats: stats, upstream: mc, tables: newTableSet(p).Rebase(mc.specs, stats), } } func (nbs *NomsBlockStore) Put(c chunks.Chunk) { t1 := time.Now() a := addr(c.Hash()) d.PanicIfFalse(nbs.addChunk(a, c.Data())) nbs.putCount++ nbs.stats.PutLatency.SampleTimeSince(t1) } // TODO: figure out if there's a non-error reason for this to return false. If not, get rid of return value. 
func (nbs *NomsBlockStore) addChunk(h addr, data []byte) bool { nbs.mu.Lock() defer nbs.mu.Unlock() if nbs.mt == nil { nbs.mt = newMemTable(nbs.mtSize) } if !nbs.mt.addChunk(h, data) { nbs.tables = nbs.tables.Prepend(nbs.mt, nbs.stats) nbs.mt = newMemTable(nbs.mtSize) return nbs.mt.addChunk(h, data) } return true } func (nbs *NomsBlockStore) Get(h hash.Hash) chunks.Chunk { t1 := time.Now() defer func() { nbs.stats.GetLatency.SampleTimeSince(t1) nbs.stats.ChunksPerGet.Sample(1) }() a := addr(h) data, tables := func() (data []byte, tables chunkReader) { nbs.mu.RLock() defer nbs.mu.RUnlock() if nbs.mt != nil { data = nbs.mt.get(a, nbs.stats) } return data, nbs.tables }() if data != nil { return chunks.NewChunkWithHash(h, data) } if data := tables.get(a, nbs.stats); data != nil { return chunks.NewChunkWithHash(h, data) } return chunks.EmptyChunk } func (nbs *NomsBlockStore) GetMany(hashes hash.HashSet, foundChunks chan *chunks.Chunk) { t1 := time.Now() reqs := toGetRecords(hashes) defer func() { if len(hashes) > 0 { nbs.stats.GetLatency.SampleTimeSince(t1) nbs.stats.ChunksPerGet.Sample(uint64(len(reqs))) } }() wg := &sync.WaitGroup{} tables, remaining := func() (tables chunkReader, remaining bool) { nbs.mu.RLock() defer nbs.mu.RUnlock() tables = nbs.tables remaining = true if nbs.mt != nil { remaining = nbs.mt.getMany(reqs, foundChunks, nil, nbs.stats) } return }() if remaining { tables.getMany(reqs, foundChunks, wg, nbs.stats) wg.Wait() } } func toGetRecords(hashes hash.HashSet) []getRecord { reqs := make([]getRecord, len(hashes)) idx := 0 for h := range hashes { a := addr(h) reqs[idx] = getRecord{ a: &a, prefix: a.Prefix(), } idx++ } sort.Sort(getRecordByPrefix(reqs)) return reqs } func (nbs *NomsBlockStore) CalcReads(hashes hash.HashSet, blockSize uint64) (reads int, split bool) { reqs := toGetRecords(hashes) tables := func() (tables tableSet) { nbs.mu.RLock() defer nbs.mu.RUnlock() tables = nbs.tables return }() reads, split, remaining := tables.calcReads(reqs, 
blockSize) d.Chk.False(remaining) return } func (nbs *NomsBlockStore) extractChunks(chunkChan chan<- *chunks.Chunk) { ch := make(chan extractRecord, 1) go func() { defer close(ch) nbs.mu.RLock() defer nbs.mu.RUnlock() // Chunks in nbs.tables were inserted before those in nbs.mt, so extract chunks there _first_ nbs.tables.extract(ch) if nbs.mt != nil { nbs.mt.extract(ch) } }() for rec := range ch { c := chunks.NewChunkWithHash(hash.Hash(rec.a), rec.data) chunkChan <- &c } } func (nbs *NomsBlockStore) Count() uint32 { count, tables := func() (count uint32, tables chunkReader) { nbs.mu.RLock() defer nbs.mu.RUnlock() if nbs.mt != nil { count = nbs.mt.count() } return count, nbs.tables }() return count + tables.count() } func (nbs *NomsBlockStore) Has(h hash.Hash) bool { t1 := time.Now() defer func() { nbs.stats.HasLatency.SampleTimeSince(t1) nbs.stats.AddressesPerHas.Sample(1) }() a := addr(h) has, tables := func() (bool, chunkReader) { nbs.mu.RLock() defer nbs.mu.RUnlock() return nbs.mt != nil && nbs.mt.has(a), nbs.tables }() has = has || tables.has(a) return has } func (nbs *NomsBlockStore) HasMany(hashes hash.HashSet) hash.HashSet { t1 := time.Now() reqs := toHasRecords(hashes) tables, remaining := func() (tables chunkReader, remaining bool) { nbs.mu.RLock() defer nbs.mu.RUnlock() tables = nbs.tables remaining = true if nbs.mt != nil { remaining = nbs.mt.hasMany(reqs) } return }() if remaining { tables.hasMany(reqs) } if len(hashes) > 0 { nbs.stats.HasLatency.SampleTimeSince(t1) nbs.stats.AddressesPerHas.SampleLen(len(reqs)) } absent := hash.HashSet{} for _, r := range reqs { if !r.has { absent.Insert(hash.New(r.a[:])) } } return absent } func toHasRecords(hashes hash.HashSet) []hasRecord { reqs := make([]hasRecord, len(hashes)) idx := 0 for h := range hashes { a := addr(h) reqs[idx] = hasRecord{ a: &a, prefix: a.Prefix(), order: idx, } idx++ } sort.Sort(hasRecordByPrefix(reqs)) return reqs } func (nbs *NomsBlockStore) Rebase() { nbs.mu.Lock() defer nbs.mu.Unlock() 
if exists, contents := nbs.mm.Fetch(nbs.stats); exists { nbs.upstream = contents nbs.tables = nbs.tables.Rebase(contents.specs, nbs.stats) } } func (nbs *NomsBlockStore) Root() hash.Hash { nbs.mu.RLock() defer nbs.mu.RUnlock() return nbs.upstream.root } func (nbs *NomsBlockStore) Commit(current, last hash.Hash) bool { t1 := time.Now() defer nbs.stats.CommitLatency.SampleTimeSince(t1) anyPossiblyNovelChunks := func() bool { nbs.mu.Lock() defer nbs.mu.Unlock() return nbs.mt != nil || nbs.tables.Novel() > 0 } if !anyPossiblyNovelChunks() && current == last { nbs.Rebase() return true } func() { // This is unfortunate. We want to serialize commits to the same store // so that we avoid writing a bunch of unreachable small tables which result // from optismistic lock failures. However, this means that the time to // write tables is included in "commit" time and if all commits are // serialized, it means alot more waiting. Allow "non-trivial" tables to be // persisted outside of the commit-lock. 
nbs.mu.Lock() defer nbs.mu.Unlock() if nbs.mt != nil && nbs.mt.count() > preflushChunkCount { nbs.tables = nbs.tables.Prepend(nbs.mt, nbs.stats) nbs.mt = nil } }() nbs.mm.LockForUpdate() defer nbs.mm.UnlockForUpdate() for { if err := nbs.updateManifest(current, last); err == nil { return true } else if err == errOptimisticLockFailedRoot || err == errLastRootMismatch { return false } } } var ( errLastRootMismatch = fmt.Errorf("last does not match nbs.Root()") errOptimisticLockFailedRoot = fmt.Errorf("Root moved") errOptimisticLockFailedTables = fmt.Errorf("Tables changed") ) func (nbs *NomsBlockStore) updateManifest(current, last hash.Hash) error { nbs.mu.Lock() defer nbs.mu.Unlock() if nbs.upstream.root != last { return errLastRootMismatch } handleOptimisticLockFailure := func(upstream manifestContents) error { nbs.upstream = upstream nbs.tables = nbs.tables.Rebase(upstream.specs, nbs.stats) if last != upstream.root { return errOptimisticLockFailedRoot } return errOptimisticLockFailedTables } if cached, doomed := nbs.mm.updateWillFail(nbs.upstream.lock); doomed { // Pre-emptive optimistic lock failure. Someone else in-process moved to the root, the set of tables, or both out from under us. return handleOptimisticLockFailure(cached) } if nbs.mt != nil && nbs.mt.count() > 0 { nbs.tables = nbs.tables.Prepend(nbs.mt, nbs.stats) nbs.mt = nil } if nbs.c.ConjoinRequired(nbs.tables) { nbs.upstream = nbs.c.Conjoin(nbs.upstream, nbs.mm, nbs.p, nbs.stats) nbs.tables = nbs.tables.Rebase(nbs.upstream.specs, nbs.stats) return errOptimisticLockFailedTables } specs := nbs.tables.ToSpecs() newContents := manifestContents{ vers: constants.NomsVersion, root: current, lock: generateLockHash(current, specs), specs: specs, } upstream := nbs.mm.Update(nbs.upstream.lock, newContents, nbs.stats, nil) if newContents.lock != upstream.lock { // Optimistic lock failure. Someone else moved to the root, the set of tables, or both out from under us. 
return handleOptimisticLockFailure(upstream) } nbs.upstream = newContents nbs.tables = nbs.tables.Flatten() return nil } func (nbs *NomsBlockStore) Version() string { return nbs.upstream.vers } func (nbs *NomsBlockStore) Close() (err error) { return } func (nbs *NomsBlockStore) Stats() interface{} { return *nbs.stats } func (nbs *NomsBlockStore) StatsSummary() string { nbs.mu.Lock() defer nbs.mu.Unlock() return fmt.Sprintf("Root: %s; Chunk Count %d; Physical Bytes %s", nbs.upstream.root, nbs.tables.count(), humanize.Bytes(nbs.tables.physicalLen())) } ================================================ FILE: go/nbs/table.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "crypto/sha512" "encoding/base32" "encoding/binary" "hash/crc32" "io" "sync" "github.com/attic-labs/noms/go/chunks" ) /* An NBS Table stores N byte slices ("chunks") which are addressed by a 20-byte hash of their contents. The footer encodes N as well as the total bytes consumed by all contained chunks. An Index maps each address to the position of its corresponding chunk. Addresses are logically sorted within the Index, but the corresponding chunks need not be. Table: +----------------+----------------+-----+----------------+-------+--------+ | Chunk Record 0 | Chunk Record 1 | ... | Chunk Record N | Index | Footer | +----------------+----------------+-----+----------------+-------+--------+ Chunk Record: +---------------------------+----------------+ | (Chunk Length) Chunk Data | (Uint32) CRC32 | +---------------------------+----------------+ Index: +------------+---------+----------+ | Prefix Map | Lengths | Suffixes | +------------+---------+----------+ Prefix Map: +--------------+--------------+-----+----------------+ | Prefix Tuple | Prefix Tuple | ... 
| Prefix Tuple N | +--------------+--------------+-----+----------------+ -The Prefix Map contains N Prefix Tuples. -Each Prefix Tuple corresponds to a unique Chunk Record in the Table. -The Prefix Tuples are sorted in increasing lexicographic order within the Prefix Map. -NB: THE SAME PREFIX MAY APPEAR MULTIPLE TIMES, as distinct Hashes (referring to distinct Chunks) may share the same Prefix. Prefix Tuple: +-----------------+------------------+ | (8) Hash Prefix | (Uint32) Ordinal | +-----------------+------------------+ -First 8 bytes of a Chunk's Hash -Ordinal is the 0-based ordinal position of the associated record within the sequence of chunk records, the associated Length within Lengths, and the associated Hash Suffix within Suffixes. Lengths: +-----------------+-----------------+-----+-------------------+ | (Uint32) Length | (Uint32) Length | ... | (Uint32) Length N | +-----------------+-----------------+-----+-------------------+ - Each Length is the length of a Chunk Record in this Table. - Length M must correspond to Chunk Record M for 0 <= M <= N Suffixes: +------------------+------------------+-----+--------------------+ | (12) Hash Suffix | (12) Hash Suffix | ... | (12) Hash Suffix N | +------------------+------------------+-----+--------------------+ - Each Hash Suffix is the last 12 bytes of the Hash of a Chunk in this Table. - Hash Suffix M must correspond to Chunk Record M for 0 <= M <= N Footer: +----------------------+----------------------------------------+------------------+ | (Uint32) Chunk Count | (Uint64) Total Uncompressed Chunk Data | (8) Magic Number | +----------------------+----------------------------------------+------------------+ -Total Uncompressed Chunk Data is the sum of the uncompressed byte lengths of all contained chunk byte slices. -Magic Number is the first 8 bytes of the SHA256 hash of "https://github.com/attic-labs/nbs".
NOTE: Unsigned integer quantities, hashes and hash suffixes are all encoded big-endian. Looking up Chunks in an NBS Table There are two phases to loading chunk data for a given Hash from an NBS Table: Checking for the chunk's presence, and fetching the chunk's bytes. When performing a has-check, only the first phase is necessary. Phase one: Chunk presence - Slice off the first 8 bytes of your Hash to create a Prefix - Since the Prefix Tuples in the Prefix Map are in lexicographic order, binary search the Prefix Map for the desired Prefix. - For all Prefix Tuples with a matching Prefix: - Load the Ordinal - Use the Ordinal to index into Suffixes - Check the Suffix of your Hash against the loaded Suffix - If they match, your chunk is in this Table in the Chunk Record indicated by Ordinal - If they don't match, continue to the next matching Prefix Tuple - If not found, your chunk is not in this Table. Phase two: Loading Chunk data - Take the Ordinal discovered in Phase one - Calculate the Offset of your desired Chunk Record: Sum(Lengths[0]...Lengths[Ordinal-1]) - Load Lengths[Ordinal] bytes from Table[Offset] - Check the first 4 bytes of the loaded data against the last 4 bytes of your desired Hash. They should match, and the rest of the data is your Chunk data.
*/ const ( addrSize uint64 = 20 addrPrefixSize uint64 = 8 addrSuffixSize = addrSize - addrPrefixSize uint64Size uint64 = 8 uint32Size uint64 = 4 ordinalSize uint64 = uint32Size lengthSize uint64 = uint32Size magicNumber = "\xff\xb5\xd8\xc2\x24\x63\xee\x50" magicNumberSize uint64 = uint64(len(magicNumber)) footerSize = uint32Size + uint64Size + magicNumberSize prefixTupleSize = addrPrefixSize + ordinalSize checksumSize uint64 = uint32Size maxChunkLengthSize uint64 = binary.MaxVarintLen64 maxChunkSize uint64 = 0xffffffff // Snappy won't compress slices bigger than this ) var crcTable = crc32.MakeTable(crc32.Castagnoli) func crc(b []byte) uint32 { return crc32.Update(0, crcTable, b) } func computeAddrDefault(data []byte) addr { r := sha512.Sum512(data) h := addr{} copy(h[:], r[:addrSize]) return h } var computeAddr = computeAddrDefault type addr [addrSize]byte var encoding = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv") func (a addr) String() string { return encoding.EncodeToString(a[:]) } func (a addr) Prefix() uint64 { return binary.BigEndian.Uint64(a[:]) } func (a addr) Checksum() uint32 { return binary.BigEndian.Uint32(a[addrSize-checksumSize:]) } func ParseAddr(b []byte) (h addr) { encoding.Decode(h[:], b) return } func ValidateAddr(s string) bool { _, err := encoding.DecodeString(s) return err == nil } type addrSlice []addr func (hs addrSlice) Len() int { return len(hs) } func (hs addrSlice) Less(i, j int) bool { return bytes.Compare(hs[i][:], hs[j][:]) < 0 } func (hs addrSlice) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } type hasRecord struct { a *addr prefix uint64 order int has bool } type hasRecordByPrefix []hasRecord func (hs hasRecordByPrefix) Len() int { return len(hs) } func (hs hasRecordByPrefix) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix } func (hs hasRecordByPrefix) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } type hasRecordByOrder []hasRecord func (hs hasRecordByOrder) Len() int { return len(hs) } func (hs 
hasRecordByOrder) Less(i, j int) bool { return hs[i].order < hs[j].order } func (hs hasRecordByOrder) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } type getRecord struct { a *addr prefix uint64 found bool } type getRecordByPrefix []getRecord func (hs getRecordByPrefix) Len() int { return len(hs) } func (hs getRecordByPrefix) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix } func (hs getRecordByPrefix) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } type extractRecord struct { a addr data []byte err interface{} // only set when there was a panic during extraction. } type chunkReader interface { has(h addr) bool hasMany(addrs []hasRecord) bool get(h addr, stats *Stats) []byte getMany(reqs []getRecord, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats) bool count() uint32 uncompressedLen() uint64 extract(chunks chan<- extractRecord) } type chunkReadPlanner interface { findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool) getManyAtOffsets( reqs []getRecord, offsetRecords offsetRecSlice, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats, ) (remaining bool) } type chunkSource interface { chunkReader hash() addr calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool) // opens a Reader to the first byte of the chunkData segment of this table. reader() io.Reader index() tableIndex } type chunkSources []chunkSource ================================================ FILE: go/nbs/table_persister.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "crypto/sha512" "encoding/binary" "sort" "sync" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/sizecache" ) // tablePersister allows interaction with persistent storage. 
It provides // primitives for pushing the contents of a memTable to persistent storage, // opening persistent tables for reading, and conjoining a number of existing // chunkSources into one. A tablePersister implementation must be goroutine- // safe. type tablePersister interface { // Persist makes the contents of mt durable. Chunks already present in // |haver| may be dropped in the process. Persist(mt *memTable, haver chunkReader, stats *Stats) chunkSource // ConjoinAll conjoins all chunks in |sources| into a single, new // chunkSource. ConjoinAll(sources chunkSources, stats *Stats) chunkSource // Open a table named |name|, containing |chunkCount| chunks. Open(name addr, chunkCount uint32, stats *Stats) chunkSource } // indexCache provides sized storage for table indices. While getting and/or // setting the cache entry for a given table name, the caller MUST hold the // lock that for that entry. type indexCache struct { cache *sizecache.SizeCache cond *sync.Cond locked map[addr]struct{} } // Returns an indexCache which will burn roughly |size| bytes of memory. func newIndexCache(size uint64) *indexCache { return &indexCache{sizecache.New(size), sync.NewCond(&sync.Mutex{}), map[addr]struct{}{}} } // Take an exclusive lock on the cache entry for |name|. 
Callers must do this // before calling get(addr) or put(addr, index) func (sic *indexCache) lockEntry(name addr) { sic.cond.L.Lock() defer sic.cond.L.Unlock() for { if _, present := sic.locked[name]; !present { sic.locked[name] = struct{}{} break } sic.cond.Wait() } } func (sic *indexCache) unlockEntry(name addr) { sic.cond.L.Lock() defer sic.cond.L.Unlock() _, ok := sic.locked[name] d.PanicIfFalse(ok) delete(sic.locked, name) sic.cond.Broadcast() } func (sic *indexCache) get(name addr) (tableIndex, bool) { if idx, found := sic.cache.Get(name); found { return idx.(tableIndex), true } return tableIndex{}, false } func (sic *indexCache) put(name addr, idx tableIndex) { indexSize := uint64(idx.chunkCount) * (addrSize + ordinalSize + lengthSize + uint64Size) sic.cache.Add(name, indexSize, idx) } type chunkSourcesByAscendingCount chunkSources func (csbc chunkSourcesByAscendingCount) Len() int { return len(csbc) } func (csbc chunkSourcesByAscendingCount) Less(i, j int) bool { srcI, srcJ := csbc[i], csbc[j] if srcI.count() == srcJ.count() { hi, hj := srcI.hash(), srcJ.hash() return bytes.Compare(hi[:], hj[:]) < 0 } return srcI.count() < srcJ.count() } func (csbc chunkSourcesByAscendingCount) Swap(i, j int) { csbc[i], csbc[j] = csbc[j], csbc[i] } type chunkSourcesByDescendingDataSize []sourceWithSize func (csbds chunkSourcesByDescendingDataSize) Len() int { return len(csbds) } func (csbds chunkSourcesByDescendingDataSize) Less(i, j int) bool { swsI, swsJ := csbds[i], csbds[j] if swsI.dataLen == swsJ.dataLen { hi, hj := swsI.source.hash(), swsJ.source.hash() return bytes.Compare(hi[:], hj[:]) < 0 } return swsI.dataLen > swsJ.dataLen } func (csbds chunkSourcesByDescendingDataSize) Swap(i, j int) { csbds[i], csbds[j] = csbds[j], csbds[i] } type sourceWithSize struct { source chunkSource dataLen uint64 } type compactionPlan struct { sources chunkSourcesByDescendingDataSize mergedIndex []byte chunkCount uint32 totalCompressedData uint64 } func (cp compactionPlan) lengths() 
[]byte { lengthsStart := uint64(cp.chunkCount) * prefixTupleSize return cp.mergedIndex[lengthsStart : lengthsStart+uint64(cp.chunkCount)*lengthSize] } func (cp compactionPlan) suffixes() []byte { suffixesStart := uint64(cp.chunkCount) * (prefixTupleSize + lengthSize) return cp.mergedIndex[suffixesStart : suffixesStart+uint64(cp.chunkCount)*addrSuffixSize] } func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan) { var totalUncompressedData uint64 for _, src := range sources { totalUncompressedData += src.uncompressedLen() index := src.index() plan.chunkCount += index.chunkCount // Calculate the amount of chunk data in |src| chunkDataLen := calcChunkDataLen(index) plan.sources = append(plan.sources, sourceWithSize{src, chunkDataLen}) plan.totalCompressedData += chunkDataLen } sort.Sort(plan.sources) lengthsPos := lengthsOffset(plan.chunkCount) suffixesPos := suffixesOffset(plan.chunkCount) plan.mergedIndex = make([]byte, indexSize(plan.chunkCount)+footerSize) prefixIndexRecs := make(prefixIndexSlice, 0, plan.chunkCount) var ordinalOffset uint32 for _, sws := range plan.sources { index := sws.source.index() // Add all the prefix tuples from this index to the list of all prefixIndexRecs, modifying the ordinals such that all entries from the 1st item in sources come after those in the 0th and so on. 
for j, prefix := range index.prefixes { rec := prefixIndexRec{prefix: prefix, order: ordinalOffset + index.ordinals[j]} prefixIndexRecs = append(prefixIndexRecs, rec) } ordinalOffset += sws.source.count() // TODO: copy the lengths and suffixes as a byte-copy from src BUG #3438 // Bring over the lengths block, in order for _, length := range index.lengths { binary.BigEndian.PutUint32(plan.mergedIndex[lengthsPos:], length) lengthsPos += lengthSize } // Bring over the suffixes block, in order n := copy(plan.mergedIndex[suffixesPos:], index.suffixes) d.Chk.True(n == len(index.suffixes)) suffixesPos += uint64(n) } // Sort all prefixTuples by hash and then insert them starting at the beginning of plan.mergedIndex sort.Sort(prefixIndexRecs) var pfxPos uint64 for _, pi := range prefixIndexRecs { binary.BigEndian.PutUint64(plan.mergedIndex[pfxPos:], pi.prefix) pfxPos += addrPrefixSize binary.BigEndian.PutUint32(plan.mergedIndex[pfxPos:], pi.order) pfxPos += ordinalSize } writeFooter(plan.mergedIndex[uint64(len(plan.mergedIndex))-footerSize:], plan.chunkCount, totalUncompressedData) stats.BytesPerConjoin.Sample(uint64(plan.totalCompressedData) + uint64(len(plan.mergedIndex))) return plan } func nameFromSuffixes(suffixes []byte) (name addr) { sha := sha512.New() sha.Write(suffixes) var h []byte h = sha.Sum(h) // Appends hash to h copy(name[:], h) return } func calcChunkDataLen(index tableIndex) uint64 { return index.offsets[index.chunkCount-1] + uint64(index.lengths[index.chunkCount-1]) } ================================================ FILE: go/nbs/table_persister_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "testing" "github.com/stretchr/testify/assert" ) func TestPlanCompaction(t *testing.T) { assert := assert.New(t) tableContents := [][][]byte{ {[]byte("hello2"), []byte("goodbye2"), []byte("badbye2")}, {[]byte("red"), []byte("blue")}, {[]byte("solo")}, } var sources chunkSources var dataLens []uint64 var totalUnc uint64 for _, content := range tableContents { for _, chnk := range content { totalUnc += uint64(len(chnk)) } data, name := buildTable(content) src := chunkSourceAdapter{newTableReader(parseTableIndex(data), tableReaderAtFromBytes(data), fileBlockSize), name} dataLens = append(dataLens, uint64(len(data))-indexSize(src.count())-footerSize) sources = append(sources, src) } plan := planConjoin(sources, &Stats{}) var totalChunks uint32 for i, src := range sources { assert.Equal(dataLens[i], plan.sources[i].dataLen) totalChunks += src.count() } idx := parseTableIndex(plan.mergedIndex) assert.Equal(totalChunks, idx.chunkCount) assert.Equal(totalUnc, idx.totalUncompressedData) tr := newTableReader(idx, tableReaderAtFromBytes(nil), fileBlockSize) for _, content := range tableContents { assertChunksInReader(content, tr, assert) } } ================================================ FILE: go/nbs/table_reader.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "bytes" "encoding/binary" "io" "sort" "sync" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/golang/snappy" ) type tableIndex struct { chunkCount uint32 totalUncompressedData uint64 prefixes, offsets []uint64 lengths, ordinals []uint32 suffixes []byte } type tableReaderAt interface { ReadAtWithStats(p []byte, off int64, stats *Stats) (n int, err error) } // tableReader implements get & has queries against a single nbs table. goroutine safe. // |blockSize| refers to the block-size of the underlying storage. We assume that, each time we read data, we actually have to read in blocks of this size. So, we're willing to tolerate up to |blockSize| overhead each time we read a chunk, if it helps us group more chunks together into a single read request to backing storage. type tableReader struct { tableIndex r tableReaderAt blockSize uint64 } // parses a valid nbs tableIndex from a byte stream. |buff| must end with an NBS index and footer, though it may contain an unspecified number of bytes before that data. |tableIndex| doesn't keep alive any references to |buff|. 
func parseTableIndex(buff []byte) tableIndex { pos := uint64(len(buff)) // footer pos -= magicNumberSize d.Chk.True(string(buff[pos:]) == magicNumber) // total uncompressed chunk data pos -= uint64Size totalUncompressedData := binary.BigEndian.Uint64(buff[pos:]) pos -= uint32Size chunkCount := binary.BigEndian.Uint32(buff[pos:]) // index suffixesSize := uint64(chunkCount) * addrSuffixSize pos -= suffixesSize suffixes := make([]byte, suffixesSize) copy(suffixes, buff[pos:]) lengthsSize := uint64(chunkCount) * lengthSize pos -= lengthsSize lengths, offsets := computeOffsets(chunkCount, buff[pos:pos+lengthsSize]) tuplesSize := uint64(chunkCount) * prefixTupleSize pos -= tuplesSize prefixes, ordinals := computePrefixes(chunkCount, buff[pos:pos+tuplesSize]) return tableIndex{ chunkCount, totalUncompressedData, prefixes, offsets, lengths, ordinals, suffixes, } } func computeOffsets(count uint32, buff []byte) (lengths []uint32, offsets []uint64) { lengths = make([]uint32, count) offsets = make([]uint64, count) lengths[0] = binary.BigEndian.Uint32(buff) for i := uint64(1); i < uint64(count); i++ { lengths[i] = binary.BigEndian.Uint32(buff[i*lengthSize:]) offsets[i] = offsets[i-1] + uint64(lengths[i-1]) } return } func computePrefixes(count uint32, buff []byte) (prefixes []uint64, ordinals []uint32) { prefixes = make([]uint64, count) ordinals = make([]uint32, count) for i := uint64(0); i < uint64(count); i++ { idx := i * prefixTupleSize prefixes[i] = binary.BigEndian.Uint64(buff[idx:]) ordinals[i] = binary.BigEndian.Uint32(buff[idx+addrPrefixSize:]) } return } func (ti tableIndex) prefixIdxToOrdinal(idx uint32) uint32 { return ti.ordinals[idx] } // returns the first position in |tr.prefixes| whose value == |prefix|. Returns |tr.chunkCount| // if absent func (ti tableIndex) prefixIdx(prefix uint64) (idx uint32) { // NOTE: The golang impl of sort.Search is basically inlined here. 
This method can be called in // an extremely tight loop and inlining the code was a significant perf improvement. idx, j := 0, ti.chunkCount for idx < j { h := idx + (j-idx)/2 // avoid overflow when computing h // i ≤ h < j if ti.prefixes[h] < prefix { idx = h + 1 // preserves f(i-1) == false } else { j = h // preserves f(j) == true } } return } // Return true IFF the suffix at insertion order |ordinal| matches the address |a|. func (ti tableIndex) ordinalSuffixMatches(ordinal uint32, h addr) bool { li := uint64(ordinal) * addrSuffixSize return bytes.Compare(h[addrPrefixSize:], ti.suffixes[li:li+addrSuffixSize]) == 0 } // returns the ordinal of |h| if present. returns |ti.chunkCount| if absent func (ti tableIndex) lookupOrdinal(h addr) uint32 { prefix := h.Prefix() for idx := ti.prefixIdx(prefix); idx < ti.chunkCount && ti.prefixes[idx] == prefix; idx++ { ordinal := ti.prefixIdxToOrdinal(idx) if ti.ordinalSuffixMatches(ordinal, h) { return ordinal } } return ti.chunkCount } // newTableReader parses a valid nbs table byte stream and returns a reader. buff must end with an NBS index and footer, though it may contain an unspecified number of bytes before that data. r should allow retrieving any desired range of bytes from the table. func newTableReader(index tableIndex, r tableReaderAt, blockSize uint64) tableReader { return tableReader{index, r, blockSize} } // Scan across (logically) two ordered slices of address prefixes. 
func (tr tableReader) hasMany(addrs []hasRecord) (remaining bool) { // TODO: Use findInIndex if (tr.chunkCount - len(addrs)*Log2(tr.chunkCount)) > (tr.chunkCount - len(addrs)) filterIdx := uint32(0) filterLen := uint32(len(tr.prefixes)) for i, addr := range addrs { if addr.has { continue } for filterIdx < filterLen && addr.prefix > tr.prefixes[filterIdx] { filterIdx++ } if filterIdx >= filterLen { remaining = true return } if addr.prefix != tr.prefixes[filterIdx] { remaining = true continue } // prefixes are equal, so locate and compare against the corresponding suffix for j := filterIdx; j < filterLen && addr.prefix == tr.prefixes[j]; j++ { if tr.ordinalSuffixMatches(tr.prefixIdxToOrdinal(j), *addr.a) { addrs[i].has = true break } } if !addrs[i].has { remaining = true } } return } func (tr tableReader) count() uint32 { return tr.chunkCount } func (tr tableReader) uncompressedLen() uint64 { return tr.totalUncompressedData } func (tr tableReader) index() tableIndex { return tr.tableIndex } // returns true iff |h| can be found in this table. func (tr tableReader) has(h addr) bool { ordinal := tr.lookupOrdinal(h) return ordinal < tr.count() } // returns the storage associated with |h|, iff present. Returns nil if absent. On success, // the returned byte slice directly references the underlying storage. 
func (tr tableReader) get(h addr, stats *Stats) (data []byte) { ordinal := tr.lookupOrdinal(h) if ordinal == tr.count() { return } offset := tr.offsets[ordinal] length := uint64(tr.lengths[ordinal]) buff := make([]byte, length) // TODO: Avoid this allocation for every get n, err := tr.r.ReadAtWithStats(buff, int64(offset), stats) d.Chk.NoError(err) d.Chk.True(n == int(length)) data = tr.parseChunk(buff) d.Chk.True(data != nil) return } type offsetRec struct { a *addr ordinal uint32 offset uint64 } type offsetRecSlice []offsetRec func (hs offsetRecSlice) Len() int { return len(hs) } func (hs offsetRecSlice) Less(i, j int) bool { return hs[i].offset < hs[j].offset } func (hs offsetRecSlice) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } func (tr tableReader) readAtOffsets( readStart, readEnd uint64, reqs []getRecord, offsets offsetRecSlice, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats, ) { readLength := readEnd - readStart buff := make([]byte, readLength) n, err := tr.r.ReadAtWithStats(buff, int64(readStart), stats) d.Chk.NoError(err) d.Chk.True(uint64(n) == readLength) for _, rec := range offsets { d.Chk.True(rec.offset >= readStart) localStart := rec.offset - readStart localEnd := localStart + uint64(tr.lengths[rec.ordinal]) d.Chk.True(localEnd <= readLength) data := tr.parseChunk(buff[localStart:localEnd]) c := chunks.NewChunkWithHash(hash.Hash(*rec.a), data) foundChunks <- &c } wg.Done() } // getMany retrieves multiple stored blocks and optimizes by attempting to read in larger physical // blocks which contain multiple stored blocks. |reqs| must be sorted by address prefix. func (tr tableReader) getMany( reqs []getRecord, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats, ) (remaining bool) { // Pass #1: Iterate over |reqs| and |tr.prefixes| (both sorted by address) and build the set // of table locations which must be read in order to satisfy the getMany operation. 
offsetRecords, remaining := tr.findOffsets(reqs) tr.getManyAtOffsets(reqs, offsetRecords, foundChunks, wg, stats) return remaining } func (tr tableReader) getManyAtOffsets( reqs []getRecord, offsetRecords offsetRecSlice, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats, ) { // Now |offsetRecords| contains all locations within the table which must be search (note // that there may be duplicates of a particular location). Sort by offset and scan forward, // grouping sequences of reads into large physical reads. var batch offsetRecSlice var readStart, readEnd uint64 for i := 0; i < len(offsetRecords); { rec := offsetRecords[i] length := tr.lengths[rec.ordinal] if batch == nil { batch = make(offsetRecSlice, 1) batch[0] = offsetRecords[i] readStart = rec.offset readEnd = readStart + uint64(length) i++ continue } if newReadEnd, canRead := canReadAhead(rec, tr.lengths[rec.ordinal], readStart, readEnd, tr.blockSize); canRead { batch = append(batch, rec) readEnd = newReadEnd i++ continue } wg.Add(1) go tr.readAtOffsets(readStart, readEnd, reqs, batch, foundChunks, wg, stats) batch = nil } if batch != nil { wg.Add(1) go tr.readAtOffsets(readStart, readEnd, reqs, batch, foundChunks, wg, stats) batch = nil } return } // findOffsets iterates over |reqs| and |tr.prefixes| (both sorted by // address) to build the set of table locations which must be read in order to // find each chunk specified by |reqs|. If this table contains all requested // chunks remaining will be set to false upon return. If some are not here, // then remaining will be true. The result offsetRecSlice is sorted in offset // order. func (tr tableReader) findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool) { filterIdx := uint32(0) filterLen := uint32(len(tr.prefixes)) ors = make(offsetRecSlice, 0, len(reqs)) // Iterate over |reqs| and |tr.prefixes| (both sorted by address) and build the set // of table locations which must be read in order to satisfy |reqs|. 
for i, req := range reqs { if req.found { continue } // advance within the prefixes until we reach one which is >= req.prefix for filterIdx < filterLen && tr.prefixes[filterIdx] < req.prefix { filterIdx++ } if filterIdx >= filterLen { remaining = true // last prefix visited. break } if req.prefix != tr.prefixes[filterIdx] { remaining = true continue } // record all offsets within the table which contain the data required. for j := filterIdx; j < filterLen && req.prefix == tr.prefixes[j]; j++ { if tr.ordinalSuffixMatches(tr.prefixIdxToOrdinal(j), *req.a) { reqs[i].found = true ors = append(ors, offsetRec{req.a, tr.ordinals[j], tr.offsets[tr.ordinals[j]]}) } } } sort.Sort(ors) return ors, remaining } func canReadAhead(fRec offsetRec, fLength uint32, readStart, readEnd, blockSize uint64) (newEnd uint64, canRead bool) { if fRec.offset < readEnd { // |offsetRecords| will contain an offsetRecord for *every* chunkRecord whose address // prefix matches the prefix of a requested address. If the set of requests contains // addresses which share a common prefix, then it's possible for multiple offsetRecords // to reference the same table offset position. In that case, we'll see sequential // offsetRecords with the same fRec.offset. return readEnd, true } if fRec.offset-readEnd > blockSize { return readEnd, false } return fRec.offset + uint64(fLength), true } // Fetches the byte stream of data logically encoded within the table starting at |pos|. 
func (tr tableReader) parseChunk(buff []byte) []byte { dataLen := uint64(len(buff)) - checksumSize chksum := binary.BigEndian.Uint32(buff[dataLen:]) d.Chk.True(chksum == crc(buff[:dataLen])) data, err := snappy.Decode(nil, buff[:dataLen]) d.Chk.NoError(err) return data } func (tr tableReader) calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool) { var offsetRecords offsetRecSlice // Pass #1: Build the set of table locations which must be read in order to find all the elements of |reqs| which are present in this table. offsetRecords, remaining = tr.findOffsets(reqs) // Now |offsetRecords| contains all locations within the table which must // be searched (note that there may be duplicates of a particular // location). Scan forward, grouping sequences of reads into large physical // reads. var readStart, readEnd uint64 readStarted := false for i := 0; i < len(offsetRecords); { rec := offsetRecords[i] length := tr.lengths[rec.ordinal] if !readStarted { readStarted = true reads++ readStart = rec.offset readEnd = readStart + uint64(length) i++ continue } if newReadEnd, canRead := canReadAhead(rec, tr.lengths[rec.ordinal], readStart, readEnd, tr.blockSize); canRead { readEnd = newReadEnd i++ continue } readStarted = false } return } func (tr tableReader) extract(chunks chan<- extractRecord) { // Build reverse lookup table from ordinal -> chunk hash hashes := make(addrSlice, len(tr.prefixes)) for idx, prefix := range tr.prefixes { ordinal := tr.prefixIdxToOrdinal(uint32(idx)) binary.BigEndian.PutUint64(hashes[ordinal][:], prefix) li := uint64(ordinal) * addrSuffixSize copy(hashes[ordinal][addrPrefixSize:], tr.suffixes[li:li+addrSuffixSize]) } chunkLen := tr.offsets[tr.chunkCount-1] + uint64(tr.lengths[tr.chunkCount-1]) buff := make([]byte, chunkLen) n, err := tr.r.ReadAtWithStats(buff, int64(tr.offsets[0]), &Stats{}) d.Chk.NoError(err) d.Chk.True(uint64(n) == chunkLen) sendChunk := func(i uint32) { localOffset := tr.offsets[i] - tr.offsets[0] chunks <- 
extractRecord{a: hashes[i], data: tr.parseChunk(buff[localOffset : localOffset+uint64(tr.lengths[i])])} } for i := uint32(0); i < tr.chunkCount; i++ { sendChunk(i) } } func (tr tableReader) reader() io.Reader { return &readerAdapter{tr.r, 0} } type readerAdapter struct { rat tableReaderAt off int64 } func (ra *readerAdapter) Read(p []byte) (n int, err error) { n, err = ra.rat.ReadAtWithStats(p, ra.off, &Stats{}) ra.off += int64(n) return } ================================================ FILE: go/nbs/table_set.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nbs import ( "sync" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" ) const concurrentCompactions = 5 func newTableSet(persister tablePersister) tableSet { return tableSet{p: persister, rl: make(chan struct{}, concurrentCompactions)} } // tableSet is an immutable set of persistable chunkSources. 
type tableSet struct { novel, upstream chunkSources p tablePersister rl chan struct{} } func (ts tableSet) has(h addr) bool { f := func(css chunkSources) bool { for _, haver := range css { if haver.has(h) { return true } } return false } return f(ts.novel) || f(ts.upstream) } func (ts tableSet) hasMany(addrs []hasRecord) (remaining bool) { f := func(css chunkSources) (remaining bool) { for _, haver := range css { if !haver.hasMany(addrs) { return false } } return true } return f(ts.novel) && f(ts.upstream) } func (ts tableSet) get(h addr, stats *Stats) []byte { f := func(css chunkSources) []byte { for _, haver := range css { if data := haver.get(h, stats); data != nil { return data } } return nil } if data := f(ts.novel); data != nil { return data } return f(ts.upstream) } func (ts tableSet) getMany(reqs []getRecord, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats) (remaining bool) { f := func(css chunkSources) (remaining bool) { for _, haver := range css { if rp, ok := haver.(chunkReadPlanner); ok { offsets, remaining := rp.findOffsets(reqs) go rp.getManyAtOffsets(reqs, offsets, foundChunks, wg, stats) if !remaining { return false } continue } if !haver.getMany(reqs, foundChunks, wg, stats) { return false } } return true } return f(ts.novel) && f(ts.upstream) } func (ts tableSet) calcReads(reqs []getRecord, blockSize uint64) (reads int, split, remaining bool) { f := func(css chunkSources) (int, bool, bool) { reads, split := 0, false for _, haver := range css { rds, rmn := haver.calcReads(reqs, blockSize) reads += rds if !rmn { return reads, split, false } split = true } return reads, split, true } reads, split, remaining = f(ts.novel) if remaining { var rds int rds, split, remaining = f(ts.upstream) reads += rds } return reads, split, remaining } func (ts tableSet) count() uint32 { f := func(css chunkSources) (count uint32) { for _, haver := range css { count += haver.count() } return } return f(ts.novel) + f(ts.upstream) } func (ts tableSet) 
uncompressedLen() uint64 { f := func(css chunkSources) (data uint64) { for _, haver := range css { data += haver.uncompressedLen() } return } return f(ts.novel) + f(ts.upstream) } func (ts tableSet) physicalLen() uint64 { f := func(css chunkSources) (data uint64) { for _, haver := range css { index := haver.index() data += indexSize(index.chunkCount) data += index.offsets[index.chunkCount-1] + (uint64(index.lengths[index.chunkCount-1])) } return } return f(ts.novel) + f(ts.upstream) } // Size returns the number of tables in this tableSet. func (ts tableSet) Size() int { return len(ts.novel) + len(ts.upstream) } // Novel returns the number of tables containing novel chunks in this // tableSet. func (ts tableSet) Novel() int { return len(ts.novel) } // Upstream returns the number of known-persisted tables in this tableSet. func (ts tableSet) Upstream() int { return len(ts.upstream) } // Prepend adds a memTable to an existing tableSet, compacting |mt| and // returning a new tableSet with newly compacted table added. func (ts tableSet) Prepend(mt *memTable, stats *Stats) tableSet { newTs := tableSet{ novel: make(chunkSources, len(ts.novel)+1), upstream: make(chunkSources, len(ts.upstream)), p: ts.p, rl: ts.rl, } newTs.novel[0] = newPersistingChunkSource(mt, ts, ts.p, ts.rl, stats) copy(newTs.novel[1:], ts.novel) copy(newTs.upstream, ts.upstream) return newTs } func (ts tableSet) extract(chunks chan<- extractRecord) { // Since new tables are _prepended_ to a tableSet, extracting chunks in insertOrder requires iterating ts.upstream back to front, followed by ts.novel. for i := len(ts.upstream) - 1; i >= 0; i-- { ts.upstream[i].extract(chunks) } for i := len(ts.novel) - 1; i >= 0; i-- { ts.novel[i].extract(chunks) } } // Flatten returns a new tableSet with |upstream| set to the union of ts.novel // and ts.upstream. 
func (ts tableSet) Flatten() (flattened tableSet) { flattened = tableSet{ upstream: make(chunkSources, 0, ts.Size()), p: ts.p, rl: ts.rl, } for _, src := range ts.novel { if src.count() > 0 { flattened.upstream = append(flattened.upstream, src) } } flattened.upstream = append(flattened.upstream, ts.upstream...) return } // Rebase returns a new tableSet holding the novel tables managed by |ts| and // those specified by |specs|. func (ts tableSet) Rebase(specs []tableSpec, stats *Stats) tableSet { merged := tableSet{ novel: make(chunkSources, 0, len(ts.novel)), upstream: make(chunkSources, 0, len(specs)), p: ts.p, rl: ts.rl, } // Rebase the novel tables, skipping those that are actually empty (usually due to de-duping during table compaction) for _, t := range ts.novel { if t.count() > 0 { merged.novel = append(merged.novel, t) } } // Create a list of tables to open so we can open them in parallel. tablesToOpen := map[addr]tableSpec{} for _, spec := range specs { if _, present := tablesToOpen[spec.name]; !present { // Filter out dups tablesToOpen[spec.name] = spec } } // Open all the new upstream tables concurrently merged.upstream = make(chunkSources, len(tablesToOpen)) wg := &sync.WaitGroup{} i := 0 for _, spec := range tablesToOpen { wg.Add(1) go func(idx int, spec tableSpec) { merged.upstream[idx] = ts.p.Open(spec.name, spec.chunkCount, stats) wg.Done() }(i, spec) i++ } wg.Wait() return merged } func (ts tableSet) ToSpecs() []tableSpec { tableSpecs := make([]tableSpec, 0, ts.Size()) for _, src := range ts.novel { if src.count() > 0 { tableSpecs = append(tableSpecs, tableSpec{src.hash(), src.count()}) } } for _, src := range ts.upstream { d.Chk.True(src.count() > 0) tableSpecs = append(tableSpecs, tableSpec{src.hash(), src.count()}) } return tableSpecs } ================================================ FILE: go/nbs/table_set_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// testChunks is the shared chunk fixture for the tableSet tests below.
var testChunks = [][]byte{[]byte("hello2"), []byte("goodbye2"), []byte("badbye2")}

// Prepending an empty memTable must not produce any table spec.
func TestTableSetPrependEmpty(t *testing.T) {
	ts := newFakeTableSet().Prepend(newMemTable(testMemTableSize), &Stats{})
	assert.Empty(t, ts.ToSpecs())
}

// Each Prepend adds one spec in front of the existing ones.
func TestTableSetPrepend(t *testing.T) {
	assert := assert.New(t)
	ts := newFakeTableSet()
	assert.Empty(ts.ToSpecs())
	mt := newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[0]), testChunks[0])
	ts = ts.Prepend(mt, &Stats{})

	firstSpecs := ts.ToSpecs()
	assert.Len(firstSpecs, 1)

	mt = newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[1]), testChunks[1])
	mt.addChunk(computeAddr(testChunks[2]), testChunks[2])
	ts = ts.Prepend(mt, &Stats{})

	secondSpecs := ts.ToSpecs()
	assert.Len(secondSpecs, 2)
	// The older table's spec must follow the newly prepended one.
	assert.Equal(firstSpecs, secondSpecs[1:])
}

// ToSpecs must skip the empty table sandwiched between two populated ones.
func TestTableSetToSpecsExcludesEmptyTable(t *testing.T) {
	assert := assert.New(t)
	ts := newFakeTableSet()
	assert.Empty(ts.ToSpecs())
	mt := newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[0]), testChunks[0])
	ts = ts.Prepend(mt, &Stats{})

	mt = newMemTable(testMemTableSize)
	ts = ts.Prepend(mt, &Stats{})

	mt = newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[1]), testChunks[1])
	mt.addChunk(computeAddr(testChunks[2]), testChunks[2])
	ts = ts.Prepend(mt, &Stats{})

	specs := ts.ToSpecs()
	assert.Len(specs, 2)
}

// Flatten must drop the empty table sandwiched between two populated ones.
func TestTableSetFlattenExcludesEmptyTable(t *testing.T) {
	assert := assert.New(t)
	ts := newFakeTableSet()
	assert.Empty(ts.ToSpecs())
	mt := newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[0]), testChunks[0])
	ts = ts.Prepend(mt, &Stats{})

	mt = newMemTable(testMemTableSize)
	ts = ts.Prepend(mt, &Stats{})

	mt = newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[1]), testChunks[1])
	mt.addChunk(computeAddr(testChunks[2]), testChunks[2])
	ts = ts.Prepend(mt, &Stats{})

	ts = ts.Flatten()
	assert.EqualValues(ts.Size(), 2)
}

// extract must deliver the chunks in the order they were inserted, across
// tables.
func TestTableSetExtract(t *testing.T) {
	assert := assert.New(t)
	ts := newFakeTableSet()
	assert.Empty(ts.ToSpecs())

	// Put in one table
	mt := newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[0]), testChunks[0])
	ts = ts.Prepend(mt, &Stats{})

	// Put in a second
	mt = newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[1]), testChunks[1])
	mt.addChunk(computeAddr(testChunks[2]), testChunks[2])
	ts = ts.Prepend(mt, &Stats{})

	// The channel is unbuffered, so extract runs on its own goroutine and
	// closes the channel when done to end the range loop below.
	chunkChan := make(chan extractRecord)
	go func() { defer close(chunkChan); ts.extract(chunkChan) }()
	i := 0
	for rec := range chunkChan {
		a := computeAddr(testChunks[i])
		assert.NotNil(rec.data, "Nothing for", a)
		assert.Equal(testChunks[i], rec.data, "Item %d: %s != %s", i, string(testChunks[i]), string(rec.data))
		assert.Equal(a, rec.a)
		i++
	}
}

// Rebase must keep the novel table and open every table named by the specs.
func TestTableSetRebase(t *testing.T) {
	assert := assert.New(t)
	persister := newFakeTablePersister()

	// insert prepends one single-chunk table per element of |chunks|.
	insert := func(ts tableSet, chunks ...[]byte) tableSet {
		for _, c := range chunks {
			mt := newMemTable(testMemTableSize)
			mt.addChunk(computeAddr(c), c)
			ts = ts.Prepend(mt, &Stats{})
		}
		return ts
	}
	fullTS := newTableSet(persister)
	assert.Empty(fullTS.ToSpecs())
	fullTS = insert(fullTS, testChunks...)
	fullTS = fullTS.Flatten()

	ts := newTableSet(persister)
	ts = insert(ts, testChunks[0])
	assert.Equal(1, ts.Size())
	ts = ts.Flatten()
	ts = insert(ts, []byte("novel"))

	// One novel table plus the three tables of fullTS.
	ts = ts.Rebase(fullTS.ToSpecs(), nil)
	assert.Equal(4, ts.Size())
}

// physicalLen must exceed the size of an index covering all chunks, since it
// also accounts for the chunk records.
func TestTableSetPhysicalLen(t *testing.T) {
	assert := assert.New(t)
	ts := newFakeTableSet()
	assert.Empty(ts.ToSpecs())
	mt := newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[0]), testChunks[0])
	ts = ts.Prepend(mt, &Stats{})

	mt = newMemTable(testMemTableSize)
	mt.addChunk(computeAddr(testChunks[1]), testChunks[1])
	mt.addChunk(computeAddr(testChunks[2]), testChunks[2])
	ts = ts.Prepend(mt, &Stats{})

	assert.True(ts.physicalLen() > indexSize(ts.count()))
}

================================================
FILE: go/nbs/table_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"encoding/binary"
	"fmt"
	"sort"
	"testing"

	"sync"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
)

// buildTable writes |chunks| into a freshly allocated table and returns the
// table bytes together with the address reported by the table writer.
func buildTable(chunks [][]byte) ([]byte, addr) {
	totalData := uint64(0)
	for _, chunk := range chunks {
		totalData += uint64(len(chunk))
	}
	capacity := maxTableSize(uint64(len(chunks)), totalData)

	buff := make([]byte, capacity)

	tw := newTableWriter(buff, nil)

	for _, chunk := range chunks {
		tw.addChunk(computeAddr(chunk), chunk)
	}

	length, blockHash := tw.finish()
	return buff[:length], blockHash
}

// Chunks written to a table can be found and read back; others cannot.
func TestSimple(t *testing.T) {
	assert := assert.New(t)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _ := buildTable(chunks)
	tr := newTableReader(parseTableIndex(tableData), tableReaderAtFromBytes(tableData), fileBlockSize)

	assertChunksInReader(chunks, tr, assert)

	assert.Equal(string(chunks[0]), string(tr.get(computeAddr(chunks[0]), &Stats{})))
	assert.Equal(string(chunks[1]), string(tr.get(computeAddr(chunks[1]), &Stats{})))
	assert.Equal(string(chunks[2]), string(tr.get(computeAddr(chunks[2]), &Stats{})))

	notPresent := [][]byte{
		[]byte("yo"),
		[]byte("do"),
		[]byte("so much to do"),
	}

	assertChunksNotInReader(notPresent, tr, assert)

	assert.NotEqual(string(notPresent[0]), string(tr.get(computeAddr(notPresent[0]), &Stats{})))
	assert.NotEqual(string(notPresent[1]), string(tr.get(computeAddr(notPresent[1]), &Stats{})))
	assert.NotEqual(string(notPresent[2]), string(tr.get(computeAddr(notPresent[2]), &Stats{})))
}

// assertChunksInReader asserts that |r| has every chunk of |chunks|.
func assertChunksInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) {
	for _, c := range chunks {
		assert.True(r.has(computeAddr(c)))
	}
}

// assertChunksNotInReader asserts that |r| has none of |chunks|.
func assertChunksNotInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) {
	for _, c := range chunks {
		assert.False(r.has(computeAddr(c)))
	}
}

// hasMany must flag every record whose chunk is in the table.
func TestHasMany(t *testing.T) {
	assert := assert.New(t)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _ := buildTable(chunks)
	tr := newTableReader(parseTableIndex(tableData), tableReaderAtFromBytes(tableData), fileBlockSize)

	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
	hasAddrs := []hasRecord{
		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), 0, false},
		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), 1, false},
		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), 2, false},
	}
	// hasMany scans forward only, so the records have to be in prefix order.
	sort.Sort(hasRecordByPrefix(hasAddrs))

	tr.hasMany(hasAddrs)
	for _, ha := range hasAddrs {
		assert.True(ha.has, "Nothing for prefix %d", ha.prefix)
	}
}

// hasMany must still find addresses when several index entries share one
// prefix and the first of them is not being asked for.
func TestHasManySequentialPrefix(t *testing.T) {
	assert := assert.New(t)

	// Use bogus addrs so we can generate the case of sequentially non-unique prefixes in the index
	// Note that these are already sorted
	addrStrings := []string{
		"0rfgadopg6h3fk7d253ivbjsij4qo3nv",
		"0rfgadopg6h3fk7d253ivbjsij4qo4nv",
		"0rfgadopg6h3fk7d253ivbjsij4qo9nv",
	}

	addrs := make([]addr, len(addrStrings))
	for i, s := range addrStrings {
		addrs[i] = addr(hash.Parse(s))
	}

	bogusData := []byte("bogus") // doesn't matter what this is. hasMany() won't check chunkRecords
	totalData := uint64(len(bogusData) * len(addrs))

	capacity := maxTableSize(uint64(len(addrs)), totalData)
	buff := make([]byte, capacity)
	tw := newTableWriter(buff, nil)

	for _, a := range addrs {
		tw.addChunk(a, bogusData)
	}

	length, _ := tw.finish()
	buff = buff[:length]

	tr := newTableReader(parseTableIndex(buff), tableReaderAtFromBytes(buff), fileBlockSize)

	hasAddrs := make([]hasRecord, 2)
	// Leave out the first address
	hasAddrs[0] = hasRecord{&addrs[1], addrs[1].Prefix(), 1, false}
	hasAddrs[1] = hasRecord{&addrs[2], addrs[2].Prefix(), 2, false}

	tr.hasMany(hasAddrs)

	for _, ha := range hasAddrs {
		assert.True(ha.has, fmt.Sprintf("Nothing for prefix %x\n", ha.prefix))
	}
}

// getMany must deliver one chunk per request when all are present.
func TestGetMany(t *testing.T) {
	assert := assert.New(t)

	data := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _ := buildTable(data)
	tr := newTableReader(parseTableIndex(tableData), tableReaderAtFromBytes(tableData), fileBlockSize)

	addrs := addrSlice{computeAddr(data[0]), computeAddr(data[1]), computeAddr(data[2])}
	getBatch := []getRecord{
		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false},
		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), false},
		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), false},
	}
	sort.Sort(getRecordByPrefix(getBatch))

	// The channel is buffered for every request, so the reader goroutines
	// never block and the channel can be drained after they are done.
	wg := &sync.WaitGroup{}
	chunkChan := make(chan *chunks.Chunk, len(getBatch))
	tr.getMany(getBatch, chunkChan, wg, &Stats{})
	wg.Wait()
	close(chunkChan)

	gotCount := 0
	for range chunkChan {
		gotCount++
	}

	assert.True(gotCount == len(getBatch))
}

// With a block size of 0, adjacent chunks need one read, while chunks
// separated by another chunk need one read each.
func TestCalcReads(t *testing.T) {
	assert := assert.New(t)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _ := buildTable(chunks)
	tr := newTableReader(parseTableIndex(tableData), tableReaderAtFromBytes(tableData), 0)
	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
	getBatch := []getRecord{
		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false},
		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), false},
		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), false},
	}

	// gb2 asks for the first and the last chunk only; it is copied before
	// calcReads marks the records of getBatch as found.
	gb2 := []getRecord{getBatch[0], getBatch[2]}
	sort.Sort(getRecordByPrefix(getBatch))

	reads, remaining := tr.calcReads(getBatch, 0)
	assert.False(remaining)
	assert.Equal(1, reads)

	sort.Sort(getRecordByPrefix(gb2))
	reads, remaining = tr.calcReads(gb2, 0)
	assert.False(remaining)
	assert.Equal(2, reads)
}

// extract must deliver the chunks of a table in insertion order.
func TestExtract(t *testing.T) {
	assert := assert.New(t)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	tableData, _ := buildTable(chunks)
	tr := newTableReader(parseTableIndex(tableData), tableReaderAtFromBytes(tableData), fileBlockSize)

	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}

	chunkChan := make(chan extractRecord)
	go func() { tr.extract(chunkChan); close(chunkChan) }()
	i := 0
	for rec := range chunkChan {
		assert.NotNil(rec.data, "Nothing for", addrs[i])
		assert.Equal(addrs[i], rec.a)
		assert.Equal(chunks[i], rec.data)
		i++
	}
}

// A table of 1<<16 chunks must find each of them and none of another 1<<16
// chunks which were never written.
func Test65k(t *testing.T) {
	assert := assert.New(t)

	count := 1 << 16
	chunks := make([][]byte, count)

	dataFn := func(i int) []byte {
		return []byte(fmt.Sprintf("data%d", i*2))
	}

	for i := 0; i < count; i++ {
		chunks[i] = dataFn(i)
	}

	tableData, _ := buildTable(chunks)
	tr := newTableReader(parseTableIndex(tableData), tableReaderAtFromBytes(tableData), fileBlockSize)

	for i := 0; i < count; i++ {
		data := dataFn(i)
		h := computeAddr(data)
		assert.True(tr.has(computeAddr(data)))
		assert.Equal(string(data), string(tr.get(h, &Stats{})))
	}

	for i := count; i < count*2; i++ {
		data := dataFn(i)
		h := computeAddr(data)
		assert.False(tr.has(computeAddr(data)))
		assert.NotEqual(string(data), string(tr.get(h, &Stats{})))
	}
}

// Ensure all addresses share the first 7 bytes. Useful for easily generating tests which have
// "prefix" collisions.
func computeAddrCommonPrefix(data []byte) addr {
	a := computeAddrDefault(data)
	a[0] = 0x01
	a[1] = 0x23
	a[2] = 0x45
	a[3] = 0x67
	a[4] = 0x89
	a[5] = 0xab
	a[6] = 0xcd
	return a
}

// doTestNGetMany writes |count| chunks into a table and checks that getMany
// delivers exactly |count| chunks when asked for all of them.
func doTestNGetMany(t *testing.T, count int) {
	assert := assert.New(t)

	data := make([][]byte, count)

	dataFn := func(i int) []byte {
		return []byte(fmt.Sprintf("data%d", i*2))
	}

	for i := 0; i < count; i++ {
		data[i] = dataFn(i)
	}

	tableData, _ := buildTable(data)
	tr := newTableReader(parseTableIndex(tableData), tableReaderAtFromBytes(tableData), fileBlockSize)

	getBatch := make([]getRecord, len(data))
	for i := 0; i < count; i++ {
		a := computeAddr(dataFn(i))
		getBatch[i] = getRecord{&a, a.Prefix(), false}
	}

	sort.Sort(getRecordByPrefix(getBatch))

	wg := &sync.WaitGroup{}
	chunkChan := make(chan *chunks.Chunk, len(getBatch))
	tr.getMany(getBatch, chunkChan, wg, &Stats{})
	wg.Wait()
	close(chunkChan)

	gotCount := 0
	for range chunkChan {
		gotCount++
	}

	assert.True(gotCount == len(getBatch))
}

func Test65kGetMany(t *testing.T) {
	doTestNGetMany(t, 1<<16)
}

// Runs the getMany test with an address function that makes all addresses
// share their first 7 bytes; the default is restored afterwards.
func Test2kGetManyCommonPrefix(t *testing.T) {
	computeAddr = computeAddrCommonPrefix
	defer func() {
		computeAddr = computeAddrDefault
	}()

	doTestNGetMany(t, 1<<11)
}

// A table without chunks consists of the footer only.
func TestEmpty(t *testing.T) {
	assert := assert.New(t)

	buff := make([]byte, footerSize)
	tw := newTableWriter(buff, nil)
	length, _ := tw.finish()
	assert.Equal(length, footerSize)
	d.PanicIfError(nil)
}

================================================
FILE: go/nbs/table_writer.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"crypto/sha512"
	"encoding/binary"
	"fmt"
	"hash"
	"sort"

	"github.com/attic-labs/noms/go/d"
	"github.com/golang/snappy"
)

// tableWriter encodes a collection of byte stream chunks into a nbs table. NOT goroutine safe.
type tableWriter struct { buff []byte pos uint64 totalCompressedData uint64 totalUncompressedData uint64 prefixes prefixIndexSlice // TODO: This is in danger of exploding memory blockHash hash.Hash snapper snappyEncoder } type snappyEncoder interface { Encode(dst, src []byte) []byte } type realSnappyEncoder struct{} func (r realSnappyEncoder) Encode(dst, src []byte) []byte { return snappy.Encode(dst, src) } func maxTableSize(numChunks, totalData uint64) uint64 { avgChunkSize := totalData / numChunks d.Chk.True(avgChunkSize < maxChunkSize) maxSnappySize := snappy.MaxEncodedLen(int(avgChunkSize)) d.Chk.True(maxSnappySize > 0) return numChunks*(prefixTupleSize+lengthSize+addrSuffixSize+checksumSize+uint64(maxSnappySize)) + footerSize } func indexSize(numChunks uint32) uint64 { return uint64(numChunks) * (addrSuffixSize + lengthSize + prefixTupleSize) } func lengthsOffset(numChunks uint32) uint64 { return uint64(numChunks) * prefixTupleSize } func suffixesOffset(numChunks uint32) uint64 { return uint64(numChunks) * (prefixTupleSize + lengthSize) } // len(buff) must be >= maxTableSize(numChunks, totalData) func newTableWriter(buff []byte, snapper snappyEncoder) *tableWriter { if snapper == nil { snapper = realSnappyEncoder{} } return &tableWriter{ buff: buff, blockHash: sha512.New(), snapper: snapper, } } func (tw *tableWriter) addChunk(h addr, data []byte) bool { if len(data) == 0 { panic("NBS blocks cannont be zero length") } // Compress data straight into tw.buff compressed := tw.snapper.Encode(tw.buff[tw.pos:], data) dataLength := uint64(len(compressed)) tw.totalCompressedData += dataLength // BUG 3156 indicated that, sometimes, snappy decided that there's not enough space in tw.buff[tw.pos:] to encode into. // This _should never happen anymore be_, because we iterate over all chunks to be added and sum the max amount of space that snappy says it might need. 
// Since we know that |data| can't be 0-length, we also know that the compressed version of |data| has length greater than zero. The first element in a snappy-encoded blob is a Uvarint indicating how much data is present. Therefore, if there's a Uvarint-encoded 0 at tw.buff[tw.pos:], we know that snappy did not write anything there and we have a problem. if v, n := binary.Uvarint(tw.buff[tw.pos:]); v == 0 { d.Chk.True(n != 0) panic(fmt.Errorf("BUG 3156: unbuffered chunk %s: uncompressed %d, compressed %d, snappy max %d, tw.buff %d\n", h.String(), len(data), dataLength, snappy.MaxEncodedLen(len(data)), len(tw.buff[tw.pos:]))) } tw.pos += dataLength tw.totalUncompressedData += uint64(len(data)) // checksum (4 LSBytes, big-endian) binary.BigEndian.PutUint32(tw.buff[tw.pos:], crc(compressed)) tw.pos += checksumSize // Stored in insertion order tw.prefixes = append(tw.prefixes, prefixIndexRec{ h.Prefix(), h[addrPrefixSize:], uint32(len(tw.prefixes)), uint32(checksumSize + dataLength), }) return true } func (tw *tableWriter) finish() (uncompressedLength uint64, blockAddr addr) { tw.writeIndex() tw.writeFooter() uncompressedLength = tw.pos var h []byte h = tw.blockHash.Sum(h) // Appends hash to h copy(blockAddr[:], h) return } type prefixIndexRec struct { prefix uint64 suffix []byte order, size uint32 } type prefixIndexSlice []prefixIndexRec func (hs prefixIndexSlice) Len() int { return len(hs) } func (hs prefixIndexSlice) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix } func (hs prefixIndexSlice) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } func (tw *tableWriter) writeIndex() { sort.Sort(tw.prefixes) pfxScratch := [addrPrefixSize]byte{} numRecords := uint32(len(tw.prefixes)) lengthsOffset := tw.pos + lengthsOffset(numRecords) // skip prefix and ordinal for each record suffixesOffset := tw.pos + suffixesOffset(numRecords) // skip size for each record for _, pi := range tw.prefixes { binary.BigEndian.PutUint64(pfxScratch[:], pi.prefix) // hash prefix n := 
uint64(copy(tw.buff[tw.pos:], pfxScratch[:])) d.Chk.True(n == addrPrefixSize) tw.pos += n // order binary.BigEndian.PutUint32(tw.buff[tw.pos:], pi.order) tw.pos += ordinalSize // length offset := lengthsOffset + uint64(pi.order)*lengthSize binary.BigEndian.PutUint32(tw.buff[offset:], pi.size) // hash suffix offset = suffixesOffset + uint64(pi.order)*addrSuffixSize n = uint64(copy(tw.buff[offset:], pi.suffix)) d.Chk.True(n == addrSuffixSize) } suffixesLen := uint64(numRecords) * addrSuffixSize tw.blockHash.Write(tw.buff[suffixesOffset : suffixesOffset+suffixesLen]) tw.pos = suffixesOffset + suffixesLen } func (tw *tableWriter) writeFooter() { tw.pos += writeFooter(tw.buff[tw.pos:], uint32(len(tw.prefixes)), tw.totalUncompressedData) } func writeFooter(dst []byte, chunkCount uint32, uncData uint64) (consumed uint64) { // chunk count binary.BigEndian.PutUint32(dst[consumed:], chunkCount) consumed += uint32Size // total uncompressed chunk data binary.BigEndian.PutUint64(dst[consumed:], uncData) consumed += uint64Size // magic number copy(dst[consumed:], magicNumber) consumed += magicNumberSize return } ================================================ FILE: go/nbs/test/manifest_clobber.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "flag" "log" "os" "golang.org/x/sys/unix" ) func main() { flag.Parse() if flag.NArg() < 3 { log.Fatalln("Not enough arguments") } l, err := os.Create(flag.Arg(0)) if err != nil { log.Fatalln(err) } defer l.Close() // lock released by closing l. 
err = unix.Flock(int(l.Fd()), unix.LOCK_EX|unix.LOCK_NB)
	if err == unix.EWOULDBLOCK {
		return
	}
	if err != nil {
		log.Fatalln(err)
	}

	// Clobber manifest file at flag.Arg(1) with contents at flag.Arg(2)
	m, err := os.Create(flag.Arg(1))
	if err != nil {
		log.Fatalln(err)
	}
	defer m.Close()

	if _, err = m.WriteString(flag.Arg(2)); err != nil {
		log.Fatalln(err)
	}
}

================================================
FILE: go/ngql/README.md
================================================
# Noms GraphQL

An experimental bridge between Noms and [GraphQL](http://graphql.org/)

This is to be used with https://github.com/attic-labs/graphql which is a fork of https://github.com/graphql-go/graphql to handle Noms values. It disables some validations that do not work for Noms due to Noms being schemaless (or more precisely the schema is a function of the value in the dataset).

*ngql* provides an API to convert Noms types/values to and from GraphQL types/values, as well as some functions that can be used to implement a GraphQL endpoint using https://github.com/attic-labs/graphql.

# Status

* All Noms types are supported except
  * Blob
  * Type
  * Unions with non-`Struct` component types

* GraphQL does not support unions in input types which limits the input types that can be used.

# Type conversion rules

## Value

Almost all Noms values can be represented by GraphQL. All Noms values except the primitives (`Bool`, `Number` & `String`) are represented by a GraphQL struct.

## Bool

Is represented by a non-nullable `Bool`

## Number

Is represented by a non-nullable `Float`

## String

Is represented by a non-nullable `String`

## List

Noms lists are expressed as a GraphQL struct with the fields

* `values` - The values in the list.
* `size` - The number of values in the list.

Lists take a few optional arguments:

* `at` - The index to start at, defaults to `0`.
* `count` - The number of values to return, defaults to all of the values.

```graphql
type FooList {
  size: Float!
  values: [Foo!]!
}
```

## Set

Noms sets are expressed as a GraphQL struct with the fields

* `values` - The values in the set.
* `size` - The number of values in the set.

Sets take a few optional arguments:

* `at` - The index to start at, defaults to `0`.
* `count` - The number of values to return, defaults to all of the values.
* `key` - The value to start at.
* `through` - The value to end at (inclusive).
* `keys` - When provided, only values that match the keys are included in the result.

```graphql
type FooSet {
  size: Float!
  values: [Foo!]!
}
```

## Map

Noms maps are expressed as a GraphQL struct with the fields

* `values` - The values in the map.
* `keys` - The keys in the map.
* `entries` - The entries in the map. An entry is a struct with `key` and `value` fields.
* `size` - The number of values in the map.

Maps take a few optional arguments:

* `at` - The index to start at, defaults to `0`.
* `count` - The number of elements to return, defaults to all of the elements.
* `key` - The value to start at.
* `through` - The value to end at (inclusive).
* `keys` - When provided, only values/keys/entries that match the keys are included in the result.

```graphql
type StringFooMap {
  size: Float!
  elements: [StringFooEntry!]!
}

type StringFooEntry {
  key: String!
  value: Foo!
}
```

## Struct

Noms structs are expressed as GraphQL structs, with an extra `hash` field.

If the field in the Noms struct is optional then the GraphQL type for that field is nullable.

## Ref

Noms refs are expressed as a GraphQL struct with a `targetHash` and `targetValue` field.

```graphql
type FooRef {
  targetHash: String!
  targetValue: Foo!
}
```

================================================
FILE: go/ngql/query.go
================================================
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package ngql import ( "context" "encoding/json" "io" "github.com/attic-labs/graphql" "github.com/attic-labs/graphql/gqlerrors" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) const ( atKey = "at" countKey = "count" elementsKey = "elements" entriesKey = "entries" keyKey = "key" keysKey = "keys" rootKey = "root" rootQueryKey = "Root" scalarValue = "scalarValue" sizeKey = "size" targetHashKey = "targetHash" targetValueKey = "targetValue" throughKey = "through" valueKey = "value" valuesKey = "values" vrwKey = "vrw" ) // NewRootQueryObject creates a "root" query object that can be used to // traverse the value tree of rootValue. func NewRootQueryObject(rootValue types.Value, tm *TypeMap) *graphql.Object { tc := TypeConverter{*tm, DefaultNameFunc} return tc.NewRootQueryObject(rootValue) } // NewRootQueryObject creates a "root" query object that can be used to // traverse the value tree of rootValue. func (tc *TypeConverter) NewRootQueryObject(rootValue types.Value) *graphql.Object { rootNomsType := types.TypeOf(rootValue) rootType := tc.NomsTypeToGraphQLType(rootNomsType) return graphql.NewObject(graphql.ObjectConfig{ Name: rootQueryKey, Fields: graphql.Fields{ rootKey: &graphql.Field{ Type: rootType, Resolve: func(p graphql.ResolveParams) (interface{}, error) { return MaybeGetScalar(rootValue), nil }, }, }}) } // NewContext creates a new context.Context with the extra data added to it // that is required by ngql. func NewContext(vrw types.ValueReader) context.Context { return context.WithValue(context.Background(), vrwKey, vrw) } // Query takes |rootValue|, builds a GraphQL scheme from rootValue.Type() and // executes |query| against it, encoding the result to |w|. 
func Query(rootValue types.Value, query string, vrw types.ValueReadWriter, w io.Writer) { schemaConfig := graphql.SchemaConfig{} tc := NewTypeConverter() queryWithSchemaConfig(rootValue, query, schemaConfig, vrw, tc, w) } func queryWithSchemaConfig(rootValue types.Value, query string, schemaConfig graphql.SchemaConfig, vrw types.ValueReadWriter, tc *TypeConverter, w io.Writer) { schemaConfig.Query = tc.NewRootQueryObject(rootValue) schema, _ := graphql.NewSchema(schemaConfig) ctx := NewContext(vrw) r := graphql.Do(graphql.Params{ Schema: schema, RequestString: query, Context: ctx, }) err := json.NewEncoder(w).Encode(r) d.PanicIfError(err) } // Error writes an error as a GraphQL error to a writer. func Error(err error, w io.Writer) { r := graphql.Result{ Errors: []gqlerrors.FormattedError{ {Message: err.Error()}, }, } jsonErr := json.NewEncoder(w).Encode(r) d.PanicIfError(jsonErr) } ================================================ FILE: go/ngql/query_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package ngql import ( "bytes" "encoding/json" "errors" "fmt" "testing" "github.com/attic-labs/graphql" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/test" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) type QueryGraphQLSuite struct { suite.Suite vs *types.ValueStore } func TestQueryGraphQL(t *testing.T) { suite.Run(t, &QueryGraphQLSuite{}) } func newTestValueStore() *types.ValueStore { storage := &chunks.MemoryStorage{} return types.NewValueStore(storage.NewView()) } func (suite *QueryGraphQLSuite) SetupTest() { suite.vs = newTestValueStore() } func (suite *QueryGraphQLSuite) assertQueryResult(v types.Value, q, expect string) { buf := &bytes.Buffer{} Query(v, q, suite.vs, buf) suite.JSONEq(test.RemoveHashes(expect), test.RemoveHashes(buf.String())) } func (suite *QueryGraphQLSuite) TestScalars() { suite.assertQueryResult(types.String("aaa"), "{root}", `{"data":{"root":"aaa"}}`) suite.assertQueryResult(types.String(""), "{root}", `{"data":{"root":""}}`) suite.assertQueryResult(types.Number(0), "{root}", `{"data":{"root":0}}`) suite.assertQueryResult(types.Number(1), "{root}", `{"data":{"root":1}}`) suite.assertQueryResult(types.Number(-1), "{root}", `{"data":{"root":-1}}`) suite.assertQueryResult(types.Number(1<<31), "{root}", `{"data":{"root":2.147483648e+09}}`) suite.assertQueryResult(types.Number(-(1 << 31)), "{root}", `{"data":{"root":-2.147483648e+09}}`) suite.assertQueryResult(types.Number(0.001), "{root}", `{"data":{"root":0.001}}`) suite.assertQueryResult(types.Number(0.00000001), "{root}", `{"data":{"root":1e-08}}`) suite.assertQueryResult(types.Bool(false), "{root}", `{"data":{"root":false}}`) suite.assertQueryResult(types.Bool(true), "{root}", `{"data":{"root":true}}`) } func (suite *QueryGraphQLSuite) TestStructBasic() { s1 := 
types.NewStruct("Foo", types.StructData{ "a": types.String("aaa"), "b": types.Bool(true), "c": types.Number(0.1), }) suite.assertQueryResult(s1, "{root{a}}", `{"data":{"root":{"a":"aaa"}}}`) suite.assertQueryResult(s1, "{root{a b}}", `{"data":{"root":{"a":"aaa","b":true}}}`) suite.assertQueryResult(s1, "{root{a b c}}", `{"data":{"root":{"a":"aaa","b":true,"c":0.1}}}`) suite.assertQueryResult(s1, "{root{a c}}", `{"data":{"root":{"a":"aaa","c":0.1}}}`) } func (suite *QueryGraphQLSuite) TestEmptyStruct() { s1 := types.NewStruct("", types.StructData{}) suite.assertQueryResult(s1, "{root{hash}}", `{"data":{"root":{"hash":"0123456789abcdefghijklmnopqrstuv"}}}`) } func (suite *QueryGraphQLSuite) TestEmbeddedStruct() { s1 := types.NewStruct("Foo", types.StructData{ "a": types.String("aaa"), "b": types.NewStruct("Bar", types.StructData{ "c": types.Bool(true), "d": types.Number(0.1), }), }) suite.assertQueryResult(s1, "{root{a}}", `{"data":{"root":{"a":"aaa"}}}`) suite.assertQueryResult(s1, "{root{a b {c}}}", `{"data":{"root":{"a":"aaa","b":{"c":true}}}}`) suite.assertQueryResult(s1, "{root{a b {c d}}}", `{"data":{"root":{"a":"aaa","b":{"c":true,"d":0.1}}}}`) } func (suite *QueryGraphQLSuite) TestListBasic() { for _, valuesKey := range []string{"elements", "values"} { list := types.NewList(suite.vs) suite.assertQueryResult(list, "{root{size}}", `{"data":{"root":{"size":0}}}`) suite.assertQueryResult(list, "{root{"+valuesKey+"}}", `{"data":{"root":{}}}`) list = types.NewList(suite.vs, types.String("foo"), types.String("bar"), types.String("baz")) suite.assertQueryResult(list, "{root{"+valuesKey+"}}", `{"data":{"root":{"`+valuesKey+`":["foo","bar","baz"]}}}`) suite.assertQueryResult(list, "{root{size}}", `{"data":{"root":{"size":3}}}`) suite.assertQueryResult(list, "{root{"+valuesKey+"(at:1,count:2)}}", `{"data":{"root":{"`+valuesKey+`":["bar","baz"]}}}`) list = types.NewList(suite.vs, types.Bool(true), types.Bool(false), types.Bool(false)) suite.assertQueryResult(list, 
"{root{"+valuesKey+"}}", `{"data":{"root":{"`+valuesKey+`":[true,false,false]}}}`) suite.assertQueryResult(list, "{root{"+valuesKey+"(at:1,count:2)}}", `{"data":{"root":{"`+valuesKey+`":[false,false]}}}`) list = types.NewList(suite.vs, types.Number(1), types.Number(1.1), types.Number(-100)) suite.assertQueryResult(list, "{root{"+valuesKey+"}}", `{"data":{"root":{"`+valuesKey+`":[1,1.1,-100]}}}`) suite.assertQueryResult(list, "{root{"+valuesKey+"(at:1,count:2)}}", `{"data":{"root":{"`+valuesKey+`":[1.1,-100]}}}`) list = types.NewList(suite.vs, types.String("a"), types.String("b"), types.String("c")) suite.assertQueryResult(list, "{root{"+valuesKey+"(at:4)}}", `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(list, "{root{"+valuesKey+"(count:0)}}", `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(list, "{root{"+valuesKey+"(count:10)}}", `{"data":{"root":{"`+valuesKey+`":["a","b","c"]}}}`) suite.assertQueryResult(list, "{root{"+valuesKey+"(at:-1)}}", `{"data":{"root":{"`+valuesKey+`":["a","b","c"]}}}`) } } func (suite *QueryGraphQLSuite) TestListOfStruct() { list := types.NewList(suite.vs, types.NewStruct("Foo", types.StructData{ "a": types.Number(28), "b": types.String("foo"), }), types.NewStruct("Foo", types.StructData{ "a": types.Number(-20.102), "b": types.String("bar"), }), types.NewStruct("Foo", types.StructData{ "a": types.Number(5), "b": types.String("baz"), }), ) suite.assertQueryResult(list, "{root{elements{a b}}}", `{"data":{"root":{"elements":[{"a":28,"b":"foo"},{"a":-20.102,"b":"bar"},{"a":5,"b":"baz"}]}}}`) suite.assertQueryResult(list, "{root{elements{a}}}", `{"data":{"root":{"elements":[{"a":28},{"a":-20.102},{"a":5}]}}}`) } func (suite *QueryGraphQLSuite) TestListOfStructWithOptionalFields() { list := types.NewList(suite.vs, types.NewStruct("Foo", types.StructData{ "a": types.Number(1), }), types.NewStruct("Foo", types.StructData{ "a": types.Number(2), "b": types.String("bar"), }), ) suite.assertQueryResult(list, 
"{root{elements{a b}}}", `{ "data": { "root": { "elements": [ {"a": 1, "b": null}, {"a": 2, "b": "bar"} ] } } }`) } func (suite *QueryGraphQLSuite) TestSetBasic() { for _, valuesKey := range []string{"elements", "values"} { set := types.NewSet(suite.vs) suite.assertQueryResult(set, "{root{size}}", `{"data":{"root":{"size":0}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"}}", `{"data":{"root":{}}}`) set = types.NewSet(suite.vs, types.String("foo"), types.String("bar"), types.String("baz")) suite.assertQueryResult(set, "{root{"+valuesKey+"}}", `{"data":{"root":{"`+valuesKey+`":["bar","baz","foo"]}}}`) suite.assertQueryResult(set, "{root{size}}", `{"data":{"root":{"size":3}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(count:2)}}", `{"data":{"root":{"`+valuesKey+`":["bar","baz"]}}}`) set = types.NewSet(suite.vs, types.Bool(true), types.Bool(false)) suite.assertQueryResult(set, "{root{"+valuesKey+"}}", `{"data":{"root":{"`+valuesKey+`":[false,true]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(count:1)}}", `{"data":{"root":{"`+valuesKey+`":[false]}}}`) set = types.NewSet(suite.vs, types.Number(1), types.Number(1.1), types.Number(-100)) suite.assertQueryResult(set, "{root{"+valuesKey+"}}", `{"data":{"root":{"`+valuesKey+`":[-100,1,1.1]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(count:2)}}", `{"data":{"root":{"`+valuesKey+`":[-100,1]}}}`) set = types.NewSet(suite.vs, types.String("a"), types.String("b"), types.String("c"), types.String("d")) suite.assertQueryResult(set, "{root{"+valuesKey+"(count:0)}}", `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(count:2)}}", `{"data":{"root":{"`+valuesKey+`":["a","b"]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(at:0,count:2)}}", `{"data":{"root":{"`+valuesKey+`":["a","b"]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(at:-1,count:2)}}", `{"data":{"root":{"`+valuesKey+`":["a","b"]}}}`) suite.assertQueryResult(set, 
"{root{"+valuesKey+"(at:1,count:2)}}", `{"data":{"root":{"`+valuesKey+`":["b","c"]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(at:2)}}", `{"data":{"root":{"`+valuesKey+`":["c","d"]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(at:2,count:1)}}", `{"data":{"root":{"`+valuesKey+`":["c"]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(at:2,count:0)}}", `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(set, "{root{"+valuesKey+"(at:2,count:10)}}", `{"data":{"root":{"`+valuesKey+`":["c","d"]}}}`) } } func (suite *QueryGraphQLSuite) TestSetOfStruct() { set := types.NewSet(suite.vs, types.NewStruct("Foo", types.StructData{ "a": types.Number(28), "b": types.String("foo"), }), types.NewStruct("Foo", types.StructData{ "a": types.Number(-20.102), "b": types.String("bar"), }), types.NewStruct("Foo", types.StructData{ "a": types.Number(5), "b": types.String("baz"), }), ) suite.assertQueryResult(set, "{root{values{a b}}}", `{"data":{"root":{"values":[{"a":28,"b":"foo"},{"a":5,"b":"baz"},{"a":-20.102,"b":"bar"}]}}}`) suite.assertQueryResult(set, "{root{values{a}}}", `{"data":{"root":{"values":[{"a":28},{"a":5},{"a":-20.102}]}}}`) } func (suite *QueryGraphQLSuite) TestMapBasic() { for _, entriesKey := range []string{"elements", "entries"} { m := types.NewMap(suite.vs) suite.assertQueryResult(m, "{root{size}}", `{"data":{"root":{"size":0}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"}}", `{"data":{"root":{}}}`) m = types.NewMap(suite.vs, types.String("a"), types.Number(1), types.String("b"), types.Number(2), types.String("c"), types.Number(3), types.String("d"), types.Number(4), ) suite.assertQueryResult(m, "{root{"+entriesKey+"{key value}}}", `{"data":{"root":{"`+entriesKey+`":[{"key":"a","value":1},{"key":"b","value":2},{"key":"c","value":3},{"key":"d","value":4}]}}}`) suite.assertQueryResult(m, "{root{size}}", `{"data":{"root":{"size":4}}}`) } } func (suite *QueryGraphQLSuite) TestMapOfStruct() { m := types.NewMap(suite.vs, 
types.String("foo"), types.NewStruct("Foo", types.StructData{ "a": types.Number(28), "b": types.String("foo"), }), types.String("bar"), types.NewStruct("Foo", types.StructData{ "a": types.Number(-20.102), "b": types.String("bar"), }), types.String("baz"), types.NewStruct("Foo", types.StructData{ "a": types.Number(5), "b": types.String("baz"), }), ) suite.assertQueryResult(m, "{root{entries{key value{a}}}}", `{"data":{"root":{"entries":[{"key":"bar","value":{"a":-20.102}},{"key":"baz","value":{"a":5}},{"key":"foo","value":{"a":28}}]}}}`) suite.assertQueryResult(m, "{root{entries(count:1){value{a b}}}}", `{"data":{"root":{"entries":[{"value":{"a":-20.102,"b":"bar"}}]}}}`) suite.assertQueryResult(m, "{root{entries(count:3){key}}}", `{"data":{"root":{"entries":[{"key":"bar"},{"key":"baz"},{"key":"foo"}]}}}`) } func (suite *QueryGraphQLSuite) TestRef() { r := suite.vs.WriteValue(types.Number(100)) suite.assertQueryResult(r, "{root{targetValue}}", `{"data":{"root":{"targetValue":100}}}`) suite.assertQueryResult(r, "{root{targetHash}}", `{"data":{"root":{"targetHash":"0123456789abcdefghijklmnopqrstuv"}}}`) suite.assertQueryResult(r, "{root{targetValue targetHash}}", `{"data":{"root":{"targetHash":"0123456789abcdefghijklmnopqrstuv","targetValue":100}}}`) r = suite.vs.WriteValue(types.NewStruct("Foo", types.StructData{ "a": types.Number(28), "b": types.String("foo"), })) suite.assertQueryResult(r, "{root{targetValue{a}}}", `{"data":{"root":{"targetValue":{"a":28}}}}`) suite.assertQueryResult(r, "{root{targetValue{a b}}}", `{"data":{"root":{"targetValue":{"a":28,"b":"foo"}}}}`) r = suite.vs.WriteValue(types.NewList(suite.vs, types.String("foo"), types.String("bar"), types.String("baz"))) suite.assertQueryResult(r, "{root{targetValue{values}}}", `{"data":{"root":{"targetValue":{"values":["foo","bar","baz"]}}}}`) suite.assertQueryResult(r, "{root{targetValue{values(at:1,count:2)}}}", `{"data":{"root":{"targetValue":{"values":["bar","baz"]}}}}`) } func (suite 
*QueryGraphQLSuite) TestListOfUnionOfStructs() { list := types.NewList(suite.vs, types.NewStruct("Foo", types.StructData{ "a": types.Number(28), "b": types.String("baz"), }), types.NewStruct("Bar", types.StructData{ "b": types.String("bar"), }), types.NewStruct("Baz", types.StructData{ "c": types.Bool(true), }), ) suite.assertQueryResult(list, fmt.Sprintf("{root{values{... on %s{a b} ... on %s{b} ... on %s{c}}}}", GetTypeName(types.TypeOf(list.Get(0))), GetTypeName(types.TypeOf(list.Get(1))), GetTypeName(types.TypeOf(list.Get(2)))), `{"data":{"root":{"values":[{"a":28,"b":"baz"},{"b":"bar"},{"c":true}]}}}`) } func (suite *QueryGraphQLSuite) TestListOfUnionOfStructsConflictingFieldTypes() { list := types.NewList(suite.vs, types.NewStruct("Foo", types.StructData{ "a": types.Number(28), }), types.NewStruct("Bar", types.StructData{ "a": types.String("bar"), }), types.NewStruct("Baz", types.StructData{ "a": types.Bool(true), }), ) suite.assertQueryResult(list, fmt.Sprintf("{root{values{... on %s{a} ... on %s{b: a} ... on %s{c: a}}}}", GetTypeName(types.TypeOf(list.Get(0))), GetTypeName(types.TypeOf(list.Get(1))), GetTypeName(types.TypeOf(list.Get(2)))), `{"data":{"root":{"values":[{"a":28},{"b":"bar"},{"c":true}]}}}`) } func (suite *QueryGraphQLSuite) TestListOfUnionOfScalars() { list := types.NewList(suite.vs, types.Number(28), types.String("bar"), types.Bool(true), ) suite.assertQueryResult(list, "{root{values{... on BooleanValue{b: scalarValue} ... on StringValue{s: scalarValue} ... 
on NumberValue{n: scalarValue}}}}", `{"data":{"root":{"values":[{"n":28},{"s":"bar"},{"b":true}]}}}`) } func (suite *QueryGraphQLSuite) TestCyclicStructs() { // struct A { // a: "aaa" // b: Set(struct A { // a: "bbb" // b: Set() // }) // } s1 := types.NewStruct("A", types.StructData{ "a": types.String("aaa"), "b": types.NewSet(suite.vs, types.NewStruct("A", types.StructData{ "a": types.String("bbb"), "b": types.NewSet(suite.vs), })), }) suite.assertQueryResult(s1, "{root{a b{values{a}}}}", `{"data":{"root":{"a":"aaa","b":{"values":[{"a":"bbb"}]}}}}`) } func (suite *QueryGraphQLSuite) TestCyclicStructsWithUnion() { // struct A { // a: "aaa" // b: Struct A { // a: "bbb" // b: 42 // }) // } // struct A { // a: String, // b: Number | Cycle, // } s1 := types.NewStruct("A", types.StructData{ "a": types.String("aaa"), "b": types.NewStruct("A", types.StructData{ "a": types.String("bbb"), "b": types.Number(42), }), }) suite.assertQueryResult(s1, `{ root{ a b { a b { scalarValue } } } } `, `{ "data": { "root": { "a": "aaa", "b": { "a": "bbb", "b": { "scalarValue": 42 } } } } }`) suite.assertQueryResult(s1, fmt.Sprintf(`{ root{ a b { ... 
on %s { a } } } }`, GetTypeName(types.TypeOf(s1))), `{ "data": { "root": { "a": "aaa", "b": { "a": "bbb" } } } }`) } func (suite *QueryGraphQLSuite) TestNestedCollection() { list := types.NewList(suite.vs, types.NewSet(suite.vs, types.NewMap(suite.vs, types.Number(10), types.String("foo")), types.NewMap(suite.vs, types.Number(20), types.String("bar")), ), types.NewSet(suite.vs, types.NewMap(suite.vs, types.Number(30), types.String("baz")), types.NewMap(suite.vs, types.Number(40), types.String("bat")), ), ) suite.assertQueryResult(list, "{root{size}}", `{"data":{"root":{"size":2}}}`) suite.assertQueryResult(list, "{root{values(count:1){size}}}", `{"data":{"root":{"values":[{"size":2}]}}}`) suite.assertQueryResult(list, "{root{values(at:1,count:1){values(count:1){entries{key value}}}}}", `{"data":{"root":{"values":[{"values":[{"entries":[{"key":40,"value":"bat"}]}]}]}}}`) } func (suite *QueryGraphQLSuite) TestLoFi() { b := types.NewBlob(suite.vs, bytes.NewBufferString("I am a blob")) suite.assertQueryResult(b, "{root}", `{"data":{"root":"0123456789abcdefghijklmnopqrstuv"}}`) t := types.StringType suite.assertQueryResult(t, "{root}", `{"data":{"root":"0123456789abcdefghijklmnopqrstuv"}}`) } func (suite *QueryGraphQLSuite) TestError() { buff := &bytes.Buffer{} Error(errors.New("Some error string"), buff) suite.Equal(buff.String(), `{"data":null,"errors":[{"message":"Some error string","locations":null}]} `) } func (suite *QueryGraphQLSuite) TestMapArgs() { for _, entriesKey := range []string{"elements", "entries"} { m := types.NewMap(suite.vs, types.String("a"), types.Number(1), types.String("c"), types.Number(2), types.String("e"), types.Number(3), types.String("g"), types.Number(4), ) // count suite.assertQueryResult(m, "{root{"+entriesKey+"(count:0){value}}}", `{"data":{"root":{"`+entriesKey+`":[]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(count:2){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":1},{"value":2}]}}}`) suite.assertQueryResult(m, 
"{root{"+entriesKey+"(count:3){key}}}", `{"data":{"root":{"`+entriesKey+`":[{"key":"a"},{"key":"c"},{"key":"e"}]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(count: -1){key}}}", `{"data":{"root":{"`+entriesKey+`":[]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(count:5){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":1},{"value":2},{"value":3},{"value":4}]}}}`) // at suite.assertQueryResult(m, "{root{"+entriesKey+"(at:0){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":1},{"value":2},{"value":3},{"value":4}]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(at:-1){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":1},{"value":2},{"value":3},{"value":4}]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(at:2){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":3},{"value":4}]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(at:5){value}}}", `{"data":{"root":{"`+entriesKey+`":[]}}}`) // at & count suite.assertQueryResult(m, "{root{"+entriesKey+"(at:0,count:2){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":1},{"value":2}]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(at:-1,count:2){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":1},{"value":2}]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(at:1,count:2){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":2},{"value":3}]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(at:2,count:1){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":3}]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(at:2,count:0){value}}}", `{"data":{"root":{"`+entriesKey+`":[]}}}`) suite.assertQueryResult(m, "{root{"+entriesKey+"(at:2,count:10){value}}}", `{"data":{"root":{"`+entriesKey+`":[{"value":3},{"value":4}]}}}`) // key suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"e"){key value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"e","value":3}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"g"){value}}}`, 
`{"data":{"root":{"`+entriesKey+`":[{"value":4}]}}}`) // "f", no count/through so asking for exact match suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"f"){value}}}`, `{"data":{"root":{"`+entriesKey+`":[]}}}`) // "x" is larger than end suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"x"){value}}}`, `{"data":{"root":{"`+entriesKey+`":[]}}}`) // key & at // at is ignored when key is present suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"e",at:2){key value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"e","value":3}]}}}`) // key & count suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"c", count: 2){key value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"c","value":2},{"key":"e","value":3}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"c", count: 0){key value}}}`, `{"data":{"root":{"`+entriesKey+`":[]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"c", count: -1){key value}}}`, `{"data":{"root":{"`+entriesKey+`":[]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"e", count: 5){key value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"e","value":3},{"key":"g","value":4}]}}}`) // through suite.assertQueryResult(m, `{root{`+entriesKey+`(through:"c"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"a"},{"key":"c"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(through:"b"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"a"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(through:"0"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[]}}}`) // key & through suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"c", through:"c"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"c"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"c",through:"e"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"c"},{"key":"e"}]}}}`) // through & count suite.assertQueryResult(m, `{root{`+entriesKey+`(through:"c",count:1){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"a"}]}}}`) 
suite.assertQueryResult(m, `{root{`+entriesKey+`(through:"b",count:0){key}}}`, `{"data":{"root":{"`+entriesKey+`":[]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(through:"0",count:10){key}}}`, `{"data":{"root":{"`+entriesKey+`":[]}}}`) // at & through suite.assertQueryResult(m, `{root{`+entriesKey+`(at:0,through:"a"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"a"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(at:1,through:"e"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"c"},{"key":"e"}]}}}`) // at & count & through suite.assertQueryResult(m, `{root{`+entriesKey+`(at:0,count:2,through:"a"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"a"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(at:0,count:2,through:"e"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"a"},{"key":"c"}]}}}`) // key & count & through suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"c",count:2,through:"c"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"c"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(key:"c",count:2,through:"g"){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"c"},{"key":"e"}]}}}`) } } func (suite *QueryGraphQLSuite) TestMapKeysArg() { for _, entriesKey := range []string{"elements", "entries"} { m := types.NewMap(suite.vs, types.String("a"), types.Number(1), types.String("c"), types.Number(2), types.String("e"), types.Number(3), types.String("g"), types.Number(4), ) suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:["c","a"]){value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"value":2},{"value":1}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:[]){value}}}`, `{"data":{"root":{"`+entriesKey+`":[]}}}`) m = types.NewMap(suite.vs, types.Number(1), types.String("a"), types.Number(2), types.String("c"), types.Number(3), types.String("e"), types.Number(4), types.String("g"), ) suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:[4,1]){value}}}`, 
`{"data":{"root":{"`+entriesKey+`":[{"value":"g"},{"value":"a"}]}}}`) // Ignore other args suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:[4,1],key:2){value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"value":"g"},{"value":"a"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:[4,1],count:0){value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"value":"g"},{"value":"a"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:[4,1],at:4){value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"value":"g"},{"value":"a"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:[4,1],through:1){value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"value":"g"},{"value":"a"}]}}}`) } } func (suite *QueryGraphQLSuite) TestSetArgs() { for _, valuesKey := range []string{"elements", "values"} { s := types.NewSet(suite.vs, types.String("a"), types.String("c"), types.String("e"), types.String("g"), ) // count suite.assertQueryResult(s, "{root{"+valuesKey+"(count:0)}}", `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(count:2)}}", `{"data":{"root":{"`+valuesKey+`":["a","c"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(count:3)}}", `{"data":{"root":{"`+valuesKey+`":["a","c","e"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(count: -1)}}", `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(count:5)}}", `{"data":{"root":{"`+valuesKey+`":["a","c","e","g"]}}}`) // at suite.assertQueryResult(s, "{root{"+valuesKey+"(at:0)}}", `{"data":{"root":{"`+valuesKey+`":["a","c","e","g"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(at:-1)}}", `{"data":{"root":{"`+valuesKey+`":["a","c","e","g"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(at:2)}}", `{"data":{"root":{"`+valuesKey+`":["e","g"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(at:5)}}", `{"data":{"root":{"`+valuesKey+`":[]}}}`) // at & count suite.assertQueryResult(s, "{root{"+valuesKey+"(at:0,count:2)}}", 
`{"data":{"root":{"`+valuesKey+`":["a","c"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(at:-1,count:2)}}", `{"data":{"root":{"`+valuesKey+`":["a","c"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(at:1,count:2)}}", `{"data":{"root":{"`+valuesKey+`":["c","e"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(at:2,count:1)}}", `{"data":{"root":{"`+valuesKey+`":["e"]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(at:2,count:0)}}", `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(s, "{root{"+valuesKey+"(at:2,count:10)}}", `{"data":{"root":{"`+valuesKey+`":["e","g"]}}}`) // key suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"e")}}`, `{"data":{"root":{"`+valuesKey+`":["e"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"g")}}`, `{"data":{"root":{"`+valuesKey+`":["g"]}}}`) // "f", no count/through so asking for exact match suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"f")}}`, `{"data":{"root":{"`+valuesKey+`":[]}}}`) // "x" is larger than end suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"x")}}`, `{"data":{"root":{"`+valuesKey+`":[]}}}`) // exact match suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"0")}}`, `{"data":{"root":{"`+valuesKey+`":[]}}}`) // key & at // at is ignored when key is present suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"e",at:2)}}`, `{"data":{"root":{"`+valuesKey+`":["e"]}}}`) // key & count suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"c", count: 2)}}`, `{"data":{"root":{"`+valuesKey+`":["c","e"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"c", count: 0)}}`, `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"c", count: -1)}}`, `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"e", count: 5)}}`, `{"data":{"root":{"`+valuesKey+`":["e","g"]}}}`) // through suite.assertQueryResult(s, `{root{`+valuesKey+`(through:"c")}}`, 
`{"data":{"root":{"`+valuesKey+`":["a","c"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(through:"b")}}`, `{"data":{"root":{"`+valuesKey+`":["a"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(through:"0")}}`, `{"data":{"root":{"`+valuesKey+`":[]}}}`) // key & through suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"c", through:"c")}}`, `{"data":{"root":{"`+valuesKey+`":["c"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"c",through:"e")}}`, `{"data":{"root":{"`+valuesKey+`":["c","e"]}}}`) // through & count suite.assertQueryResult(s, `{root{`+valuesKey+`(through:"c",count:1)}}`, `{"data":{"root":{"`+valuesKey+`":["a"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(through:"b",count:0)}}`, `{"data":{"root":{"`+valuesKey+`":[]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(through:"0",count:10)}}`, `{"data":{"root":{"`+valuesKey+`":[]}}}`) // at & through suite.assertQueryResult(s, `{root{`+valuesKey+`(at:0,through:"a")}}`, `{"data":{"root":{"`+valuesKey+`":["a"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(at:1,through:"e")}}`, `{"data":{"root":{"`+valuesKey+`":["c","e"]}}}`) // at & count & through suite.assertQueryResult(s, `{root{`+valuesKey+`(at:0,count:2,through:"a")}}`, `{"data":{"root":{"`+valuesKey+`":["a"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(at:0,count:2,through:"e")}}`, `{"data":{"root":{"`+valuesKey+`":["a","c"]}}}`) // key & count & through suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"c",count:2,through:"c")}}`, `{"data":{"root":{"`+valuesKey+`":["c"]}}}`) suite.assertQueryResult(s, `{root{`+valuesKey+`(key:"c",count:2,through:"g")}}`, `{"data":{"root":{"`+valuesKey+`":["c","e"]}}}`) } } func (suite *QueryGraphQLSuite) TestMapValues() { m := types.NewMap(suite.vs, types.String("a"), types.Number(1), types.String("c"), types.Number(2), types.String("e"), types.Number(3), types.String("g"), types.Number(4), ) suite.assertQueryResult(m, "{root{values}}", 
`{"data":{"root":{"values":[1,2,3,4]}}}`) // count suite.assertQueryResult(m, "{root{values(count:0)}}", `{"data":{"root":{"values":[]}}}`) suite.assertQueryResult(m, "{root{values(count:2)}}", `{"data":{"root":{"values":[1,2]}}}`) suite.assertQueryResult(m, "{root{values(count:3)}}", `{"data":{"root":{"values":[1,2,3]}}}`) suite.assertQueryResult(m, "{root{values(count: -1)}}", `{"data":{"root":{"values":[]}}}`) suite.assertQueryResult(m, "{root{values(count:5)}}", `{"data":{"root":{"values":[1,2,3,4]}}}`) // at suite.assertQueryResult(m, "{root{values(at:0)}}", `{"data":{"root":{"values":[1,2,3,4]}}}`) suite.assertQueryResult(m, "{root{values(at:-1)}}", `{"data":{"root":{"values":[1,2,3,4]}}}`) suite.assertQueryResult(m, "{root{values(at:2)}}", `{"data":{"root":{"values":[3,4]}}}`) suite.assertQueryResult(m, "{root{values(at:5)}}", `{"data":{"root":{"values":[]}}}`) // at & count suite.assertQueryResult(m, "{root{values(at:0,count:2)}}", `{"data":{"root":{"values":[1,2]}}}`) suite.assertQueryResult(m, "{root{values(at:-1,count:2)}}", `{"data":{"root":{"values":[1,2]}}}`) suite.assertQueryResult(m, "{root{values(at:1,count:2)}}", `{"data":{"root":{"values":[2,3]}}}`) suite.assertQueryResult(m, "{root{values(at:2,count:1)}}", `{"data":{"root":{"values":[3]}}}`) suite.assertQueryResult(m, "{root{values(at:2,count:0)}}", `{"data":{"root":{"values":[]}}}`) suite.assertQueryResult(m, "{root{values(at:2,count:10)}}", `{"data":{"root":{"values":[3,4]}}}`) // key suite.assertQueryResult(m, `{root{values(key:"e")}}`, `{"data":{"root":{"values":[3]}}}`) suite.assertQueryResult(m, `{root{values(key:"g")}}`, `{"data":{"root":{"values":[4]}}}`) // "f", no count/through so asking for exact match suite.assertQueryResult(m, `{root{values(key:"f")}}`, `{"data":{"root":{"values":[]}}}`) // "x" is larger than end suite.assertQueryResult(m, `{root{values(key:"x")}}`, `{"data":{"root":{"values":[]}}}`) // key & at // at is ignored when key is present suite.assertQueryResult(m, 
`{root{values(key:"e",at:2)}}`, `{"data":{"root":{"values":[3]}}}`) // key & count suite.assertQueryResult(m, `{root{values(key:"c",count:2)}}`, `{"data":{"root":{"values":[2,3]}}}`) suite.assertQueryResult(m, `{root{values(key:"c",count:0)}}`, `{"data":{"root":{"values":[]}}}`) suite.assertQueryResult(m, `{root{values(key:"c",count:-1)}}`, `{"data":{"root":{"values":[]}}}`) suite.assertQueryResult(m, `{root{values(key:"e",count:5)}}`, `{"data":{"root":{"values":[3,4]}}}`) // through suite.assertQueryResult(m, `{root{values(through:"c")}}`, `{"data":{"root":{"values":[1,2]}}}`) suite.assertQueryResult(m, `{root{values(through:"b")}}`, `{"data":{"root":{"values":[1]}}}`) suite.assertQueryResult(m, `{root{values(through:"0")}}`, `{"data":{"root":{"values":[]}}}`) // key & through suite.assertQueryResult(m, `{root{values(key:"c", through:"c")}}`, `{"data":{"root":{"values":[2]}}}`) suite.assertQueryResult(m, `{root{values(key:"c",through:"e")}}`, `{"data":{"root":{"values":[2,3]}}}`) // through & count suite.assertQueryResult(m, `{root{values(through:"c",count:1)}}`, `{"data":{"root":{"values":[1]}}}`) suite.assertQueryResult(m, `{root{values(through:"b",count:0)}}`, `{"data":{"root":{"values":[]}}}`) suite.assertQueryResult(m, `{root{values(through:"0",count:10)}}`, `{"data":{"root":{"values":[]}}}`) // at & through suite.assertQueryResult(m, `{root{values(at:0,through:"a")}}`, `{"data":{"root":{"values":[1]}}}`) suite.assertQueryResult(m, `{root{values(at:1,through:"e")}}`, `{"data":{"root":{"values":[2,3]}}}`) // at & count & through suite.assertQueryResult(m, `{root{values(at:0,count:2,through:"a")}}`, `{"data":{"root":{"values":[1]}}}`) suite.assertQueryResult(m, `{root{values(at:0,count:2,through:"e")}}`, `{"data":{"root":{"values":[1,2]}}}`) // key & count & through suite.assertQueryResult(m, `{root{values(key:"c",count:2,through:"c")}}`, `{"data":{"root":{"values":[2]}}}`) suite.assertQueryResult(m, `{root{values(key:"c",count:2,through:"g")}}`, 
`{"data":{"root":{"values":[2,3]}}}`) } func (suite *QueryGraphQLSuite) TestMapKeys() { m := types.NewMap(suite.vs, types.String("a"), types.Number(1), types.String("c"), types.Number(2), types.String("e"), types.Number(3), types.String("g"), types.Number(4), ) suite.assertQueryResult(m, "{root{keys}}", `{"data":{"root":{"keys":["a","c","e","g"]}}}`) // count suite.assertQueryResult(m, "{root{keys(count:0)}}", `{"data":{"root":{"keys":[]}}}`) suite.assertQueryResult(m, "{root{keys(count:2)}}", `{"data":{"root":{"keys":["a","c"]}}}`) suite.assertQueryResult(m, "{root{keys(count:3)}}", `{"data":{"root":{"keys":["a","c","e"]}}}`) suite.assertQueryResult(m, "{root{keys(count: -1)}}", `{"data":{"root":{"keys":[]}}}`) suite.assertQueryResult(m, "{root{keys(count:5)}}", `{"data":{"root":{"keys":["a","c","e","g"]}}}`) // at suite.assertQueryResult(m, "{root{keys(at:0)}}", `{"data":{"root":{"keys":["a","c","e","g"]}}}`) suite.assertQueryResult(m, "{root{keys(at:-1)}}", `{"data":{"root":{"keys":["a","c","e","g"]}}}`) suite.assertQueryResult(m, "{root{keys(at:2)}}", `{"data":{"root":{"keys":["e","g"]}}}`) suite.assertQueryResult(m, "{root{keys(at:5)}}", `{"data":{"root":{"keys":[]}}}`) // at & count suite.assertQueryResult(m, "{root{keys(at:0,count:2)}}", `{"data":{"root":{"keys":["a","c"]}}}`) suite.assertQueryResult(m, "{root{keys(at:-1,count:2)}}", `{"data":{"root":{"keys":["a","c"]}}}`) suite.assertQueryResult(m, "{root{keys(at:1,count:2)}}", `{"data":{"root":{"keys":["c","e"]}}}`) suite.assertQueryResult(m, "{root{keys(at:2,count:1)}}", `{"data":{"root":{"keys":["e"]}}}`) suite.assertQueryResult(m, "{root{keys(at:2,count:0)}}", `{"data":{"root":{"keys":[]}}}`) suite.assertQueryResult(m, "{root{keys(at:2,count:10)}}", `{"data":{"root":{"keys":["e","g"]}}}`) // key suite.assertQueryResult(m, `{root{keys(key:"e")}}`, `{"data":{"root":{"keys":["e"]}}}`) suite.assertQueryResult(m, `{root{keys(key:"g")}}`, `{"data":{"root":{"keys":["g"]}}}`) // "f", no count/through so asking 
for exact match suite.assertQueryResult(m, `{root{keys(key:"f")}}`, `{"data":{"root":{"keys":[]}}}`) // "x" is larger than end suite.assertQueryResult(m, `{root{keys(key:"x")}}`, `{"data":{"root":{"keys":[]}}}`) // key & at // at is ignored when key is present suite.assertQueryResult(m, `{root{keys(key:"e",at:2)}}`, `{"data":{"root":{"keys":["e"]}}}`) // key & count suite.assertQueryResult(m, `{root{keys(key:"c",count:2)}}`, `{"data":{"root":{"keys":["c","e"]}}}`) suite.assertQueryResult(m, `{root{keys(key:"c",count:0)}}`, `{"data":{"root":{"keys":[]}}}`) suite.assertQueryResult(m, `{root{keys(key:"c",count:-1)}}`, `{"data":{"root":{"keys":[]}}}`) suite.assertQueryResult(m, `{root{keys(key:"e",count:5)}}`, `{"data":{"root":{"keys":["e","g"]}}}`) // through suite.assertQueryResult(m, `{root{keys(through:"c")}}`, `{"data":{"root":{"keys":["a","c"]}}}`) suite.assertQueryResult(m, `{root{keys(through:"b")}}`, `{"data":{"root":{"keys":["a"]}}}`) suite.assertQueryResult(m, `{root{keys(through:"0")}}`, `{"data":{"root":{"keys":[]}}}`) // key & through suite.assertQueryResult(m, `{root{keys(key:"c", through:"c")}}`, `{"data":{"root":{"keys":["c"]}}}`) suite.assertQueryResult(m, `{root{keys(key:"c",through:"e")}}`, `{"data":{"root":{"keys":["c","e"]}}}`) // through & count suite.assertQueryResult(m, `{root{keys(through:"c",count:1)}}`, `{"data":{"root":{"keys":["a"]}}}`) suite.assertQueryResult(m, `{root{keys(through:"b",count:0)}}`, `{"data":{"root":{"keys":[]}}}`) suite.assertQueryResult(m, `{root{keys(through:"0",count:10)}}`, `{"data":{"root":{"keys":[]}}}`) // at & through suite.assertQueryResult(m, `{root{keys(at:0,through:"a")}}`, `{"data":{"root":{"keys":["a"]}}}`) suite.assertQueryResult(m, `{root{keys(at:1,through:"e")}}`, `{"data":{"root":{"keys":["c","e"]}}}`) // at & count & through suite.assertQueryResult(m, `{root{keys(at:0,count:2,through:"a")}}`, `{"data":{"root":{"keys":["a"]}}}`) suite.assertQueryResult(m, `{root{keys(at:0,count:2,through:"e")}}`, 
`{"data":{"root":{"keys":["a","c"]}}}`) // key & count & through suite.assertQueryResult(m, `{root{keys(key:"c",count:2,through:"c")}}`, `{"data":{"root":{"keys":["c"]}}}`) suite.assertQueryResult(m, `{root{keys(key:"c",count:2,through:"g")}}`, `{"data":{"root":{"keys":["c","e"]}}}`) } func (suite *QueryGraphQLSuite) TestMapNullable() { // When selecting the result based on keys the values may be null. m := types.NewMap(suite.vs, types.String("a"), types.Number(1), types.String("c"), types.Number(2), ) for _, entriesKey := range []string{"elements", "entries"} { suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:["a","b","c"]){value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"value":1},{"value":null},{"value":2}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:["a","b","c"]){key}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"a"},{"key":"b"},{"key":"c"}]}}}`) suite.assertQueryResult(m, `{root{`+entriesKey+`(keys:["a","b","c"]){key value}}}`, `{"data":{"root":{"`+entriesKey+`":[{"key":"a","value":1},{"key":"b","value":null},{"key":"c","value":2}]}}}`) } suite.assertQueryResult(m, `{root{values(keys:["a","b","c"])}}`, `{"data":{"root":{"values":[1,null,2]}}}`) suite.assertQueryResult(m, `{root{keys(keys:["a","b","c"])}}`, `{"data":{"root":{"keys":["a","b","c"]}}}`) } func (suite *QueryGraphQLSuite) TestStructWithOptionalField() { tm := NewTypeMap() rootValue := types.NewStruct("", types.StructData{ "n": types.Number(42), }) rootType := NomsTypeToGraphQLType(types.MakeStructType("", types.StructField{Name: "n", Type: types.NumberType, Optional: false}, types.StructField{Name: "s", Type: types.StringType, Optional: true}, ), false, tm) queryObj := graphql.NewObject(graphql.ObjectConfig{ Name: rootQueryKey, Fields: graphql.Fields{ rootKey: &graphql.Field{ Type: rootType, Resolve: func(p graphql.ResolveParams) (interface{}, error) { return MaybeGetScalar(rootValue), nil }, }, }}) schemaConfig := graphql.SchemaConfig{Query: queryObj} schema, err := 
graphql.NewSchema(schemaConfig) suite.NoError(err) ctx := NewContext(suite.vs) query := `{root{n s}}` r := graphql.Do(graphql.Params{ Schema: schema, RequestString: query, Context: ctx, }) suite.Equal(map[string]interface{}{"root": map[string]interface{}{"n": float64(42), "s": nil}}, r.Data) } func (suite *QueryGraphQLSuite) TestMutationScalarArgs() { test := func(query, expected string, nomsType *types.Type) { tc := NewTypeConverter() inType, err := tc.NomsTypeToGraphQLInputType(nomsType) suite.NoError(err) outType := tc.NomsTypeToGraphQLType(nomsType) suite.assertMutationTypes(query, expected, tc, inType, outType, func(p graphql.ResolveParams) (interface{}, error) { return p.Args["new"], nil }) } test(`mutation {test(new: 123)}`, `{"data": {"test": 123}}`, types.NumberType) test(`mutation {test(new: 0)}`, `{"data": {"test": 0}}`, types.NumberType) test(`mutation {test(new: "hi")}`, `{"data": {"test": "hi"}}`, types.StringType) test(`mutation {test(new: "")}`, `{"data": {"test": ""}}`, types.StringType) test(`mutation {test(new: true)}`, `{"data": {"test": true}}`, types.BoolType) test(`mutation {test(new: false)}`, `{"data": {"test": false}}`, types.BoolType) } func (suite *QueryGraphQLSuite) TestMutationWeirdosArgs() { test := func(query, expected string, nomsType *types.Type) { tc := NewTypeConverter() inType, err := tc.NomsTypeToGraphQLInputType(nomsType) suite.NoError(err) outType := graphql.String suite.assertMutationTypes(query, expected, tc, inType, outType, func(p graphql.ResolveParams) (interface{}, error) { return p.Args["new"], nil }) } test(`mutation {test(new: "#abc")}`, `{"data": {"test": "#abc"}}`, types.MakeRefType(types.NumberType)) test(`mutation {test(new: "0123456789")}`, `{"data": {"test": "0123456789"}}`, types.BlobType) } func (suite *QueryGraphQLSuite) assertMutationTypes(query, expected string, tc *TypeConverter, inType graphql.Input, outType graphql.Type, resolver graphql.FieldResolveFn) { buf := &bytes.Buffer{} root := types.Number(0) 
schemaConfig := graphql.SchemaConfig{ Mutation: graphql.NewObject(graphql.ObjectConfig{ Name: "Mutation", Fields: graphql.Fields{ "test": &graphql.Field{ Type: outType, Args: graphql.FieldConfigArgument{ "new": &graphql.ArgumentConfig{ Type: inType, }, }, Resolve: resolver, }, }, }), } queryWithSchemaConfig(root, query, schemaConfig, suite.vs, tc, buf) suite.JSONEq(expected, buf.String()) } func (suite *QueryGraphQLSuite) TestMutationCollectionArgs() { test := func(query, expected string, expectedArg interface{}, nomsType *types.Type) { tc := NewTypeConverter() inType, err := tc.NomsTypeToGraphQLInputType(nomsType) suite.NoError(err) outType := graphql.Boolean suite.assertMutationTypes(query, expected, tc, inType, outType, func(p graphql.ResolveParams) (interface{}, error) { suite.Equal(expectedArg, p.Args["new"]) return true, nil }) } test(`mutation {test(new: [0, 1, 2, 3])}`, `{"data": {"test": true}}`, []interface{}{float64(0), float64(1), float64(2), float64(3)}, types.MakeListType(types.NumberType)) test(`mutation {test(new: [])}`, `{"data": {"test": true}}`, []interface{}{}, types.MakeListType(types.NumberType)) test(`mutation {test(new: [0, 1, 2, 3])}`, `{"data": {"test": true}}`, []interface{}{float64(0), float64(1), float64(2), float64(3)}, types.MakeSetType(types.NumberType)) test(`mutation {test(new: [])}`, `{"data": {"test": true}}`, []interface{}{}, types.MakeSetType(types.NumberType)) test(`mutation { test(new: [ { key: 1, value: "a" }, { key: 2, value: "b" } ]) }`, `{"data": {"test": true}}`, []interface{}{ map[string]interface{}{"key": float64(1), "value": "a"}, map[string]interface{}{"key": float64(2), "value": "b"}, }, types.MakeMapType(types.NumberType, types.StringType)) test(`mutation {test(new: [])}`, `{"data": {"test": true}}`, []interface{}{}, types.MakeMapType(types.NumberType, types.StringType)) st := types.MakeStructTypeFromFields("N", types.FieldMap{ "f": types.NumberType, "b": types.BoolType, "s": types.StringType, }) test(`mutation 
{test(new: { f: 42, b: true, s: "hi" })}`, `{"data": {"test": true}}`, map[string]interface{}{"b": true, "f": float64(42), "s": "hi"}, st) } func (suite *QueryGraphQLSuite) TestMapWithComplexKeys() { m := types.NewMap(suite.vs, types.NewList(suite.vs, types.String("a")), types.Number(1), types.NewList(suite.vs, types.String("c")), types.Number(2), types.NewList(suite.vs, types.String("e")), types.Number(3), types.NewList(suite.vs, types.String("g")), types.Number(4), ) suite.assertQueryResult(m, `{root{values(key: ["e"])}}`, `{"data":{"root":{"values":[3]}}}`) suite.assertQueryResult(m, `{root{values(key: [])}}`, `{"data":{"root":{"values":[]}}}`) // The ordering here depends on the hash of the value... suite.assertQueryResult(m, `{root{values(key: ["a"], through: ["e"])}}`, `{"data":{"root":{"values":[1, 2, 3]}}}`) suite.assertQueryResult(m, `{root{values(keys: [["a"],["b"],["c"]])}}`, `{"data":{"root":{"values":[1, null, 2]}}}`) suite.assertQueryResult(m, `{ root { keys(keys: [["a"],["b"],["c"]]) { values } } }`, `{"data": { "root": { "keys": [ {"values": ["a"]}, {"values": ["b"]}, {"values": ["c"]} ] } }}`) m2 := types.NewMap(suite.vs, types.NewStruct("", types.StructData{ "n": types.String("a"), }), types.Number(1), types.NewStruct("", types.StructData{ "n": types.String("c"), }), types.Number(2), types.NewStruct("", types.StructData{ "n": types.String("e"), }), types.Number(3), types.NewStruct("", types.StructData{ "n": types.String("g"), }), types.Number(4), ) suite.assertQueryResult(m2, `{root{values(key: {n: "e"})}}`, `{"data":{"root":{"values":[3]}}}`) suite.assertQueryResult(m2, `{root{values(key: {n: "x"})}}`, `{"data":{"root":{"values":[]}}}`) // The order is based on hash suite.assertQueryResult(m2, `{root{values(key: {n: "g"}, through: {n: "c"})}}`, `{"data":{"root":{"values":[4,2]}}}`) suite.assertQueryResult(m2, `{root{values(keys: [{n: "a"}, {n: "b"}, {n: "c"}])}}`, `{"data":{"root":{"values":[1, null, 2]}}}`) suite.assertQueryResult(m2, 
`{root{keys(keys: [{n: "a"}, {n: "b"}, {n: "c"}]) { n }}}`, `{"data":{"root":{"keys":[{"n": "a"}, {"n": "b"}, {"n": "c"}]}}}`) } func (suite *QueryGraphQLSuite) TestSetWithComplexKeys() { s := types.NewSet(suite.vs, types.NewList(suite.vs, types.String("a")), types.NewList(suite.vs, types.String("c")), types.NewList(suite.vs, types.String("e")), types.NewList(suite.vs, types.String("g")), ) suite.assertQueryResult(s, `{root{values(key: ["e"]) { values }}}`, `{"data":{"root":{"values":[{"values":["e"]}]}}}`) suite.assertQueryResult(s, `{root{values(key: []) { values }}}`, `{"data":{"root":{"values":[]}}}`) // The ordering here depends on the hash of the value... suite.assertQueryResult(s, `{root{values(key: ["g"], through: ["c"]) { values }}}`, `{"data":{"root":{"values":[{"values":["g"]},{"values":["a"]},{"values":["c"]}]}}}`) s2 := types.NewSet(suite.vs, types.NewStruct("", types.StructData{ "n": types.String("a"), }), types.NewStruct("", types.StructData{ "n": types.String("c"), }), types.NewStruct("", types.StructData{ "n": types.String("e"), }), types.NewStruct("", types.StructData{ "n": types.String("g"), }), ) suite.assertQueryResult(s2, `{root{values(key: {n: "e"}) { n } }}`, `{"data":{"root":{"values":[{"n": "e"}]}}}`) suite.assertQueryResult(s2, `{root{values(key: {n: "x"}) { n } }}`, `{"data":{"root":{"values":[]}}}`) // The order is based on hash suite.assertQueryResult(s2, `{root{values(key: {n: "c"}, through: {n: "e"}) { n }}}`, `{"data":{"root":{"values":[{"n": "c"}, {"n": "e"}]}}}`) } func (suite *QueryGraphQLSuite) TestInputToNomsValue() { test := func(expected types.Value, val interface{}) { suite.True(expected.Equals(InputToNomsValue(suite.vs, val, types.TypeOf(expected)))) } test(types.Number(42), int(42)) test(types.Number(0), int(0)) test(types.Number(1.23), float64(1.23)) test(types.Number(0), float64(0)) test(types.Bool(true), true) test(types.Bool(false), false) test(types.String("hi"), "hi") test(types.String(""), "") 
test(types.NewList(suite.vs, types.Number(42)), []interface{}{float64(42)}) test(types.NewList(suite.vs, types.Number(1), types.Number(2)), []interface{}{float64(1), float64(2)}) test(types.NewSet(suite.vs, types.Number(42)), []interface{}{float64(42)}) test(types.NewSet(suite.vs, types.Number(1), types.Number(2)), []interface{}{float64(1), float64(2)}) test(types.NewMap(suite.vs, types.String("a"), types.Number(1), types.String("b"), types.Number(2), ), []interface{}{ map[string]interface{}{"key": "a", "value": 1}, map[string]interface{}{"key": "b", "value": 2}, }) test(types.NewMap(suite.vs, types.NewList(suite.vs, types.String("a")), types.Number(1), types.NewList(suite.vs, types.String("b")), types.Number(2), ), []interface{}{ map[string]interface{}{"key": []interface{}{"a"}, "value": 1}, map[string]interface{}{"key": []interface{}{"b"}, "value": 2}, }) test(types.NewMap(suite.vs, types.NewStruct("S", types.StructData{"a": types.Number(1)}), types.Number(11), types.NewStruct("S", types.StructData{"a": types.Number(2)}), types.Number(22), ), []interface{}{ map[string]interface{}{"key": map[string]interface{}{"a": float64(1)}, "value": 11}, map[string]interface{}{"key": map[string]interface{}{"a": float64(2)}, "value": 22}, }) test(types.NewSet(suite.vs, types.NewStruct("S", types.StructData{"a": types.Number(1)}), types.NewStruct("S", types.StructData{"a": types.Number(2)}), ), []interface{}{ map[string]interface{}{"a": float64(1)}, map[string]interface{}{"a": float64(2)}, }) expected := types.NewStruct("S", types.StructData{ "x": types.Number(42), }) expectedType := types.MakeStructType("S", types.StructField{Name: "a", Type: types.BoolType, Optional: true}, types.StructField{Name: "x", Type: types.NumberType, Optional: false}, ) val := map[string]interface{}{ "x": float64(42), } suite.Equal(expected, InputToNomsValue(suite.vs, val, expectedType)) val = map[string]interface{}{ "x": float64(42), "a": nil, } suite.Equal(expected, InputToNomsValue(suite.vs, val, 
expectedType)) val = map[string]interface{}{ "x": nil, } suite.Panics(func() { InputToNomsValue(suite.vs, val, expectedType) }) } func (suite *QueryGraphQLSuite) TestErrorsInInputType() { ut := types.MakeUnionType(types.BoolType, types.NumberType) test := func(t *types.Type) { tm := NewTypeMap() _, err := NomsTypeToGraphQLInputType(t, tm) suite.Error(err) } test(ut) test(types.MakeListType(ut)) test(types.MakeSetType(ut)) test(types.MakeMapType(ut, types.BoolType)) test(types.MakeMapType(types.BoolType, ut)) test(types.MakeMapType(ut, ut)) test(types.MakeStructTypeFromFields("", types.FieldMap{"u": ut})) test(types.MakeStructTypeFromFields("S", types.FieldMap{ "l": types.MakeListType(types.MakeCycleType("S")), })) test(types.MakeStructTypeFromFields("S", types.FieldMap{ "n": types.NumberType, "l": types.MakeListType(types.MakeCycleType("S")), })) } func (suite *QueryGraphQLSuite) TestVariables() { test := func(rootValue types.Value, expected string, query string, vars map[string]interface{}) { tc := NewTypeConverter() ctx := NewContext(suite.vs) schema, err := graphql.NewSchema(graphql.SchemaConfig{ Query: tc.NewRootQueryObject(rootValue), }) suite.NoError(err) r := graphql.Do(graphql.Params{ Schema: schema, RequestString: query, Context: ctx, VariableValues: vars, }) b, err := json.Marshal(r) suite.NoError(err) suite.JSONEq(expected, string(b)) } v := types.NewList(suite.vs, types.Number(0), types.Number(1), types.Number(2), types.Number(3)) test(v, `{"data":{"root":{"values":[0,1,2,3]}}}`, `query Test($c: Int) { root { values(count: $c) } }`, nil) test(v, `{"data":{"root":{"values":[0,1]}}}`, `query Test($c: Int) { root { values(count: $c) } }`, map[string]interface{}{ "c": 2, }) m := types.NewMap(suite.vs, types.String("a"), types.Number(0), types.String("b"), types.Number(1), types.String("c"), types.Number(2), types.String("d"), types.Number(3), ) test(m, `{"data":{"root":{"values":[1]}}}`, `query Test($k: String) { root { values(key: $k) } }`, 
map[string]interface{}{ "k": "b", }) test(m, `{"data":{"root":{"values":[1, 2]}}}`, `query Test($k: String, $t: String) { root { values(key: $k, through: $t) } }`, map[string]interface{}{ "k": "b", "t": "c", }) test(m, `{"data":{"root":{"values":[0, 2]}}}`, `query Test($ks: [String!]!) { root { values(keys: $ks) } }`, map[string]interface{}{ "ks": []string{"a", "c"}, }) m2 := types.NewMap(suite.vs, types.NewStruct("S", types.StructData{"n": types.String("a")}), types.Number(0), types.NewStruct("S", types.StructData{"n": types.String("b")}), types.Number(1), types.NewStruct("S", types.StructData{"n": types.String("c")}), types.Number(2), types.NewStruct("S", types.StructData{"n": types.String("d")}), types.Number(3), ) keyType := types.TypeOf(m2).Desc.(types.CompoundDesc).ElemTypes[0] q := fmt.Sprintf(`query Test($k: %s) { root { values(key: $k) } }`, GetInputTypeName(keyType)) test(m2, `{"data":{"root":{"values":[1]}}}`, q, map[string]interface{}{ "k": map[string]interface{}{ "n": "b", }, }) q = fmt.Sprintf(`query Test($ks: [%s!]) { root { values(keys: $ks) } }`, GetInputTypeName(keyType)) test(m2, `{"data":{"root":{"values":[0, 3]}}}`, q, map[string]interface{}{ "ks": []interface{}{ map[string]interface{}{ "n": "a", }, map[string]interface{}{ "n": "d", }, }, }) test(m2, `{"data":null,"errors":[{"message":"Variable \"$ks\" got invalid value [{}].\nIn element #1: In field \"n\": Expected \"String!\", found null.","locations":[{"line":1,"column":12}]}]}`, q, map[string]interface{}{ "ks": []interface{}{ map[string]interface{}{}, }, }, ) test(m2, `{"data":null,"errors":[{"message":"Variable \"$ks\" got invalid value [{\"m\":\"b\",\"n\":\"a\"}].\nIn element #1: In field \"m\": Unknown field.","locations":[{"line":1,"column":12}]}]}`, q, map[string]interface{}{ "ks": []interface{}{ map[string]interface{}{ "n": "a", "m": "b", }, }, }, ) test(m2, `{"data":null,"errors":[{"message":"Variable \"$ks\" got invalid value [{\"n\":null}].\nIn element #1: In field \"n\": Expected 
\"String!\", found null.","locations":[{"line":1,"column":12}]}]}`, q, map[string]interface{}{ "ks": []interface{}{ map[string]interface{}{ "n": nil, }, }, }, ) test(m2, `{"data":null,"errors":[{"message":"Variable \"$ks\" got invalid value [null].\nIn element #1: Expected \"SInput_cgmdbo!\", found null.","locations":[{"line":1,"column":12}]}]}`, q, map[string]interface{}{ "ks": []interface{}{ nil, }, }, ) m3 := types.NewMap(suite.vs, types.NewMap(suite.vs, types.Number(0), types.String("zero")), types.Bool(false), types.NewMap(suite.vs, types.Number(1), types.String("one")), types.Bool(true), ) keyNomsType := types.TypeOf(m3).Desc.(types.CompoundDesc).ElemTypes[0] tc := NewTypeConverter() keyGraphQLInputType, err := tc.NomsTypeToGraphQLInputType(keyNomsType) suite.NoError(err) q = fmt.Sprintf(`query Test($k: %s!) { root { values(key: $k) } }`, keyGraphQLInputType.String()) test(m3, `{"data":{"root":{"values":[false]}}}`, q, map[string]interface{}{ "k": []interface{}{ map[string]interface{}{ "key": float64(0), "value": "zero", }, }, }) test(m3, `{"data":null,"errors":[{"message":"Variable \"$k\" got invalid value [{\"key\":0}].\nIn element #1: In field \"value\": Expected \"String!\", found null.","locations":[{"line":1,"column":12}]}]}`, q, map[string]interface{}{ "k": []interface{}{ map[string]interface{}{ "key": float64(0), }, }, }) test(m3, `{"data":null,"errors":[{"message":"Variable \"$k\" got invalid value [{\"key\":\"zero\"}].\nIn element #1: In field \"key\": Expected type \"Float\", found \"zero\".\nIn element #2: In field \"value\": Expected \"String!\", found null.","locations":[{"line":1,"column":12}]}]}`, q, map[string]interface{}{ "k": []interface{}{ map[string]interface{}{ "key": "zero", }, }, }) test(m3, `{"data":null,"errors":[{"message":"Variable \"$k\" got invalid value [{\"extra\":false,\"key\":0,\"value\":\"zero\"}].\nIn element #1: In field \"extra\": Unknown field.","locations":[{"line":1,"column":12}]}]}`, q, map[string]interface{}{ "k": 
[]interface{}{ map[string]interface{}{ "key": float64(0), "value": "zero", "extra": false, }, }, }) test(m3, `{"data":null,"errors":[{"message":"Variable \"$k\" got invalid value [null].\nIn element #1: Expected \"NumberStringEntryInput!\", found null.","locations":[{"line":1,"column":12}]}]}`, q, map[string]interface{}{ "k": []interface{}{ nil, }, }) } func (suite *QueryGraphQLSuite) TestNameFunc() { test := func(tc *TypeConverter, rootValue types.Value, expected string, query string, vars map[string]interface{}) { ctx := NewContext(suite.vs) schema, err := graphql.NewSchema(graphql.SchemaConfig{ Query: tc.NewRootQueryObject(rootValue), }) suite.NoError(err) r := graphql.Do(graphql.Params{ Schema: schema, RequestString: query, Context: ctx, VariableValues: vars, }) b, err := json.Marshal(r) suite.NoError(err) suite.JSONEq(expected, string(b)) } aVal := types.NewStruct("A", types.StructData{ "a": types.Number(1), }) bVal := types.NewStruct("B", types.StructData{ "b": types.Number(2), }) list := types.NewList(suite.vs, aVal, bVal) tc := NewTypeConverter() tc.NameFunc = func(nomsType *types.Type, isInputType bool) string { if nomsType.Equals(types.TypeOf(aVal)) { return "A" } if nomsType.Equals(types.TypeOf(bVal)) { return "BBB" } return DefaultNameFunc(nomsType, isInputType) } query := `query { root { values { ... on A { a } ... on BBB { b } } } }` expected := `{ "data": { "root": { "values": [ {"a": 1}, {"b": 2} ] } } }` test(tc, list, expected, query, nil) set := types.NewSet(suite.vs, aVal, types.NewStruct("A", types.StructData{ "a": types.Number(2), }), types.NewStruct("A", types.StructData{ "a": types.Number(3), }), ) tc = NewTypeConverter() tc.NameFunc = func(nomsType *types.Type, isInputType bool) string { if nomsType.Equals(types.TypeOf(aVal)) { if isInputType { return "AI" } return "A" } return DefaultNameFunc(nomsType, isInputType) } query = `query ($key: AI!) 
{ root { values(key: $key) { a } } }` expected = `{ "data": { "root": { "values": [ {"a": 2} ] } } }` test(tc, set, expected, query, map[string]interface{}{ "key": map[string]interface{}{"a": 2}, }) } func TestGetListElementsWithSet(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() v := types.NewSet(vs, types.Number(0), types.Number(1), types.Number(2)) r := getListElements(vs, v, map[string]interface{}{}) assert.Equal([]interface{}{float64(0), float64(1), float64(2)}, r) r = getListElements(vs, v, map[string]interface{}{ atKey: 1, }) assert.Equal([]interface{}{float64(1), float64(2)}, r) r = getListElements(vs, v, map[string]interface{}{ countKey: 2, }) assert.Equal([]interface{}{float64(0), float64(1)}, r) } func TestNoErrorOnNonCyclicTypeRefsInputType(t *testing.T) { assert := assert.New(t) type User struct { ID string `noms:"id"` } type Account struct { PendingUsers map[string]User Users map[string]User } var a Account typ := marshal.MustMarshalType(a) tc := NewTypeConverter() _, err := tc.NomsTypeToGraphQLInputType(typ) assert.NoError(err) } func TestErrorOnCyclicTypeRefsInputType(t *testing.T) { assert := assert.New(t) type Node struct { Children map[string]Node } var n Node typ := marshal.MustMarshalType(n) tc := NewTypeConverter() _, err := tc.NomsTypeToGraphQLInputType(typ) assert.Error(err) } ================================================ FILE: go/ngql/types.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package ngql import ( "context" "errors" "fmt" "strings" "github.com/attic-labs/graphql" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) // TypeConverter provides functions to convert between Noms types and GraphQL // types. type TypeConverter struct { tm TypeMap NameFunc NameFunc } // NewTypeConverter creates a new TypeConverter. 
func NewTypeConverter() *TypeConverter { return &TypeConverter{ TypeMap{}, DefaultNameFunc, } } // NameFunc defines how to compute the GraphQL name for a Noms type. type NameFunc func(nomsType *types.Type, isInputType bool) string func (tc *TypeConverter) getTypeName(nomsType *types.Type) string { return tc.NameFunc(nomsType, false) } func (tc *TypeConverter) getInputTypeName(nomsType *types.Type) string { return tc.NameFunc(nomsType, true) } // NomsTypeToGraphQLType creates a GraphQL type from a Noms type that knows how // to resolve the Noms values. func (tc *TypeConverter) NomsTypeToGraphQLType(nomsType *types.Type) graphql.Type { return tc.nomsTypeToGraphQLType(nomsType, false) } // NomsTypeToGraphQLInputType creates a GraphQL input type from a Noms type. // Input types may not be unions or cyclic structs. If we encounter those // this returns an error. func (tc *TypeConverter) NomsTypeToGraphQLInputType(nomsType *types.Type) (graphql.Input, error) { return tc.nomsTypeToGraphQLInputType(nomsType) } // TypeMap is used as a cache in NomsTypeToGraphQLType and // NomsTypeToGraphQLInputType. type TypeMap map[typeMapKey]graphql.Type type typeMapKey struct { name string boxedIfScalar bool } // NewTypeMap creates a new map that is used as a cache in // NomsTypeToGraphQLType and NomsTypeToGraphQLInputType. func NewTypeMap() *TypeMap { return &TypeMap{} } // GraphQL has two type systems. // - One for output types which is used with resolvers to produce an output set. // - And another one for input types. Input types are used to verify that the // JSON like data passes as arguments are of the right type. // There is some overlap here. Scalars are the same and List can be used in // both. // The significant difference is graphql.Object (output) vs graphql.InputObject // Input types cannot be unions and input object types cannot contain cycles. 
type graphQLTypeMode uint8 const ( inputMode graphQLTypeMode = iota outputMode ) // In terms of resolving a graph of data, there are three types of value: // scalars, lists and maps. During resolution, we are converting some noms // value to a graphql value. A getFieldFn will be invoked for a matching noms // type. Its job is to retrieve the sub-value from the noms type which is // mapped to a graphql map as a fieldname. type getFieldFn func(v interface{}, fieldName string, ctx context.Context) types.Value // When a field name is resolved, it may take key:value arguments. A // getSubvaluesFn handles returning one or more *noms* values whose presence is // indicated by the provided arguments. type getSubvaluesFn func(vrw types.ValueReadWriter, v types.Value, args map[string]interface{}) interface{} // GraphQL requires all memberTypes in a Union to be Structs, so when a noms // union contains a scalar, we represent it in that context as a "boxed" value. // E.g. // Boolean! => // type BooleanValue { // scalarValue: Boolean! // } func (tc *TypeConverter) scalarToValue(nomsType *types.Type, scalarType graphql.Type) graphql.Type { return graphql.NewObject(graphql.ObjectConfig{ Name: fmt.Sprintf("%sValue", tc.getTypeName(nomsType)), Fields: graphql.Fields{ scalarValue: &graphql.Field{ Type: graphql.NewNonNull(scalarType), Resolve: func(p graphql.ResolveParams) (interface{}, error) { return p.Source, nil // p.Source is already a go-native scalar type }, }, }}) } func isScalar(nomsType *types.Type) bool { switch nomsType { case types.BoolType, types.NumberType, types.StringType: return true default: return false } } // NomsTypeToGraphQLType creates a GraphQL type from a Noms type that knows how // to resolve the Noms values. 
func NomsTypeToGraphQLType(nomsType *types.Type, boxedIfScalar bool, tm *TypeMap) graphql.Type { tc := TypeConverter{*tm, DefaultNameFunc} return tc.nomsTypeToGraphQLType(nomsType, boxedIfScalar) } func (tc *TypeConverter) nomsTypeToGraphQLType(nomsType *types.Type, boxedIfScalar bool) graphql.Type { name := tc.getTypeName(nomsType) key := typeMapKey{name, boxedIfScalar && isScalar(nomsType)} gqlType, ok := tc.tm[key] if ok { return gqlType } // The graphql package has built in support for recursive types using // FieldsThunk which allows the inner type to refer to an outer type by // lazily initializing the fields. switch nomsType.TargetKind() { case types.NumberKind: gqlType = graphql.Float if boxedIfScalar { gqlType = tc.scalarToValue(nomsType, gqlType) } case types.StringKind: gqlType = graphql.String if boxedIfScalar { gqlType = tc.scalarToValue(nomsType, gqlType) } case types.BoolKind: gqlType = graphql.Boolean if boxedIfScalar { gqlType = tc.scalarToValue(nomsType, gqlType) } case types.StructKind: gqlType = tc.structToGQLObject(nomsType) case types.ListKind, types.SetKind: gqlType = tc.listAndSetToGraphQLObject(nomsType) case types.MapKind: gqlType = tc.mapToGraphQLObject(nomsType) case types.RefKind: gqlType = tc.refToGraphQLObject(nomsType) case types.UnionKind: gqlType = tc.unionToGQLUnion(nomsType) case types.BlobKind, types.ValueKind, types.TypeKind: // TODO: https://github.com/attic-labs/noms/issues/3155 gqlType = graphql.String case types.CycleKind: panic("not reached") // we should never attempt to create a schema for any unresolved cycle default: panic("not reached") } tc.tm[key] = gqlType return gqlType } // NomsTypeToGraphQLInputType creates a GraphQL input type from a Noms type. // Input types may not be unions or cyclic structs. If we encounter those // this returns an error. 
func NomsTypeToGraphQLInputType(nomsType *types.Type, tm *TypeMap) (graphql.Input, error) { tc := TypeConverter{*tm, DefaultNameFunc} return tc.nomsTypeToGraphQLInputType(nomsType) } func (tc *TypeConverter) nomsTypeToGraphQLInputType(nomsType *types.Type) (graphql.Input, error) { // GraphQL input types do not support cycles. if types.HasStructCycles(nomsType) { return nil, errors.New("GraphQL input type cannot contain cycles") } name := tc.getInputTypeName(nomsType) key := typeMapKey{name, false} gqlType, ok := tc.tm[key] if ok { return gqlType, nil } var err error switch nomsType.TargetKind() { case types.NumberKind: gqlType = graphql.Float case types.StringKind: gqlType = graphql.String case types.BoolKind: gqlType = graphql.Boolean case types.StructKind: gqlType, err = tc.structToGQLInputObject(nomsType) case types.ListKind, types.SetKind: gqlType, err = tc.listAndSetToGraphQLInputObject(nomsType) case types.MapKind: gqlType, err = tc.mapToGraphQLInputObject(nomsType) case types.RefKind: gqlType = graphql.String case types.UnionKind: return nil, errors.New("GraphQL input type cannot contain unions") case types.BlobKind, types.ValueKind, types.TypeKind: // TODO: https://github.com/attic-labs/noms/issues/3155 gqlType = graphql.String case types.CycleKind: panic("not reachable") // This is handled at the top of nomsTypeToGraphQLInputType default: panic("not reached") } if err != nil { return nil, err } tc.tm[key] = gqlType return gqlType, nil } func isEmptyNomsUnion(nomsType *types.Type) bool { return nomsType.TargetKind() == types.UnionKind && len(nomsType.Desc.(types.CompoundDesc).ElemTypes) == 0 } // Creates a union of structs type. 
func (tc *TypeConverter) unionToGQLUnion(nomsType *types.Type) *graphql.Union { nomsMemberTypes := nomsType.Desc.(types.CompoundDesc).ElemTypes memberTypes := make([]*graphql.Object, len(nomsMemberTypes)) for i, nomsUnionType := range nomsMemberTypes { // Member types cannot be non-null and must be struct (graphl.Object) memberTypes[i] = tc.nomsTypeToGraphQLType(nomsUnionType, true).(*graphql.Object) } return graphql.NewUnion(graphql.UnionConfig{ Name: tc.getTypeName(nomsType), Types: memberTypes, ResolveType: func(p graphql.ResolveTypeParams) *graphql.Object { if v, ok := p.Value.(types.Value); ok { // We cannot just get the type of the value here. GraphQL requires // us to return one of the types in memberTypes. for i, t := range nomsMemberTypes { if types.IsValueSubtypeOf(v, t) { return memberTypes[i] } } return nil } var nomsType *types.Type switch p.Value.(type) { case float64: nomsType = types.NumberType case string: nomsType = types.StringType case bool: nomsType = types.BoolType } return tc.nomsTypeToGraphQLType(nomsType, true).(*graphql.Object) }, }) } func (tc *TypeConverter) structToGQLObject(nomsType *types.Type) *graphql.Object { return graphql.NewObject(graphql.ObjectConfig{ Name: tc.getTypeName(nomsType), Fields: graphql.FieldsThunk(func() graphql.Fields { structDesc := nomsType.Desc.(types.StructDesc) fields := graphql.Fields{ "hash": &graphql.Field{ Type: graphql.NewNonNull(graphql.String), Resolve: func(p graphql.ResolveParams) (interface{}, error) { return p.Source.(types.Struct).Hash().String(), nil }, }, } structDesc.IterFields(func(name string, nomsFieldType *types.Type, optional bool) { fieldType := tc.nomsTypeToGraphQLType(nomsFieldType, false) if !optional { fieldType = graphql.NewNonNull(fieldType) } fields[name] = &graphql.Field{ Type: fieldType, Resolve: func(p graphql.ResolveParams) (interface{}, error) { if field, ok := p.Source.(types.Struct).MaybeGet(name); ok { return MaybeGetScalar(field), nil } return nil, nil }, } }) return 
fields }), }) } func (tc *TypeConverter) listAndSetToGraphQLInputObject(nomsType *types.Type) (graphql.Input, error) { nomsValueType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] elemType, err := tc.nomsTypeToGraphQLInputType(nomsValueType) if err != nil { return nil, err } return graphql.NewList(graphql.NewNonNull(elemType)), nil } func (tc *TypeConverter) mapToGraphQLInputObject(nomsType *types.Type) (graphql.Input, error) { nomsKeyType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] nomsValueType := nomsType.Desc.(types.CompoundDesc).ElemTypes[1] keyType, err := tc.nomsTypeToGraphQLInputType(nomsKeyType) if err != nil { return nil, err } valueType, err := tc.nomsTypeToGraphQLInputType(nomsValueType) if err != nil { return nil, err } entryType := tc.mapEntryToGraphQLInputObject(keyType, valueType, nomsKeyType, nomsValueType) return graphql.NewList(entryType), nil } func (tc *TypeConverter) structToGQLInputObject(nomsType *types.Type) (graphql.Input, error) { var err error rv := graphql.NewInputObject(graphql.InputObjectConfig{ Name: tc.getInputTypeName(nomsType), Fields: graphql.InputObjectConfigFieldMapThunk(func() graphql.InputObjectConfigFieldMap { structDesc := nomsType.Desc.(types.StructDesc) fields := make(graphql.InputObjectConfigFieldMap, structDesc.Len()) structDesc.IterFields(func(name string, nomsFieldType *types.Type, optional bool) { if err != nil { return } var fieldType graphql.Input fieldType, err = tc.nomsTypeToGraphQLInputType(nomsFieldType) if err != nil { return } if !optional { fieldType = graphql.NewNonNull(fieldType) } fields[name] = &graphql.InputObjectFieldConfig{ Type: fieldType, } }) return fields }), }) if err != nil { return nil, err } return rv, nil } var listArgs = graphql.FieldConfigArgument{ atKey: &graphql.ArgumentConfig{Type: graphql.Int}, countKey: &graphql.ArgumentConfig{Type: graphql.Int}, } func getListElements(vrw types.ValueReadWriter, v types.Value, args map[string]interface{}) interface{} { l := 
v.(types.Collection) idx := 0 count := int(l.Len()) end := count if at, ok := args[atKey].(int); ok { idx = at } if c, ok := args[countKey].(int); ok { count = c } // Clamp ranges if count <= 0 || idx >= end { return ([]interface{})(nil) } if idx < 0 { idx = 0 } if idx+count > end { count = end - idx } values := make([]interface{}, count) cols, offset := types.LoadLeafNodes([]types.Collection{l}, uint64(idx), uint64(idx+count)) // Iterate the collections we got, skipping the first offset elements and bailing out // once we've filled values with count elements. elementsSeen := uint64(0) maybeAddElement := func(v types.Value) { if elementsSeen >= offset && elementsSeen-offset < uint64(count) { values[elementsSeen-offset] = MaybeGetScalar(v) } elementsSeen++ } // TODO: Use a cursor so we do not have to instantiate all values. @arv has a // change in the works that only creates Values as needed. for _, c := range cols { v := c.(types.Value) v.WalkValues(maybeAddElement) if elementsSeen-offset >= uint64(count) { break } } return values } func getSetElements(vrw types.ValueReadWriter, v types.Value, args map[string]interface{}) interface{} { s := v.(types.Set) iter, nomsKey, nomsThrough, count, singleExactMatch := getCollectionArgs(vrw, s, args, iteratorFactory{ IteratorFrom: func(from types.Value) interface{} { return s.IteratorFrom(from) }, IteratorAt: func(at uint64) interface{} { return s.IteratorAt(at) }, First: func() interface{} { return &setFirstIterator{s: s} }, }) if count == 0 { return ([]interface{})(nil) } setIter := iter.(types.SetIterator) values := make([]interface{}, 0, count) for i := uint64(0); i < count; i++ { v := setIter.Next() if v == nil { break } if singleExactMatch { if nomsKey.Equals(v) { values = append(values, MaybeGetScalar(v)) } break } if nomsThrough != nil { if !nomsThrough.Less(v) { values = append(values, MaybeGetScalar(v)) } else { break } } else { values = append(values, MaybeGetScalar(v)) } } return values } func 
getCollectionArgs(vrw types.ValueReadWriter, col types.Collection, args map[string]interface{}, factory iteratorFactory) (iter interface{}, nomsKey, nomsThrough types.Value, count uint64, singleExactMatch bool) { typ := types.TypeOf(col) length := col.Len() nomsKeyType := typ.Desc.(types.CompoundDesc).ElemTypes[0] if keys, ok := args[keysKey]; ok { slice := keys.([]interface{}) nomsKeys := make(types.ValueSlice, len(slice)) for i, v := range slice { var nomsValue types.Value nomsValue = InputToNomsValue(vrw, v, nomsKeyType) nomsKeys[i] = nomsValue } count = uint64(len(slice)) iter = &mapIteratorForKeys{ m: col.(types.Map), keys: nomsKeys, } return } nomsThrough = getThroughArg(vrw, nomsKeyType, args) count, singleExactMatch = getCountArg(length, args) if key, ok := args[keyKey]; ok { nomsKey = InputToNomsValue(vrw, key, nomsKeyType) iter = factory.IteratorFrom(nomsKey) } else if at, ok := args[atKey]; ok { idx := at.(int) if idx < 0 { idx = 0 } else if uint64(idx) > length { count = 0 return } iter = factory.IteratorAt(uint64(idx)) } else if count == 1 && !singleExactMatch { // no key, no at, no through, but a count:1 iter = factory.First() } else { iter = factory.IteratorAt(0) } return } type mapAppender func(slice []interface{}, k, v types.Value) []interface{} type mapiter interface { Valid() bool Entry() (k, v types.Value) Next() bool } func getMapElements(vrw types.ValueReadWriter, v types.Value, args map[string]interface{}, app mapAppender) (interface{}, error) { m := v.(types.Map) iter, nomsKey, nomsThrough, count, singleExactMatch := getCollectionArgs(vrw, m, args, iteratorFactory{ IteratorFrom: func(from types.Value) interface{} { return m.IteratorFrom(from) }, IteratorAt: func(at uint64) interface{} { return m.IteratorAt(at) }, First: func() interface{} { return &mapFirstIterator{m: &m} }, }) if count == 0 { return ([]interface{})(nil), nil } mapIter := iter.(mapiter) values := make([]interface{}, 0, count) for i := uint64(0); i < count; i++ { if 
!mapIter.Valid() { break } k, v := mapIter.Entry() if singleExactMatch { if nomsKey.Equals(k) { values = app(values, k, v) } break } if nomsThrough != nil { if !nomsThrough.Less(k) { values = app(values, k, v) } else { break } } else { values = app(values, k, v) } mapIter.Next() } return values, nil } func getCountArg(count uint64, args map[string]interface{}) (c uint64, singleExactMatch bool) { if c, ok := args[countKey]; ok { c := c.(int) if c <= 0 { return 0, false } return uint64(c), false } // If we have key and no count/through we use count 1 _, hasKey := args[keyKey] _, hasThrough := args[throughKey] if hasKey && !hasThrough { return uint64(1), true } return count, false } func getThroughArg(vrw types.ValueReadWriter, nomsKeyType *types.Type, args map[string]interface{}) types.Value { if through, ok := args[throughKey]; ok { return InputToNomsValue(vrw, through, nomsKeyType) } return nil } type iteratorFactory struct { IteratorFrom func(from types.Value) interface{} IteratorAt func(at uint64) interface{} First func() interface{} } type mapEntry struct { key, value types.Value } // Map data must be returned as a list of key-value pairs. Each unique keyType:valueType is // represented as a graphql // // type Entry { // key: ! // value: ! 
// } func (tc *TypeConverter) mapEntryToGraphQLObject(keyType, valueType graphql.Type, nomsKeyType, nomsValueType *types.Type) graphql.Type { return graphql.NewNonNull(graphql.NewObject(graphql.ObjectConfig{ Name: fmt.Sprintf("%s%sEntry", tc.getTypeName(nomsKeyType), tc.getTypeName(nomsValueType)), Fields: graphql.FieldsThunk(func() graphql.Fields { return graphql.Fields{ keyKey: &graphql.Field{ Type: keyType, Resolve: func(p graphql.ResolveParams) (interface{}, error) { entry := p.Source.(mapEntry) return MaybeGetScalar(entry.key), nil }, }, valueKey: &graphql.Field{ Type: valueType, Resolve: func(p graphql.ResolveParams) (interface{}, error) { entry := p.Source.(mapEntry) return MaybeGetScalar(entry.value), nil }, }, } }), })) } func (tc *TypeConverter) mapEntryToGraphQLInputObject(keyType, valueType graphql.Input, nomsKeyType, nomsValueType *types.Type) graphql.Input { return graphql.NewNonNull(graphql.NewInputObject(graphql.InputObjectConfig{ Name: fmt.Sprintf("%s%sEntryInput", tc.getInputTypeName(nomsKeyType), tc.getInputTypeName(nomsValueType)), Fields: graphql.InputObjectConfigFieldMapThunk(func() graphql.InputObjectConfigFieldMap { return graphql.InputObjectConfigFieldMap{ keyKey: &graphql.InputObjectFieldConfig{ Type: graphql.NewNonNull(keyType), }, valueKey: &graphql.InputObjectFieldConfig{ Type: graphql.NewNonNull(valueType), }, } }), })) } // DefaultNameFunc returns the GraphQL type name for a Noms type. func DefaultNameFunc(nomsType *types.Type, isInputType bool) string { if isInputType { return GetInputTypeName(nomsType) } return GetTypeName(nomsType) } // GetTypeName provides a unique type name that is used by GraphQL. func GetTypeName(nomsType *types.Type) string { return getTypeName(nomsType, "") } // GetInputTypeName returns a type name that is unique and useful for GraphQL // input types. 
func GetInputTypeName(nomsType *types.Type) string { return getTypeName(nomsType, "Input") } func getTypeName(nomsType *types.Type, suffix string) string { switch nomsType.TargetKind() { case types.BoolKind: return "Boolean" case types.NumberKind: return "Number" case types.StringKind: return "String" case types.BlobKind: return "Blob" case types.ValueKind: return "Value" case types.ListKind: nomsValueType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] if isEmptyNomsUnion(nomsValueType) { return "EmptyList" } return fmt.Sprintf("%sList%s", GetTypeName(nomsValueType), suffix) case types.MapKind: nomsKeyType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] nomsValueType := nomsType.Desc.(types.CompoundDesc).ElemTypes[1] if isEmptyNomsUnion(nomsKeyType) { d.Chk.True(isEmptyNomsUnion(nomsValueType)) return "EmptyMap" } return fmt.Sprintf("%sTo%sMap%s", GetTypeName(nomsKeyType), GetTypeName(nomsValueType), suffix) case types.RefKind: return fmt.Sprintf("%sRef%s", GetTypeName(nomsType.Desc.(types.CompoundDesc).ElemTypes[0]), suffix) case types.SetKind: nomsValueType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] if isEmptyNomsUnion(nomsValueType) { return "EmptySet" } return fmt.Sprintf("%sSet%s", GetTypeName(nomsValueType), suffix) case types.StructKind: // GraphQL Name cannot start with a number. // GraphQL type names must be globally unique. return fmt.Sprintf("%s%s_%s", nomsType.Desc.(types.StructDesc).Name, suffix, nomsType.Hash().String()[:6]) case types.TypeKind: // GraphQL Name cannot start with a number. 
// TODO: https://github.com/attic-labs/noms/issues/3155 return fmt.Sprintf("Type%s_%s", suffix, nomsType.Hash().String()[:6]) case types.UnionKind: unionMemberTypes := nomsType.Desc.(types.CompoundDesc).ElemTypes names := make([]string, len(unionMemberTypes)) for i, unionMemberType := range unionMemberTypes { names[i] = GetTypeName(unionMemberType) } return strings.Join(names, "Or") + suffix case types.CycleKind: return "Cycle" default: panic(fmt.Sprintf("(GetTypeName) not reached: %s", nomsType.Describe())) } } func argsWithSize() graphql.Fields { return graphql.Fields{ sizeKey: &graphql.Field{ Type: graphql.Float, Resolve: func(p graphql.ResolveParams) (interface{}, error) { c := p.Source.(types.Collection) return MaybeGetScalar(types.Number(c.Len())), nil }, }, } } func (tc *TypeConverter) listAndSetToGraphQLObject(nomsType *types.Type) *graphql.Object { nomsValueType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] var listType, valueType graphql.Type var keyInputType graphql.Input var keyInputError error if !isEmptyNomsUnion(nomsValueType) { valueType = tc.nomsTypeToGraphQLType(nomsValueType, false) keyInputType, keyInputError = tc.nomsTypeToGraphQLInputType(nomsValueType) listType = graphql.NewNonNull(valueType) } return graphql.NewObject(graphql.ObjectConfig{ Name: tc.getTypeName(nomsType), Fields: graphql.FieldsThunk(func() graphql.Fields { fields := argsWithSize() if listType != nil { var args graphql.FieldConfigArgument var getSubvalues getSubvaluesFn switch nomsType.TargetKind() { case types.ListKind: args = listArgs getSubvalues = getListElements case types.SetKind: args = graphql.FieldConfigArgument{ atKey: &graphql.ArgumentConfig{Type: graphql.Int}, countKey: &graphql.ArgumentConfig{Type: graphql.Int}, } if keyInputError == nil { args[keyKey] = &graphql.ArgumentConfig{Type: keyInputType} args[throughKey] = &graphql.ArgumentConfig{Type: keyInputType} } getSubvalues = getSetElements } valuesField := &graphql.Field{ Type: graphql.NewList(listType), 
Args: args, Resolve: func(p graphql.ResolveParams) (interface{}, error) { c := p.Source.(types.Collection) vrw := p.Context.Value(vrwKey).(types.ValueReadWriter) return getSubvalues(vrw, c, p.Args), nil }, } fields[valuesKey] = valuesField fields[elementsKey] = valuesField } return fields }), }) } func (tc *TypeConverter) mapToGraphQLObject(nomsType *types.Type) *graphql.Object { return graphql.NewObject(graphql.ObjectConfig{ Name: tc.getTypeName(nomsType), Fields: graphql.FieldsThunk(func() graphql.Fields { nomsKeyType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] nomsValueType := nomsType.Desc.(types.CompoundDesc).ElemTypes[1] isEmptyMap := isEmptyNomsUnion(nomsKeyType) || isEmptyNomsUnion(nomsValueType) fields := argsWithSize() if !isEmptyMap { keyType := tc.nomsTypeToGraphQLType(nomsKeyType, false) keyInputType, keyInputError := tc.nomsTypeToGraphQLInputType(nomsKeyType) valueType := tc.nomsTypeToGraphQLType(nomsValueType, false) entryType := tc.mapEntryToGraphQLObject(graphql.NewNonNull(keyType), valueType, nomsKeyType, nomsValueType) args := graphql.FieldConfigArgument{ atKey: &graphql.ArgumentConfig{Type: graphql.Int}, countKey: &graphql.ArgumentConfig{Type: graphql.Int}, } if keyInputError == nil { args[keyKey] = &graphql.ArgumentConfig{Type: keyInputType} args[keysKey] = &graphql.ArgumentConfig{Type: graphql.NewList(graphql.NewNonNull(keyInputType))} args[throughKey] = &graphql.ArgumentConfig{Type: keyInputType} } entriesField := &graphql.Field{ Type: graphql.NewList(entryType), Args: args, Resolve: func(p graphql.ResolveParams) (interface{}, error) { c := p.Source.(types.Collection) vrw := p.Context.Value(vrwKey).(types.ValueReadWriter) return getMapElements(vrw, c, p.Args, mapAppendEntry) }, } fields[entriesKey] = entriesField fields[elementsKey] = entriesField fields[keysKey] = &graphql.Field{ Type: graphql.NewList(keyType), Args: args, Resolve: func(p graphql.ResolveParams) (interface{}, error) { c := p.Source.(types.Collection) vrw := 
p.Context.Value(vrwKey).(types.ValueReadWriter) return getMapElements(vrw, c, p.Args, mapAppendKey) }, } fields[valuesKey] = &graphql.Field{ Type: graphql.NewList(valueType), Args: args, Resolve: func(p graphql.ResolveParams) (interface{}, error) { c := p.Source.(types.Collection) vrw := p.Context.Value(vrwKey).(types.ValueReadWriter) return getMapElements(vrw, c, p.Args, mapAppendValue) }, } } return fields }), }) } func mapAppendKey(slice []interface{}, k, v types.Value) []interface{} { return append(slice, MaybeGetScalar(k)) } func mapAppendValue(slice []interface{}, k, v types.Value) []interface{} { return append(slice, MaybeGetScalar(v)) } func mapAppendEntry(slice []interface{}, k, v types.Value) []interface{} { return append(slice, mapEntry{k, v}) } // Refs are represented as structs: // // type Entry { // targetHash: String! // targetValue: ! // } func (tc *TypeConverter) refToGraphQLObject(nomsType *types.Type) *graphql.Object { return graphql.NewObject(graphql.ObjectConfig{ Name: tc.getTypeName(nomsType), Fields: graphql.FieldsThunk(func() graphql.Fields { nomsTargetType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] targetType := tc.nomsTypeToGraphQLType(nomsTargetType, false) return graphql.Fields{ targetHashKey: &graphql.Field{ Type: graphql.NewNonNull(graphql.String), Resolve: func(p graphql.ResolveParams) (interface{}, error) { r := p.Source.(types.Ref) return MaybeGetScalar(types.String(r.TargetHash().String())), nil }, }, targetValueKey: &graphql.Field{ Type: targetType, Resolve: func(p graphql.ResolveParams) (interface{}, error) { r := p.Source.(types.Ref) return MaybeGetScalar(r.TargetValue(p.Context.Value(vrwKey).(types.ValueReader))), nil }, }, } }), }) } func MaybeGetScalar(v types.Value) interface{} { switch v.(type) { case types.Bool: return bool(v.(types.Bool)) case types.Number: return float64(v.(types.Number)) case types.String: return string(v.(types.String)) case *types.Type, types.Blob: // TODO: 
https://github.com/attic-labs/noms/issues/3155 return v.Hash() } return v } // InputToNomsValue converts a GraphQL input value (as used in arguments and // variables) to a Noms value. func InputToNomsValue(vrw types.ValueReadWriter, arg interface{}, nomsType *types.Type) types.Value { switch nomsType.TargetKind() { case types.BoolKind: return types.Bool(arg.(bool)) case types.NumberKind: if i, ok := arg.(int); ok { return types.Number(i) } return types.Number(arg.(float64)) case types.StringKind: return types.String(arg.(string)) case types.ListKind, types.SetKind: elemType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] sl := arg.([]interface{}) vs := make(types.ValueSlice, len(sl)) for i, v := range sl { vs[i] = InputToNomsValue(vrw, v, elemType) } if nomsType.TargetKind() == types.ListKind { return types.NewList(vrw, vs...) } return types.NewSet(vrw, vs...) case types.MapKind: // Maps are passed as [{key: K, value: V}, ...] keyType := nomsType.Desc.(types.CompoundDesc).ElemTypes[0] valType := nomsType.Desc.(types.CompoundDesc).ElemTypes[1] sl := arg.([]interface{}) kvs := make(types.ValueSlice, 2*len(sl)) for i, v := range sl { v := v.(map[string]interface{}) kvs[2*i] = InputToNomsValue(vrw, v["key"], keyType) kvs[2*i+1] = InputToNomsValue(vrw, v["value"], valType) } return types.NewMap(vrw, kvs...) 
case types.StructKind: desc := nomsType.Desc.(types.StructDesc) data := make(types.StructData, desc.Len()) m := arg.(map[string]interface{}) desc.IterFields(func(name string, t *types.Type, optional bool) { if m[name] != nil || !optional { data[name] = InputToNomsValue(vrw, m[name], t) } }) return types.NewStruct(desc.Name, data) } panic("not yet implemented") } type mapIteratorForKeys struct { m types.Map keys types.ValueSlice idx int } func (it *mapIteratorForKeys) Valid() bool { return it.idx < len(it.keys) } func (it *mapIteratorForKeys) Entry() (k, v types.Value) { if it.idx >= len(it.keys) { return } k = it.keys[it.idx] v = it.m.Get(k) return } func (it *mapIteratorForKeys) Next() bool { it.idx++ return it.Valid() } type setFirstIterator struct { s types.Set } func (it *setFirstIterator) Next() types.Value { return it.s.First() } func (it *setFirstIterator) SkipTo(v types.Value) types.Value { panic("not implemented") } type mapFirstIterator struct { m *types.Map } func (it *mapFirstIterator) Valid() bool { return it.m != nil && !it.m.Empty() } func (it *mapFirstIterator) Entry() (types.Value, types.Value) { if it.m == nil { return nil, nil } k, v := it.m.First() it.m = nil return k, v } func (it *mapFirstIterator) Next() bool { return false } ================================================ FILE: go/nomdl/lexer.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nomdl import ( "fmt" "text/scanner" ) type lexer struct { scanner *scanner.Scanner peekToken rune } func (lex *lexer) next() rune { if lex.peekToken != 0 { tok := lex.peekToken lex.peekToken = 0 return tok } return lex.scanner.Scan() } func (lex *lexer) peek() rune { if lex.peekToken != 0 { return lex.peekToken } tok := lex.scanner.Scan() lex.peekToken = tok return tok } func (lex *lexer) pos() scanner.Position { if lex.peekToken != 0 { panic("Cannot use pos after peek") } return lex.scanner.Pos() } func (lex *lexer) tokenText() string { if lex.peekToken != 0 { panic("Cannot use tokenText after peek") } return lex.scanner.TokenText() } func (lex *lexer) eat(expected rune) rune { tok := lex.next() lex.check(expected, tok) return tok } func (lex *lexer) eatIf(expected rune) bool { tok := lex.peek() if tok == expected { lex.next() return true } return false } func (lex *lexer) check(expected, actual rune) { if actual != expected { lex.tokenMismatch(expected, actual) } } func (lex *lexer) tokenMismatch(expected, actual rune) { raiseSyntaxError(fmt.Sprintf("Unexpected token %s, expected %s", scanner.TokenString(actual), scanner.TokenString(expected)), lex.pos()) } func (lex *lexer) unexpectedToken(actual rune) { raiseSyntaxError(fmt.Sprintf("Unexpected token %s", scanner.TokenString(actual)), lex.pos()) } func raiseSyntaxError(msg string, pos scanner.Position) { panic(syntaxError{ msg: msg, pos: pos, }) } type syntaxError struct { msg string pos scanner.Position } func (e syntaxError) Error() string { return fmt.Sprintf("%s, %s", e.msg, e.pos) } func catchSyntaxError(f func()) (errRes error) { defer func() { if err := recover(); err != nil { if err, ok := err.(syntaxError); ok { errRes = err return } panic(err) } }() f() return } ================================================ FILE: go/nomdl/parser.go ================================================ // Copyright 
2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nomdl import ( "bytes" "fmt" "io" "strconv" "strings" "text/scanner" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) // Parser provides ways to parse Noms types. type Parser struct { lex *lexer vrw types.ValueReadWriter } // ParserOptions allows passing options into New. type ParserOptions struct { // Filename is the name of the file we are currently parsing. Filename string } // New creates a new Parser. func New(vrw types.ValueReadWriter, r io.Reader, options ParserOptions) *Parser { s := scanner.Scanner{} s.Init(r) s.Filename = options.Filename s.Mode = scanner.ScanIdents | scanner.ScanComments | scanner.SkipComments | scanner.ScanFloats | scanner.ScanStrings // | scanner.ScanRawStrings s.Error = func(s *scanner.Scanner, msg string) {} lex := lexer{scanner: &s} return &Parser{&lex, vrw} } // ParseType parses a string describing a Noms type. func ParseType(code string) (typ *types.Type, err error) { p := New(nil, strings.NewReader(code), ParserOptions{}) err = catchSyntaxError(func() { typ = p.parseType() p.ensureAtEnd() }) return } // MustParseType parses a string describing a Noms type and panics if there // is an error. func MustParseType(code string) *types.Type { typ, err := ParseType(code) d.PanicIfError(err) return typ } // Parse parses a string describing a Noms value. func Parse(vrw types.ValueReadWriter, code string) (v types.Value, err error) { p := New(vrw, strings.NewReader(code), ParserOptions{}) err = catchSyntaxError(func() { v = p.parseValue() p.ensureAtEnd() }) return } // MustParse parses a string describing a Noms value and panics if there // is an error. 
func MustParse(vrw types.ValueReadWriter, code string) types.Value { v, err := Parse(vrw, code) d.PanicIfError(err) return v } func (p *Parser) ensureAtEnd() { p.lex.eat(scanner.EOF) } // Type : // TypeWithoutUnion (`|` TypeWithoutUnion)* // // TypeWithoutUnion : // `Blob` // `Bool` // `Number` // `String` // `Type` // `Value` // CycleType // ListType // MapType // RefType // SetType // StructType // // CycleType : // `Cycle` `<` StructName `>` // // ListType : // `List` `<` Type? `>` // // MapType : // `Map` `<` (Type `,` Type)? `>` // // RefType : // `Set` `<` Type `>` // // SetType : // `Set` `<` Type? `>` // // StructType : // `Struct` StructName? `{` StructTypeFields? `}` // // StructTypeFields : // StructTypeField // StructTypeField `,` StructTypeFields? // // StructName : // Ident // // StructTypeField : // StructFieldName `?`? `:` Type // // StructFieldName : // Ident func (p *Parser) parseType() *types.Type { tok := p.lex.eat(scanner.Ident) return p.parseTypeWithToken(tok, p.lex.tokenText()) } func (p *Parser) parseTypeWithToken(tok rune, tokenText string) *types.Type { t := p.parseSingleTypeWithToken(tok, tokenText) tok = p.lex.peek() if tok != '|' { return t } unionTypes := []*types.Type{t} for { tok = p.lex.peek() if tok == '|' { p.lex.next() } else { break } unionTypes = append(unionTypes, p.parseSingleType()) } return types.MakeUnionType(unionTypes...) 
} func (p *Parser) parseSingleType() *types.Type { tok := p.lex.eat(scanner.Ident) return p.parseSingleTypeWithToken(tok, p.lex.tokenText()) } func (p *Parser) parseSingleTypeWithToken(tok rune, tokenText string) *types.Type { switch tokenText { case "Bool": return types.BoolType case "Blob": return types.BlobType case "Number": return types.NumberType case "String": return types.StringType case "Type": return types.TypeType case "Value": return types.ValueType case "Struct": return p.parseStructType() case "Map": return p.parseMapType() case "List": elemType := p.parseSingleElemType(true) return types.MakeListType(elemType) case "Set": elemType := p.parseSingleElemType(true) return types.MakeSetType(elemType) case "Ref": elemType := p.parseSingleElemType(false) return types.MakeRefType(elemType) case "Cycle": return p.parseCycleType() } p.lex.unexpectedToken(tok) return nil } func (p *Parser) parseStructType() *types.Type { tok := p.lex.next() name := "" if tok == scanner.Ident { name = p.lex.tokenText() p.lex.eat('{') } else { p.lex.check('{', tok) } fields := []types.StructField{} for p.lex.peek() != '}' { p.lex.eat(scanner.Ident) fieldName := p.lex.tokenText() optional := p.lex.eatIf('?') p.lex.eat(':') typ := p.parseType() fields = append(fields, types.StructField{ Name: fieldName, Type: typ, Optional: optional, }) if p.lex.eatIf(',') { continue } break } p.lex.eat('}') return types.MakeStructType(name, fields...) 
} func (p *Parser) parseSingleElemType(allowEmptyUnion bool) *types.Type { p.lex.eat('<') if allowEmptyUnion && p.lex.eatIf('>') { return types.MakeUnionType() } elemType := p.parseType() p.lex.eat('>') return elemType } func (p *Parser) parseCycleType() *types.Type { p.lex.eat('<') p.lex.eat(scanner.Ident) name := p.lex.tokenText() p.lex.eat('>') return types.MakeCycleType(name) } func (p *Parser) parseMapType() *types.Type { var keyType, valueType *types.Type p.lex.eat('<') if p.lex.eatIf('>') { keyType = types.MakeUnionType() valueType = keyType } else { keyType = p.parseType() p.lex.eat(',') valueType = p.parseType() p.lex.eat('>') } return types.MakeMapType(keyType, valueType) } // Value : // Type // Bool // Number // String // List // Set // Map // Struct // // Bool : // `true` // `false` // // Number : // ... // // String : // ... // // List : // `[` Values? `]` // // Values : // Value // Value `,` Values? // // Set : // `set` `{` Values? `}` // // Map : // `map` `{` MapEntries? `}` // // MapEntries : // MapEntry // MapEntry `,` MapEntries? // // MapEntry : // Value `:` Value // // Struct : // `struct` StructName? `{` StructFields? `}` // // StructFields : // StructField // StructField `,` StructFields? 
// // StructField : // StructFieldName `:` Value func (p *Parser) parseValue() types.Value { tok := p.lex.next() switch tok { case scanner.Ident: switch tokenText := p.lex.tokenText(); tokenText { case "true": return types.Bool(true) case "false": return types.Bool(false) case "set": return p.parseSet() case "map": return p.parseMap() case "struct": return p.parseStruct() case "blob": return p.parseBlob() default: return p.parseTypeWithToken(tok, tokenText) } case scanner.Float, scanner.Int: f := p.parseFloat() return types.Number(f) case '-': if !p.lex.eatIf(scanner.Float) { p.lex.eat(scanner.Int) } n := p.parseFloat() return types.Number(-float64(n)) case '+': if !p.lex.eatIf(scanner.Float) { p.lex.eat(scanner.Int) } return p.parseFloat() case '[': return p.parseList() case scanner.String: s := p.lex.tokenText() s2, err := strconv.Unquote(s) if err != nil { raiseSyntaxError(fmt.Sprintf("Invalid string %s", s), p.lex.pos()) } return types.String(s2) } p.lex.unexpectedToken(tok) panic("unreachable") } func (p *Parser) parseFloat() types.Number { s := p.lex.tokenText() f, _ := strconv.ParseFloat(s, 64) return types.Number(f) } func (p *Parser) parseList() types.List { // already swallowed '[' le := types.NewList(p.vrw).Edit() for p.lex.peek() != ']' { v := p.parseValue() le.Append(v) if p.lex.eatIf(',') { continue } break } p.lex.eat(']') return le.List() } func (p *Parser) parseSet() types.Set { // already swallowed 'set' p.lex.eat('{') se := types.NewSet(p.vrw).Edit() for p.lex.peek() != '}' { v := p.parseValue() se.Insert(v) if p.lex.eatIf(',') { continue } break } p.lex.eat('}') return se.Set() } func (p *Parser) parseMap() types.Map { // already swallowed 'map' p.lex.eat('{') me := types.NewMap(p.vrw).Edit() for p.lex.peek() != '}' { key := p.parseValue() p.lex.eat(':') value := p.parseValue() me.Set(key, value) if p.lex.eatIf(',') { continue } break } p.lex.eat('}') return me.Map() } func (p *Parser) blobString(s string) []byte { raise := func() { 
raiseSyntaxError(fmt.Sprintf("Invalid blob \"%s\"", s), p.lex.pos()) } if len(s)%2 != 0 { raise() } var buff bytes.Buffer for i := 0; i < len(s); i += 2 { n, err := strconv.ParseUint(s[i:i+2], 16, 8) if err != nil { raise() } buff.WriteByte(uint8(n)) } return buff.Bytes() } func (p *Parser) parseBlob() types.Blob { // already swallowed 'blob' p.lex.eat('{') var buff bytes.Buffer for p.lex.peek() != '}' { tok := p.lex.next() switch tok { case scanner.Ident, scanner.Int: s := p.lex.tokenText() buff.Write(p.blobString(s)) default: p.lex.unexpectedToken(tok) } } p.lex.eat('}') return types.NewBlob(p.vrw, bytes.NewReader(buff.Bytes())) } func (p *Parser) parseStruct() types.Struct { // already swallowed 'struct' tok := p.lex.next() name := "" if tok == scanner.Ident { name = p.lex.tokenText() p.lex.eat('{') } else { p.lex.check('{', tok) } data := types.StructData{} for p.lex.peek() != '}' { p.lex.eat(scanner.Ident) fieldName := p.lex.tokenText() p.lex.eat(':') v := p.parseValue() data[fieldName] = v if p.lex.eatIf(',') { continue } break } p.lex.eat('}') return types.NewStruct(name, data) } ================================================ FILE: go/nomdl/parser_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package nomdl import ( "bytes" "strings" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func newTestValueStore() *types.ValueStore { st := &chunks.TestStorage{} return types.NewValueStore(st.NewView()) } func assertParseType(t *testing.T, code string, expected *types.Type) { t.Run(code, func(t *testing.T) { actual, err := ParseType(code) assert.NoError(t, err) assert.True(t, expected.Equals(actual), "Expected: %s, Actual: %s", expected.Describe(), actual.Describe()) }) } func assertParse(t *testing.T, vrw types.ValueReadWriter, code string, expected types.Value) { t.Run(code, func(t *testing.T) { actual, err := Parse(vrw, code) if !assert.NoError(t, err) { return } assert.True(t, expected.Equals(actual), "Expected: %s, Actual: %s", types.EncodedValue(expected), types.EncodedValue(actual)) }) } func assertParseError(t *testing.T, code, msg string) { t.Run(code, func(t *testing.T) { vrw := newTestValueStore() p := New(vrw, strings.NewReader(code), ParserOptions{ Filename: "example", }) err := catchSyntaxError(func() { p.parseValue() }) if assert.Error(t, err) { assert.Equal(t, msg, err.Error()) } }) } func TestSimpleTypes(t *testing.T) { assertParseType(t, "Blob", types.BlobType) assertParseType(t, "Bool", types.BoolType) assertParseType(t, "Number", types.NumberType) assertParseType(t, "String", types.StringType) assertParseType(t, "Value", types.ValueType) assertParseType(t, "Type", types.TypeType) } func TestWhitespace(t *testing.T) { for _, r := range " \t\n\r" { assertParseType(t, string(r)+"Blob", types.BlobType) assertParseType(t, "Blob"+string(r), types.BlobType) } } func TestComments(t *testing.T) { assertParseType(t, "/* */Blob", types.BlobType) assertParseType(t, "Blob/* */", types.BlobType) assertParseType(t, "Blob//", types.BlobType) assertParseType(t, "//\nBlob", types.BlobType) } 
func TestCompoundTypes(t *testing.T) { assertParseType(t, "List<>", types.MakeListType(types.MakeUnionType())) assertParseType(t, "List", types.MakeListType(types.BoolType)) assertParseError(t, "List", `Unexpected token ",", expected ">", example:1:11`) assertParseError(t, "List", example:1:10`) assertParseError(t, "List<", `Unexpected token EOF, expected Ident, example:1:6`) assertParseError(t, "List", `Unexpected token EOF, expected "<", example:1:5`) assertParseType(t, "Set<>", types.MakeSetType(types.MakeUnionType())) assertParseType(t, "Set", types.MakeSetType(types.BoolType)) assertParseError(t, "Set", `Unexpected token ",", expected ">", example:1:10`) assertParseError(t, "Set", example:1:9`) assertParseError(t, "Set<", `Unexpected token EOF, expected Ident, example:1:5`) assertParseError(t, "Set", `Unexpected token EOF, expected "<", example:1:4`) assertParseError(t, "Ref<>", `Unexpected token ">", expected Ident, example:1:6`) assertParseType(t, "Ref", types.MakeRefType(types.BoolType)) assertParseError(t, "Ref", `Unexpected token ",", expected ">", example:1:12`) assertParseError(t, "Ref", example:1:11`) assertParseError(t, "Ref<", `Unexpected token EOF, expected Ident, example:1:5`) assertParseError(t, "Ref", `Unexpected token EOF, expected "<", example:1:4`) // Cannot use Equals on unresolved cycles. 
ct := MustParseType("Cycle") assert.Equal(t, ct, types.MakeCycleType("Abc")) assertParseError(t, "Cycle<-123>", `Unexpected token "-", expected Ident, example:1:8`) assertParseError(t, "Cycle<12.3>", `Unexpected token Float, expected Ident, example:1:11`) assertParseError(t, "Cycle<>", `Unexpected token ">", expected Ident, example:1:8`) assertParseError(t, "Cycle<", `Unexpected token EOF, expected Ident, example:1:7`) assertParseError(t, "Cycle", `Unexpected token EOF, expected "<", example:1:6`) assertParseType(t, "Map<>", types.MakeMapType(types.MakeUnionType(), types.MakeUnionType())) assertParseType(t, "Map", types.MakeMapType(types.BoolType, types.StringType)) assertParseError(t, "Map", `Unexpected token ">", expected Ident, example:1:11`) assertParseError(t, "Map<,Bool>", `Unexpected token ",", expected Ident, example:1:6`) assertParseError(t, "Map<,>", `Unexpected token ",", expected Ident, example:1:6`) assertParseError(t, "Map", example:1:14`) assertParseError(t, "Map", types.MakeListType(types.MakeUnionType(types.BoolType, types.NumberType))) assertParseType(t, "Map", types.MakeMapType( types.MakeUnionType(types.BoolType, types.NumberType), types.MakeUnionType(types.BoolType, types.NumberType), ), ) assertParseType(t, `Struct S { x: Number | Bool }`, types.MakeStructTypeFromFields("S", types.FieldMap{ "x": types.MakeUnionType(types.BoolType, types.NumberType), })) assertParseType(t, `Struct S { x: Number | Bool, y: String }`, types.MakeStructTypeFromFields("S", types.FieldMap{ "x": types.MakeUnionType(types.BoolType, types.NumberType), "y": types.StringType, })) assertParseError(t, "Bool |", "Unexpected token EOF, expected Ident, example:1:7") assertParseError(t, "Bool | Number |", "Unexpected token EOF, expected Ident, example:1:16") assertParseError(t, "Bool | | ", `Unexpected token "|", expected Ident, example:1:9`) assertParseError(t, "", `Unexpected token EOF, example:1:1`) } func TestValuePrimitives(t *testing.T) { vs := newTestValueStore() 
assertParse(t, vs, "Number", types.NumberType) assertParse(t, vs, "Number | String", types.MakeUnionType(types.NumberType, types.StringType)) assertParse(t, vs, "true", types.Bool(true)) assertParse(t, vs, "false", types.Bool(false)) assertParse(t, vs, "0", types.Number(0)) assertParse(t, vs, "1", types.Number(1)) assertParse(t, vs, "1.1", types.Number(1.1)) assertParse(t, vs, "1.1e1", types.Number(1.1e1)) assertParse(t, vs, "1e1", types.Number(1e1)) assertParse(t, vs, "1e-1", types.Number(1e-1)) assertParse(t, vs, "1e+1", types.Number(1e+1)) assertParse(t, vs, "+0", types.Number(0)) assertParse(t, vs, "+1", types.Number(1)) assertParse(t, vs, "+1.1", types.Number(1.1)) assertParse(t, vs, "+1.1e1", types.Number(1.1e1)) assertParse(t, vs, "+1e1", types.Number(1e1)) assertParse(t, vs, "+1e-1", types.Number(1e-1)) assertParse(t, vs, "+1e+1", types.Number(1e+1)) assertParse(t, vs, "-0", types.Number(-0)) assertParse(t, vs, "-1", types.Number(-1)) assertParse(t, vs, "-1.1", types.Number(-1.1)) assertParse(t, vs, "-1.1e1", types.Number(-1.1e1)) assertParse(t, vs, "-1e1", types.Number(-1e1)) assertParse(t, vs, "-1e-1", types.Number(-1e-1)) assertParse(t, vs, "-1e+1", types.Number(-1e+1)) assertParse(t, vs, `"a"`, types.String("a")) assertParse(t, vs, `""`, types.String("")) assertParse(t, vs, `"\""`, types.String("\"")) assertParseError(t, `"\"`, "Invalid string \"\\\", example:1:4") assertParseError(t, `"abc`, "Invalid string \"abc, example:1:5") assertParseError(t, `"`, "Invalid string \", example:1:2") assertParseError(t, `" "`, "Invalid string \"\n, example:2:1") assertParseError(t, "`", "Unexpected token \"`\", example:1:2") } func TestValueList(t *testing.T) { vs := newTestValueStore() assertParse(t, vs, "[]", types.NewList(vs)) assertParse(t, vs, "[42]", types.NewList(vs, types.Number(42))) assertParse(t, vs, "[42,]", types.NewList(vs, types.Number(42))) assertParseError(t, "[", "Unexpected token EOF, example:1:2") assertParseError(t, "[,", "Unexpected token \",\", 
example:1:3") assertParseError(t, "[42", "Unexpected token EOF, expected \"]\", example:1:4") assertParseError(t, "[42,", "Unexpected token EOF, example:1:5") assertParseError(t, "[,]", "Unexpected token \",\", example:1:3") assertParse(t, vs, `[42, Bool, ]`, types.NewList(vs, types.Number(42), types.BoolType)) assertParse(t, vs, `[42, Bool ]`, types.NewList(vs, types.Number(42), types.BoolType)) } func TestEmptyValuesInEditors(t *testing.T) { vs := newTestValueStore() assertParse(t, vs, "[[]]", types.NewList(vs, types.NewList(vs))) assertParse(t, vs, "[set {}]", types.NewList(vs, types.NewSet(vs))) assertParse(t, vs, "[map {}]", types.NewList(vs, types.NewMap(vs))) assertParse(t, vs, "set {[]}", types.NewSet(vs, types.NewList(vs))) assertParse(t, vs, "set {set {}}", types.NewSet(vs, types.NewSet(vs))) assertParse(t, vs, "set {map {}}", types.NewSet(vs, types.NewMap(vs))) assertParse(t, vs, "map {map {}: map {}}", types.NewMap(vs, types.NewMap(vs), types.NewMap(vs))) assertParse(t, vs, "map {[]: []}", types.NewMap(vs, types.NewList(vs), types.NewList(vs))) assertParse(t, vs, "map {set {}: set {}}", types.NewMap(vs, types.NewSet(vs), types.NewSet(vs))) } func TestValueSet(t *testing.T) { vs := newTestValueStore() assertParse(t, vs, "set {}", types.NewSet(vs)) assertParse(t, vs, "set {42}", types.NewSet(vs, types.Number(42))) assertParse(t, vs, "set {42,}", types.NewSet(vs, types.Number(42))) assertParseError(t, "set", "Unexpected token EOF, expected \"{\", example:1:4") assertParseError(t, "set {", "Unexpected token EOF, example:1:6") assertParseError(t, "set {,", "Unexpected token \",\", example:1:7") assertParseError(t, "set {42", "Unexpected token EOF, expected \"}\", example:1:8") assertParseError(t, "set {42,", "Unexpected token EOF, example:1:9") assertParseError(t, "set {,}", "Unexpected token \",\", example:1:7") assertParse(t, vs, `set {42, Bool, }`, types.NewSet(vs, types.Number(42), types.BoolType)) assertParse(t, vs, `set {42, Bool }`, types.NewSet(vs, 
types.Number(42), types.BoolType)) } func TestValueMap(t *testing.T) { vs := newTestValueStore() assertParse(t, vs, "map {}", types.NewMap(vs)) assertParse(t, vs, "map {42: true}", types.NewMap(vs, types.Number(42), types.Bool(true))) assertParse(t, vs, "map {42: true,}", types.NewMap(vs, types.Number(42), types.Bool(true))) assertParseError(t, "map", "Unexpected token EOF, expected \"{\", example:1:4") assertParseError(t, "map {", "Unexpected token EOF, example:1:6") assertParseError(t, "map {,", "Unexpected token \",\", example:1:7") assertParseError(t, "map {42", "Unexpected token EOF, expected \":\", example:1:8") assertParseError(t, "map {42,", "Unexpected token \",\", expected \":\", example:1:9") assertParseError(t, "map {42:", "Unexpected token EOF, example:1:9") assertParseError(t, "map {42: true", "Unexpected token EOF, expected \"}\", example:1:14") assertParseError(t, "map {,}", "Unexpected token \",\", example:1:7") assertParse(t, vs, `map {42: Bool, }`, types.NewMap(vs, types.Number(42), types.BoolType)) assertParse(t, vs, `map {42: Bool }`, types.NewMap(vs, types.Number(42), types.BoolType)) } func TestValueType(t *testing.T) { vs := newTestValueStore() assertParse(t, vs, "Bool", types.BoolType) assertParse(t, vs, "Number", types.NumberType) assertParse(t, vs, "String", types.StringType) } func TestValueStruct(t *testing.T) { vs := newTestValueStore() assertParse(t, vs, "struct {}", types.NewStruct("", nil)) assertParseError(t, "struct", "Unexpected token EOF, expected \"{\", example:1:7") assertParseError(t, "struct {", "Unexpected token EOF, expected Ident, example:1:9") assertParse(t, vs, "struct name {}", types.NewStruct("name", nil)) assertParseError(t, "struct name", "Unexpected token EOF, expected \"{\", example:1:12") assertParseError(t, "struct name {", "Unexpected token EOF, expected Ident, example:1:14") assertParse(t, vs, "struct name {a: 42}", types.NewStruct("name", types.StructData{"a": types.Number(42)})) assertParse(t, vs, "struct 
name {a: 42,}", types.NewStruct("name", types.StructData{"a": types.Number(42)})) assertParseError(t, "struct name {a", "Unexpected token EOF, expected \":\", example:1:15") assertParseError(t, "struct name {a: ", "Unexpected token EOF, example:1:17") assertParseError(t, "struct name {a,", "Unexpected token \",\", expected \":\", example:1:16") assertParseError(t, "struct name {a}", "Unexpected token \"}\", expected \":\", example:1:16") assertParseError(t, "struct name {a: 42", "Unexpected token EOF, expected \"}\", example:1:19") assertParseError(t, "struct name {a: 42,", "Unexpected token EOF, expected Ident, example:1:20") assertParseError(t, "struct name {a:}", "Unexpected token \"}\", example:1:17") assertParse(t, vs, "struct name {b: 42, a: true}", types.NewStruct("name", types.StructData{"b": types.Number(42), "a": types.Bool(true)})) assertParse(t, vs, `struct name { b: 42, a: true, }`, types.NewStruct("name", types.StructData{"b": types.Number(42), "a": types.Bool(true)})) assertParse(t, vs, "struct name {a: Struct {}}", types.NewStruct("name", types.StructData{"a": types.MakeStructType("")})) } func TestValueBlob(t *testing.T) { vs := newTestValueStore() test := func(code string, bs ...byte) { assertParse(t, vs, code, types.NewBlob(vs, bytes.NewBuffer(bs))) } test("blob {}") test("blob {// comment\n}") test("blob {10}", 0x10) test("blob {10/* comment */}", 0x10) test("blob {0000ff}", 0, 0, 0xff) test("blob {00 00 ff}", 0, 0, 0xff) test("blob { 00\n00\nff }", 0, 0, 0xff) test("blob { ffffffff ffffffff ffffffff ffffffff}", 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, ) test("blob { ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff}", 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, ) assertParseError(t, "blob", "Unexpected token EOF, expected \"{\", 
example:1:5") assertParseError(t, "blob {", "Unexpected token EOF, example:1:7") assertParseError(t, "blob { 00", "Unexpected token EOF, example:1:10") assertParseError(t, "blob {hh}", "Invalid blob \"hh\", example:1:9") assertParseError(t, "blob {0}", "Invalid blob \"0\", example:1:8") assertParseError(t, "blob {00 0}", "Invalid blob \"0\", example:1:11") assertParseError(t, "blob {ff 0 0}", "Invalid blob \"0\", example:1:11") } func TestRoundTrips(t *testing.T) { vs := newTestValueStore() test := func(v types.Value) { code := types.EncodedValue(v) assertParse(t, vs, code, v) } test(types.Number(0)) test(types.Number(42)) test(types.Number(-0)) test(types.Number(-42)) test(types.Number(0.05)) test(types.Number(-0.05)) test(types.Number(1e50)) test(types.Number(-1e50)) test(types.Bool(true)) test(types.Bool(false)) test(types.String("")) test(types.String("a")) test(types.String("\"")) test(types.String("'")) test(types.String("`")) test(types.NewEmptyBlob(vs)) test(types.NewBlob(vs, bytes.NewBufferString("abc"))) test(types.NewList(vs)) test(types.NewList(vs, types.Number(42), types.Bool(true), types.String("abc"))) test(types.NewSet(vs)) test(types.NewSet(vs, types.Number(42), types.Bool(true), types.String("abc"))) test(types.NewMap(vs)) test(types.NewMap(vs, types.Number(42), types.Bool(true), types.String("abc"), types.NewMap(vs))) test(types.NewStruct("", nil)) test(types.NewStruct("Number", nil)) test(types.NewStruct("Number", types.StructData{ "Number": types.NumberType, })) test(types.MakeStructType("S", types.StructField{ Name: "cycle", Type: types.MakeCycleType("S"), Optional: true, })) } ================================================ FILE: go/perf/hash-perf-rig/README.md ================================================ This is a performance test rig for the two main types of hashing we do in NOMS - buzhash and sha1. There's also support for sha256, sha512, and blake2b hash functions for comparison. 
As of May 9, these are the numbers I get on a macbook pro 3.1 GHz Intel Core i7. - no hashing : 3500 MB/s - sha1 only : 470 MB/s - sha256 only : 185 MB/s - sha512 only : 299 MB/s - blake2b only : 604 MB/s - bh only : 139 MB/s - sha1 and bh : 110 MB/s - sha256 and bh : 80 MB/s - sha512 and bh : 96 MB/s - blake2b and bh: 115 MB/s I think that in the no hashing case there is some compiler optimization going on because I note that if all I do is add a loop that reads out bytes one by one from the slice, it drops to 1000MB/s. One outcome of this is that there's no sense going to sha256 - we should just jump straight to sha512. ================================================ FILE: go/perf/hash-perf-rig/main.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "crypto/sha1" "crypto/sha256" "crypto/sha512" "fmt" "hash" "io" "os" "time" "github.com/attic-labs/kingpin" "github.com/codahale/blake2" humanize "github.com/dustin/go-humanize" "github.com/kch42/buzhash" ) func main() { useSHA := kingpin.Flag("use-sha", "=no hashing, 1=sha1, 256=sha256, 512=sha512, blake=blake2b").String() useBH := kingpin.Flag("use-bh", "whether we buzhash the bytes").Bool() bigFile := kingpin.Arg("bigfile", "input file to chunk").Required().String() kingpin.Parse() bh := buzhash.NewBuzHash(64 * 8) f, _ := os.Open(*bigFile) defer f.Close() t0 := time.Now() buf := make([]byte, 4*1024) l := uint64(0) var h hash.Hash if *useSHA == "1" { h = sha1.New() } else if *useSHA == "256" { h = sha256.New() } else if *useSHA == "512" { h = sha512.New() } else if *useSHA == "blake" { h = blake2.NewBlake2B() } for { n, err := f.Read(buf) l += uint64(n) if err == io.EOF { break } s := buf[:n] if h != nil { h.Write(s) } if *useBH { bh.Write(s) } } t1 := time.Now() d := t1.Sub(t0) fmt.Printf("Read %s in %s (%s/s)\n", humanize.Bytes(l), d, 
humanize.Bytes(uint64(float64(l)/d.Seconds()))) digest := []byte{} if h != nil { fmt.Printf("%x\n", h.Sum(digest)) } } ================================================ FILE: go/perf/suite/suite.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package suite implements a performance test suite for Noms, intended for // measuring and reporting long running tests. // // Usage is similar to testify's suite: // 1. Define a test suite struct which inherits from suite.PerfSuite. // 2. Define methods on that struct that start with the word "Test", optionally // followed by digits, then followed a non-empty capitalized string. // 3. Call suite.Run with an instance of that struct. // 4. Run go test with the -perf flag. // // Flags: // -perf.mem Backs the database by a memory store, instead of nbs. // -perf.prefix Gives the dataset IDs for test results a prefix. // -perf.repeat Sets how many times tests are repeated ("reps"). // -perf.run Only run tests that match a regex (case insensitive). // -perf.testdata Sets a custom path to the Noms testdata directory. // // PerfSuite also supports testify/suite style Setup/TearDown methods: // Setup/TearDownSuite is called exactly once. // Setup/TearDownRep is called for each repetition of the test runs, i.e. -perf.repeat times. // Setup/TearDownTest is called for every test. // // Test results are written to Noms, along with a dump of the environment they were recorded in. // // Test names are derived from that "non-empty capitalized string": "Test" is omitted because it's // redundant, and leading digits are omitted to allow for manual test ordering. For example: // // > cat ./samples/go/csv/csv-import/perf_test.go // type perfSuite { // suite.PerfSuite // } // // func (s *perfSuite) TestFoo() { ... } // func (s *perfSuite) TestZoo() { ... 
} // func (s *perfSuite) Test01Qux() { ... } // func (s *perfSuite) Test02Bar() { ... } // // func TestPerf(t *testing.T) { // suite.Run("csv-import", t, &perfSuite{}) // } // // > noms serve & // > go test -v ./samples/go/csv/... -perf http://localhost:8000 -perf.repeat 3 // (perf) RUN(1/3) Test01Qux (recorded as "Qux") // (perf) PASS: Test01Qux (5s, paused 15s, total 20s) // (perf) RUN(1/3) Test02Bar (recorded as "Bar") // (perf) PASS: Test02Bar (15s, paused 2s, total 17s) // (perf) RUN(1/3) TestFoo (recorded as "Foo") // (perf) PASS: TestFoo (10s, paused 1s, total 11s) // (perf) RUN(1/3) TestZoo (recorded as "Zoo") // (perf) PASS: TestZoo (1s, paused 42s, total 43s) // ... // // > noms show http://localhost:8000::csv-import // { // environment: ... // tests: [{ // "Bar": {elapsed: 15s, paused: 2s, total: 17s}, // "Foo": {elapsed: 10s, paused: 1s, total: 11s}, // "Qux": {elapsed: 5s, paused: 15s, total: 20s}, // "Zoo": {elapsed: 1s, paused: 42s, total: 43s}, // }, ...] // ... // } package suite import ( "bytes" "flag" "fmt" "io" "io/ioutil" "os" "os/exec" "path" "path/filepath" "reflect" "regexp" "strings" "testing" "time" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/nbs" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/types" "github.com/shirou/gopsutil/cpu" "github.com/shirou/gopsutil/disk" "github.com/shirou/gopsutil/host" "github.com/shirou/gopsutil/mem" "github.com/stretchr/testify/assert" testifySuite "github.com/stretchr/testify/suite" ) var ( perfFlag = flag.String("perf", "", "The database to write perf tests to. If this isn't specified, perf tests are skipped. If you want a dry run, use \"mem\" as a database") perfMemFlag = flag.Bool("perf.mem", false, "Back the test database by a memory store, not nbs. 
// PerfSuite is the core of the perf testing suite. Test suites embed it to
// gain access to the helpers below; Run fills in the exported fields before
// any test method is invoked. See package documentation for details.
type PerfSuite struct {
	// T is the testing.T instance set when the suite is passed into Run.
	T *testing.T

	// W is the io.Writer to write test output to. Run sets it to os.Stdout
	// when the go test verbose flag is set, and to a writer that discards
	// everything otherwise.
	W io.Writer

	// AtticLabs is the path to the attic-labs directory under GOPATH
	// (e.g. /path/to/go/src/github.com/attic-labs).
	AtticLabs string

	// Testdata is the path to the testdata directory. It defaults to the
	// "testdata" directory inside AtticLabs
	// (e.g. /path/to/go/src/github.com/attic-labs/testdata), and can be
	// overridden with the -perf.testdata flag.
	Testdata string

	// Database is a Noms database that tests can use for reading and writing.
	// Run creates a fresh database for every repetition, so state is shared
	// between the tests of one repetition but not across repetitions.
	Database datas.Database

	// DatabaseSpec is the Noms spec of Database: the http://localhost:<port>
	// URL returned by StartRemoteDatabase.
	DatabaseSpec string

	// tempFiles and tempDirs record everything handed out by TempFile and
	// TempDir so that Run can remove them when it returns.
	tempFiles []*os.File
	tempDirs  []string
	// paused accumulates the time spent inside Pause during the current test;
	// Run resets it to zero before each test method.
	paused time.Duration
	// datasetID is the dataset ID passed to Run; it is also used to build the
	// prefix of temporary file and directory names.
	datasetID string
}
type TearDownRepSuite interface { TearDownRep() } type perfSuiteT interface { Suite() *PerfSuite } type environment struct { DiskUsages map[string]disk.UsageStat Cpus map[int]cpu.InfoStat Mem mem.VirtualMemoryStat Host host.InfoStat Partitions map[string]disk.PartitionStat } type timeInfo struct { elapsed, paused, total time.Duration } type testRep map[string]timeInfo type nopWriter struct{} func (r nopWriter) Write(p []byte) (int, error) { return len(p), nil } // Run runs suiteT and writes results to dataset datasetID in the database given by the -perf command line flag. func Run(datasetID string, t *testing.T, suiteT perfSuiteT) { assert := assert.New(t) if !assert.NotEqual("", datasetID) { return } // Piggy-back off the go test -v flag. verboseFlag := flag.Lookup("test.v") assert.NotNil(verboseFlag) verbose := verboseFlag.Value.(flag.Getter).Get().(bool) if *perfFlag == "" { if verbose { fmt.Printf("(perf) Skipping %s, -perf flag not set\n", datasetID) } return } suite := suiteT.Suite() suite.T = t if verbose { suite.W = os.Stdout } else { suite.W = nopWriter{} } gopath := os.Getenv("GOPATH") if !assert.NotEmpty(gopath) { return } suite.AtticLabs = path.Join(gopath, "src", "github.com", "attic-labs") suite.Testdata = *perfTestdataFlag if suite.Testdata == "" { suite.Testdata = path.Join(suite.AtticLabs, "testdata") } // Clean up temporary directories/files last. defer func() { for _, f := range suite.tempFiles { os.Remove(f.Name()) } for _, d := range suite.tempDirs { os.RemoveAll(d) } }() suite.datasetID = datasetID // This is the database the perf test results are written to. sp, err := spec.ForDatabase(*perfFlag) if !assert.NoError(err) { return } defer sp.Close() // List of test runs, each a map of test name => timing info. testReps := make([]testRep, *perfRepeatFlag) // Note: the default value of perfRunFlag is "", which is actually a valid // regular expression that matches everything. 
perfRunRe, err := regexp.Compile("(?i)" + *perfRunFlag) if !assert.NoError(err, `Invalid regular expression "%s"`, *perfRunFlag) { return } defer func() { db := sp.GetDatabase() reps := make([]types.Value, *perfRepeatFlag) for i, rep := range testReps { timesSlice := types.ValueSlice{} for name, info := range rep { timesSlice = append(timesSlice, types.String(name), types.NewStruct("", types.StructData{ "elapsed": types.Number(info.elapsed.Nanoseconds()), "paused": types.Number(info.paused.Nanoseconds()), "total": types.Number(info.total.Nanoseconds()), })) } reps[i] = types.NewMap(db, timesSlice...) } record := types.NewStruct("", map[string]types.Value{ "environment": suite.getEnvironment(db), "nomsRevision": types.String(suite.getGitHead(path.Join(suite.AtticLabs, "noms"))), "testdataRevision": types.String(suite.getGitHead(suite.Testdata)), "reps": types.NewList(db, reps...), }) ds := db.GetDataset(*perfPrefixFlag + datasetID) _, err := db.CommitValue(ds, record) assert.NoError(err) }() if t, ok := suiteT.(testifySuite.SetupAllSuite); ok { t.SetupSuite() } for repIdx := 0; repIdx < *perfRepeatFlag; repIdx++ { testReps[repIdx] = testRep{} serverHost, stopServerFn := suite.StartRemoteDatabase() suite.DatabaseSpec = serverHost suite.Database = datas.NewDatabase(datas.NewHTTPChunkStore(serverHost, "")) defer suite.Database.Close() if t, ok := suiteT.(SetupRepSuite); ok { t.SetupRep() } for t, mIdx := reflect.TypeOf(suiteT), 0; mIdx < t.NumMethod(); mIdx++ { m := t.Method(mIdx) parts := testNamePattern.FindStringSubmatch(m.Name) if parts == nil { continue } recordName := parts[1] if !perfRunRe.MatchString(recordName) && !perfRunRe.MatchString(m.Name) { continue } if _, ok := testReps[repIdx][recordName]; ok { assert.Fail(`Multiple tests are named "%s"`, recordName) continue } if verbose { fmt.Printf("(perf) RUN(%d/%d) %s (as \"%s\")\n", repIdx+1, *perfRepeatFlag, m.Name, recordName) } if t, ok := suiteT.(testifySuite.SetupTestSuite); ok { t.SetupTest() } start := 
time.Now() suite.paused = 0 err := callSafe(m.Name, m.Func, suiteT) total := time.Since(start) elapsed := total - suite.paused if verbose && err == nil { fmt.Printf("(perf) PASS: %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total) } else if err != nil { fmt.Printf("(perf) FAIL: %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total) fmt.Println(err) } testReps[repIdx][recordName] = timeInfo{elapsed, suite.paused, total} if t, ok := suiteT.(testifySuite.TearDownTestSuite); ok { t.TearDownTest() } } if t, ok := suiteT.(TearDownRepSuite); ok { t.TearDownRep() } stopServerFn() } if t, ok := suiteT.(testifySuite.TearDownAllSuite); ok { t.TearDownSuite() } } func (suite *PerfSuite) Suite() *PerfSuite { return suite } // NewAssert returns the assert.Assertions instance for this test. func (suite *PerfSuite) NewAssert() *assert.Assertions { return assert.New(suite.T) } // TempFile creates a temporary file, which will be automatically cleaned up by // the perf test suite. Files will be prefixed with the test's dataset ID func (suite *PerfSuite) TempFile() *os.File { f, err := ioutil.TempFile("", suite.tempPrefix()) assert.NoError(suite.T, err) suite.tempFiles = append(suite.tempFiles, f) return f } // TempDir creates a temporary directory, which will be automatically cleaned // up by the perf test suite. Directories will be prefixed with the test's // dataset ID. func (suite *PerfSuite) TempDir() string { d, err := ioutil.TempDir("", suite.tempPrefix()) assert.NoError(suite.T, err) suite.tempDirs = append(suite.tempDirs, d) return d } func (suite *PerfSuite) tempPrefix() string { sep := fmt.Sprintf("%c", os.PathSeparator) return strings.Replace(fmt.Sprintf("perf.%s.", suite.datasetID), sep, ".", -1) } // Pause pauses the test timer while fn is executing. Useful for omitting long setup code (e.g. copying files) from the test elapsed time. 
func (suite *PerfSuite) Pause(fn func()) { start := time.Now() fn() suite.paused += time.Since(start) } // OpenGlob opens the concatenation of all files that match pattern, returned // as []io.Reader so it can be used immediately with io.MultiReader. // // Large CSV files in testdata are broken up into foo.a, foo.b, etc to get // around GitHub file size restrictions. func (suite *PerfSuite) OpenGlob(pattern ...string) []io.Reader { assert := suite.NewAssert() glob, err := filepath.Glob(path.Join(pattern...)) assert.NoError(err) files := make([]io.Reader, len(glob)) for i, m := range glob { f, err := os.Open(m) assert.NoError(err) files[i] = f } return files } // CloseGlob closes all of the files, designed to be used with OpenGlob. func (suite *PerfSuite) CloseGlob(files []io.Reader) { assert := suite.NewAssert() for _, f := range files { assert.NoError(f.(*os.File).Close()) } } func callSafe(name string, fun reflect.Value, args ...interface{}) error { funArgs := make([]reflect.Value, len(args)) for i, arg := range args { funArgs[i] = reflect.ValueOf(arg) } return d.Try(func() { fun.Call(funArgs) }) } func (suite *PerfSuite) getEnvironment(vrw types.ValueReadWriter) types.Value { assert := suite.NewAssert() env := environment{ DiskUsages: map[string]disk.UsageStat{}, Cpus: map[int]cpu.InfoStat{}, Partitions: map[string]disk.PartitionStat{}, } partitions, err := disk.Partitions(false) assert.NoError(err) for _, p := range partitions { usage, err := disk.Usage(p.Mountpoint) assert.NoError(err) env.DiskUsages[p.Mountpoint] = *usage env.Partitions[p.Device] = p } cpus, err := cpu.Info() assert.NoError(err) for i, c := range cpus { env.Cpus[i] = c } mem, err := mem.VirtualMemory() assert.NoError(err) env.Mem = *mem hostInfo, err := host.Info() assert.NoError(err) env.Host = *hostInfo envStruct, err := marshal.Marshal(vrw, env) assert.NoError(err) return envStruct } func (suite *PerfSuite) getGitHead(dir string) string { stdout := &bytes.Buffer{} cmd := 
exec.Command("git", "rev-parse", "HEAD") cmd.Stdout = stdout cmd.Dir = dir if err := cmd.Run(); err != nil { return "" } return strings.TrimSpace(stdout.String()) } // StartRemoteDatabase creates a new remote database on an arbitrary free port, // running on a separate goroutine. Returns the hostname that that database was // started on, and a callback to run to shut down the server. // // If the -perf.mem flag is specified, the remote database is hosted in memory, // not on disk (in a temporary nbs directory). // // - Why not use a local database + memory store? // Firstly, because the spec would be "mem", and the spec library doesn't // know how to reuse stores. // Secondly, because it's an unrealistic performance measurement. // // - Why use a remote (HTTP) database? // It's more realistic to exercise the HTTP stack, even if it's just talking // over localhost. // // - Why provide an option for nbs vs memory underlying store? // Again, nbs is more realistic than memory, and in common cases disk // space > memory space. // However, on this developer's laptop, there is // actually very little disk space, and a lot of memory; plus making the // test run a little bit faster locally is nice. func (suite *PerfSuite) StartRemoteDatabase() (host string, stopFn func()) { var chunkStore chunks.ChunkStore if *perfMemFlag { st := &chunks.MemoryStorage{} chunkStore = st.NewView() } else { dbDir := suite.TempDir() chunkStore = nbs.NewLocalStore(dbDir, 128*(1<<20)) } server := datas.NewRemoteDatabaseServer(chunkStore, "0.0.0.0", 0) portChan := make(chan int) server.Ready = func() { portChan <- server.Port() } go server.Run() port := <-portChan host = fmt.Sprintf("http://localhost:%d", port) stopFn = func() { server.Stop() } return } ================================================ FILE: go/perf/suite/suite_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package suite import ( "io/ioutil" "os" "testing" "time" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) type testSuite struct { PerfSuite tempFileName, tempDir string setupTest, tearDownTest int setupRep, tearDownRep int setupSuite, tearDownSuite int foo, bar, abc, def, nothing, testimate int } func (s *testSuite) TestNonEmptyPaths() { assert := s.NewAssert() assert.NotEqual("", s.AtticLabs) assert.NotEqual("", s.Testdata) assert.NotEqual("", s.DatabaseSpec) } func (s *testSuite) TestDatabase() { assert := s.NewAssert() val := types.Bool(true) r := s.Database.WriteValue(val) assert.True(s.Database.ReadValue(r.TargetHash()).Equals(val)) } func (s *testSuite) TestTempFile() { s.tempFileName = s.TempFile().Name() s.tempDir = s.TempDir() } func (s *testSuite) TestGlob() { assert := s.NewAssert() f := s.TempFile() f.Close() create := func(suffix string) { f, err := os.Create(f.Name() + suffix) assert.NoError(err) f.Close() } create("a") create(".a") create(".b") glob := s.OpenGlob(f.Name() + ".*") assert.Equal(2, len(glob)) assert.Equal(f.Name()+".a", glob[0].(*os.File).Name()) assert.Equal(f.Name()+".b", glob[1].(*os.File).Name()) s.CloseGlob(glob) b := make([]byte, 16) _, err := glob[0].Read(b) assert.Error(err) _, err = glob[1].Read(b) assert.Error(err) } func (s *testSuite) TestPause() { s.Pause(func() { s.waitForSmidge() }) } func (s *testSuite) TestFoo() { s.foo++ s.waitForSmidge() } func (s *testSuite) TestBar() { s.bar++ s.waitForSmidge() } func (s *testSuite) Test01Abc() { s.abc++ s.waitForSmidge() } func (s *testSuite) Test02Def() { s.def++ s.waitForSmidge() } func (s *testSuite) testNothing() { s.nothing++ s.waitForSmidge() } func (s *testSuite) Testimate() { s.testimate++ s.waitForSmidge() } func (s *testSuite) SetupTest() { s.setupTest++ } func (s *testSuite) TearDownTest() { s.tearDownTest++ } 
func (s *testSuite) SetupRep() { s.setupRep++ } func (s *testSuite) TearDownRep() { s.tearDownRep++ } func (s *testSuite) SetupSuite() { s.setupSuite++ } func (s *testSuite) TearDownSuite() { s.tearDownSuite++ } func (s *testSuite) waitForSmidge() { // Tests should call this to make sure the measurement shows up as > 0, not that it shows up as a millisecond. <-time.After(time.Millisecond) } func TestSuite(t *testing.T) { runTestSuite(t, false) } func TestSuiteWithMem(t *testing.T) { t.Skip("Flaky on Jenkins") runTestSuite(t, true) } func runTestSuite(t *testing.T, mem bool) { assert := assert.New(t) // Write test results to our own temporary LDB database. ldbDir, err := ioutil.TempDir("", "suite.TestSuite") assert.NoError(err) defer os.RemoveAll(ldbDir) flagVal, repeatFlagVal, memFlagVal := *perfFlag, *perfRepeatFlag, *perfMemFlag *perfFlag, *perfRepeatFlag, *perfMemFlag = ldbDir, 3, mem defer func() { *perfFlag, *perfRepeatFlag, *perfMemFlag = flagVal, repeatFlagVal, memFlagVal }() s := &testSuite{} Run("ds", t, s) expectedTests := []string{ "Abc", "Bar", "Database", "Def", "Foo", "Glob", "NonEmptyPaths", "Pause", "TempFile", } // The temp file and dir should have been cleaned up. _, err = os.Stat(s.tempFileName) assert.NotNil(err) _, err = os.Stat(s.tempDir) assert.NotNil(err) // The correct number of Setup/TearDown calls should have been run. assert.Equal(1, s.setupSuite) assert.Equal(1, s.tearDownSuite) assert.Equal(*perfRepeatFlag, s.setupRep) assert.Equal(*perfRepeatFlag, s.tearDownRep) assert.Equal(*perfRepeatFlag*len(expectedTests), s.setupTest) assert.Equal(*perfRepeatFlag*len(expectedTests), s.tearDownTest) // The results should have been written to the "ds" dataset. sp, err := spec.ForDataset(ldbDir + "::ds") assert.NoError(err) defer sp.Close() head := sp.GetDataset().HeadValue().(types.Struct) // These tests mostly assert that the structure of the results is correct. Specific values are hard. 
getOrFail := func(s types.Struct, f string) types.Value { val, ok := s.MaybeGet(f) assert.True(ok) return val } env, ok := getOrFail(head, "environment").(types.Struct) assert.True(ok) getOrFail(env, "diskUsages") getOrFail(env, "cpus") getOrFail(env, "mem") getOrFail(env, "host") getOrFail(env, "partitions") // Todo: re-enable this code once demo-server gets build without CodePipeline // This fails with CodePipeline because the source code is brought into // Jenkins as a zip file rather than as a git repo. //nomsRevision := getOrFail(head, "nomsRevision") //assert.True(ok) //assert.True(string(nomsRevision.(types.String)) != "") //getOrFail(head, "testdataRevision") reps, ok := getOrFail(head, "reps").(types.List) assert.True(ok) assert.Equal(*perfRepeatFlag, int(reps.Len())) reps.IterAll(func(rep types.Value, _ uint64) { i := 0 rep.(types.Map).IterAll(func(k, timesVal types.Value) { if assert.True(i < len(expectedTests)) { assert.Equal(expectedTests[i], string(k.(types.String))) } times := timesVal.(types.Struct) assert.True(getOrFail(times, "elapsed").(types.Number) > 0) assert.True(getOrFail(times, "total").(types.Number) > 0) paused := getOrFail(times, "paused").(types.Number) if k == types.String("Pause") { assert.True(paused > 0) } else { assert.True(paused == 0) } i++ }) assert.Equal(i, len(expectedTests)) }) } func TestPrefixFlag(t *testing.T) { t.Skip("Flaky on Jenkins") assert := assert.New(t) // Write test results to a temporary database. ldbDir, err := ioutil.TempDir("", "suite.TestSuite") assert.NoError(err) defer os.RemoveAll(ldbDir) flagVal, prefixFlagVal := *perfFlag, *perfPrefixFlag *perfFlag, *perfPrefixFlag = ldbDir, "foo/" defer func() { *perfFlag, *perfPrefixFlag = flagVal, prefixFlagVal }() Run("my-prefix/test", t, &PerfSuite{}) // The results should have been written to "foo/my-prefix/test" not "my-prefix/test". 
sp, err := spec.ForDataset(ldbDir + "::my-prefix/test") assert.NoError(err) defer sp.Close() _, ok := sp.GetDataset().MaybeHead() assert.False(ok) sp, err = spec.ForDataset(ldbDir + "::foo/my-prefix/test") assert.NoError(err) defer sp.Close() _, ok = sp.GetDataset().HeadValue().(types.Struct) assert.True(ok) } func TestRunFlag(t *testing.T) { t.Skip("Flaky on Jenkins") assert := assert.New(t) type expect struct { foo, bar, abc, def, nothing, testimate int } run := func(re string, exp expect) { flagVal, memFlagVal, runFlagVal := *perfFlag, *perfMemFlag, *perfRunFlag *perfFlag, *perfMemFlag, *perfRunFlag = "mem", true, re defer func() { *perfFlag, *perfMemFlag, *perfRunFlag = flagVal, memFlagVal, runFlagVal }() s := testSuite{} Run("test", t, &s) assert.Equal(exp, expect{s.foo, s.bar, s.abc, s.def, s.nothing, s.testimate}) } run("", expect{foo: 1, bar: 1, abc: 1, def: 1}) run(".", expect{foo: 1, bar: 1, abc: 1, def: 1}) run("test", expect{foo: 1, bar: 1, abc: 1, def: 1}) run("^test", expect{foo: 1, bar: 1, abc: 1, def: 1}) run("Test", expect{foo: 1, bar: 1, abc: 1, def: 1}) run("^Test", expect{foo: 1, bar: 1, abc: 1, def: 1}) run("f", expect{foo: 1, def: 1}) run("^f", expect{foo: 1}) run("testf", expect{foo: 1}) run("^testf", expect{foo: 1}) run("testF", expect{foo: 1}) run("^testF", expect{foo: 1}) run("F", expect{foo: 1, def: 1}) run("^F", expect{foo: 1}) run("Testf", expect{foo: 1}) run("^Testf", expect{foo: 1}) run("TestF", expect{foo: 1}) run("^TestF", expect{foo: 1}) run("ef", expect{def: 1}) run("def", expect{def: 1}) run("ddef", expect{}) run("testdef", expect{}) run("test01def", expect{}) run("test02def", expect{def: 1}) run("Test02def", expect{def: 1}) run("test02Def", expect{def: 1}) run("Test02Def", expect{def: 1}) run("z", expect{}) run("testz", expect{}) run("Testz", expect{}) run("[fa]", expect{foo: 1, bar: 1, abc: 1, def: 1}) run("[fb]", expect{foo: 1, bar: 1, abc: 1, def: 1}) run("[fc]", expect{foo: 1, abc: 1, def: 1}) run("test[fa]", expect{foo: 1}) 
run("test[fb]", expect{foo: 1, bar: 1}) run("test[fc]", expect{foo: 1}) run("Test[fa]", expect{foo: 1}) run("Test[fb]", expect{foo: 1, bar: 1}) run("Test[fc]", expect{foo: 1}) run("foo|bar", expect{foo: 1, bar: 1}) run("FOO|bar", expect{foo: 1, bar: 1}) run("Testfoo|bar", expect{foo: 1, bar: 1}) run("TestFOO|bar", expect{foo: 1, bar: 1}) run("Testfoo|Testbar", expect{foo: 1, bar: 1}) run("TestFOO|Testbar", expect{foo: 1, bar: 1}) run("footest", expect{}) run("nothing", expect{}) } ================================================ FILE: go/sloppy/sloppy.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package sloppy import ( "github.com/attic-labs/noms/go/d" ) const ( maxOffsetPOT = uint16(12) maxTableSize = 1 << 14 maxLength = 1<<12 - 1 tableMask = maxTableSize - 1 shift = uint32(20) ) // TODO: Make this configurable var maxOffset = int(1< maxLength || src[sl.idx] != src[sl.matchOffset+sl.matchLength]) { // End Match if sl.maybeCopy(src) { return // terminate if consumer has "closed" } } // Look for a match if we are beyond the first byte AND either there is no // match yet, OR we are matching, but fewer than 3 bytes have been // matched. The later condition allows for giving up to 2 bytes of a copy // in order to reference a "closer" sequence. Empirical tests on // structured data, suggests this reduces the average offset by ~2/3. if sl.idx > 0 && (!sl.matching || sl.matchLength < 3) { matchPos := int(sl.table[nextHash&tableMask]) if sl.idx > matchPos && src[sl.idx] == src[matchPos] && // filter false positives sl.idx-matchPos <= maxOffset && // don't refer back beyond maxOffset (!sl.matching || matchPos >= sl.matchOffset+4) { // if we are "rematching", ensure the new match is at least 4 bytes closer if sl.matching { // We are dropping an existing match for a closer one. 
Emit the // matched bytes as literals if sl.dontCopy(src, sl.idx-sl.matchLength, sl.idx) { return // terminate if consumer has "closed" } } // Begin a new match sl.matching = true sl.matchOffset = matchPos sl.matchLength = 0 } } // Store new hashed offset sl.table[nextHash&tableMask] = uint32(sl.idx) if sl.matching { sl.matchLength++ } else { if sl.enc.emitLiteral(src[sl.idx]) { return // terminate if consumer has "closed" } } } } func (sl *Sloppy) Reset() { sl.idx = 0 sl.matching = false sl.matchOffset = 0 sl.matchLength = 0 sl.table = [maxTableSize]uint32{} } // len >= 2^(2 + log2(maxOffset) - log2(maxOffset-off)). IOW, for the first 1/2 // of the maxOffset, a copy must be >= 4. For 1/2 of what remains, a copy must // be >= 8, etc... func copyLongEnough(off, len uint16) bool { d.PanicIfTrue(off == 0) p := uint16(0) x := (1 << maxOffsetPOT) - off for x > 0 { x = x >> 1 p++ } i := maxOffsetPOT - p min := 4 for i > 0 { min = min << 1 i-- } return int(len) >= min } // Emit matches bytes as literals. func (sl *Sloppy) dontCopy(src []byte, from, to int) bool { for ; from < to; from++ { if sl.enc.emitLiteral(src[from]) { return true } } return false } // Emit a copy if the length is sufficient for a given offset func (sl *Sloppy) maybeCopy(src []byte) bool { off, len := uint16(sl.idx-(sl.matchOffset+sl.matchLength)), uint16(sl.matchLength) sl.matching = false sl.matchOffset = 0 sl.matchLength = 0 if !copyLongEnough(off, len) { return sl.dontCopy(src, sl.idx-int(len), sl.idx) } return sl.enc.emitCopy(off, len) } type encoder interface { emitLiteral(b byte) bool emitCopy(offset, length uint16) bool } type binaryEncoder struct { f func(b byte) bool } func (be binaryEncoder) emitLiteral(b byte) bool { return be.f(b) } func (be binaryEncoder) emitCopy(offset, length uint16) bool { // all copies are encoded as 3 bytes. 
// 12 bits for offset and 12 bits for length // 8 MSBits of offset if be.f(byte(offset >> 4)) { return true } // 4 LSBits offset | 4 MSBits length if be.f(byte(offset<<4) | byte(length>>4)) { return true } // 8 LSBits of length if be.f(byte(length)) { return true } return false } func fbhash(u uint32) uint32 { return (u * 0x1e35a7bd) >> shift } func load32(b []byte, i int) uint32 { b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 } ================================================ FILE: go/sloppy/sloppy_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package sloppy import ( "fmt" "testing" "time" "github.com/golang/snappy" "github.com/stretchr/testify/assert" ) type testSloppyEncoder struct { current []byte enc []interface{} } func (tt *testSloppyEncoder) emitLiteral(b byte) bool { tt.current = append(tt.current, b) return false } func (tt *testSloppyEncoder) emitCopy(offset, length uint16) bool { tt.emitString() tt.enc = append(tt.enc, offset, length) return false } func (tt *testSloppyEncoder) emitString() { if len(tt.current) > 0 { tt.enc = append(tt.enc, string(tt.current)) tt.current = make([]byte, 0) } } func TestCopyLongEnough(t *testing.T) { assert.False(t, copyLongEnough(1, 3)) assert.True(t, copyLongEnough(1, 4)) assert.False(t, copyLongEnough(2047, 3)) assert.True(t, copyLongEnough(2048, 4)) assert.False(t, copyLongEnough(2048+1, 7)) assert.True(t, copyLongEnough(2048+1, 8)) assert.False(t, copyLongEnough(2048+1024+1, 15)) assert.True(t, copyLongEnough(2048+1024+1, 16)) assert.False(t, copyLongEnough(2048+1024+512+1, 31)) assert.True(t, copyLongEnough(2048+1024+512+1, 32)) assert.False(t, copyLongEnough(2048+1024+512+256+1, 63)) assert.True(t, copyLongEnough(2048+1024+512+256+1, 64)) 
assert.False(t, copyLongEnough(2048+1024+512+256+128+1, 127))
	assert.True(t, copyLongEnough(2048+1024+512+256+128+1, 128))
	assert.False(t, copyLongEnough(2048+1024+512+256+128+64+1, 255))
	assert.True(t, copyLongEnough(2048+1024+512+256+128+64+1, 256))
	assert.False(t, copyLongEnough(2048+1024+512+256+128+64+32+1, 511))
	assert.True(t, copyLongEnough(2048+1024+512+256+128+64+32+1, 512))
	assert.False(t, copyLongEnough(2048+1024+512+256+128+64+32+16+1, 1023))
	assert.True(t, copyLongEnough(2048+1024+512+256+128+64+32+16+1, 1024))
	assert.False(t, copyLongEnough(2048+1024+512+256+128+64+32+16+8+1, 2047))
	assert.True(t, copyLongEnough(2048+1024+512+256+128+64+32+16+8+1, 2048))
	assert.False(t, copyLongEnough(2048+1024+512+256+128+64+32+16+8+4+1, 4095))
	assert.True(t, copyLongEnough(2048+1024+512+256+128+64+32+16+8+4+1, 4096))
}

// TestSloppySimple runs each plaintext through a single Update call, with
// maxOffset overridden per case, and compares the recorded literal runs and
// (offset, length) copies with the expected sequence. The package-level
// maxOffset is restored when the test returns.
func TestSloppySimple(t *testing.T) {
	savedMaxOffset := maxOffset
	defer func() {
		maxOffset = savedMaxOffset
	}()

	// NOTE: Sloppy always buffers the last three bytes of input.
	cases := []struct {
		max int
		pt  string
		enc []interface{}
	}{
		// Match length not long enough
		{4, "ababxxxx", []interface{}{"ababx"}},

		// Trailing literal
		{4, "01230123xxxx", []interface{}{"0123", uint16(4), uint16(4), "x"}},

		// Match past current
		{4, "012301230123xxxx", []interface{}{"0123", uint16(4), uint16(8), "x"}},

		// Offset to most recent
		{6, "ABCDxABCDyABCDzzzz", []interface{}{"ABCDx", uint16(5), uint16(4), "y", uint16(5), uint16(4), "z"}},

		// Offset beyond max
		{5, "0123x0123yy0123zzzz", []interface{}{"0123x", uint16(5), uint16(4), "yy0123z"}},

		// Prefer closer match
		{128, "23hello my friend, 12hello my friend, 01hello my friend, 23hello my friendxxxx", []interface{}{
			"23hello my friend, 12",
			uint16(19), uint16(17), "01",
			uint16(19), uint16(17), "23",
			uint16(19), uint16(15), "x",
		}},
	}

	for n, tcase := range cases {
		name := fmt.Sprintf("Case %d", n)
		t.Run(name, func(t *testing.T) {
			rec := &testSloppyEncoder{
				current: []byte{},
				enc:     []interface{}{},
			}
			maxOffset = tcase.max

			sl := New(nil)
			sl.enc = rec
			sl.Update([]byte(tcase.pt))

			// Flush the trailing run of literals before comparing.
			rec.emitString()
			assert.Equal(t, tcase.enc, rec.enc)
		})
	}
}

// TestSloppyContinuation calls Update twice on the same Sloppy, first with pt
// and then with pt2, and compares the combined recorded output with the
// expected sequence. The package-level maxOffset is restored when the test
// returns.
func TestSloppyContinuation(t *testing.T) {
	savedMaxOffset := maxOffset
	defer func() {
		maxOffset = savedMaxOffset
	}()

	// NOTE: Sloppy always buffers the last three bytes of input.
	cases := []struct {
		max     int
		pt, pt2 string
		enc     []interface{}
	}{
		// Simple,
		{4, "umborkbork", "umborkborkborkaaaa", []interface{}{"umbork", uint16(4), uint16(8), "a"}},
		{8, "umborkbork", "umborkborkxyzborkaaaa", []interface{}{"umbork", uint16(4), uint16(4), "xyz", uint16(7), uint16(4), "a"}},

		// Resume indexing
		{8, "x", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
		{8, "xA", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
		{8, "xAB", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
		{8, "xABC", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
		{8, "xABCD", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
		{8, "xABCDA", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
		{8, "xABCDAB", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
		{8, "xABCDABC", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
		{8, "xABCDABCD", "xABCDABCDxxxx", []interface{}{"xABCD", uint16(4), uint16(4), "x"}},
	}

	for n, tcase := range cases {
		name := fmt.Sprintf("Case %d", n)
		t.Run(name, func(t *testing.T) {
			rec := &testSloppyEncoder{
				current: []byte{},
				enc:     []interface{}{},
			}
			maxOffset = tcase.max

			sl := New(nil)
			sl.enc = rec
			for _, input := range []string{tcase.pt, tcase.pt2} {
				sl.Update([]byte(input))
			}

			// Flush the trailing run of literals before comparing.
			rec.emitString()
			assert.Equal(t, tcase.enc, rec.enc)
		})
	}
}

// TestBinaryEncoder checks the exact bytes binaryEncoder hands to its sink:
// one byte for a literal and three bytes for a copy.
func TestBinaryEncoder(t *testing.T) {
	written := []byte{}
	sink := func(b byte) bool {
		written = append(written, b)
		return false
	}
	be := binaryEncoder{f: sink}

	be.emitLiteral(5)
	assert.Equal(t, []byte{5}, written)

	written = written[:0]
	be.emitCopy(0, 5)
	assert.Equal(t, []byte{0, 0, 5}, written)

	written = written[:0]
	be.emitCopy(5, 5)
	assert.Equal(t, []byte{0, 80, 5}, written)

	written = written[:0]
	be.emitCopy(4095, 4095)
	assert.Equal(t, []byte{255, 255, 255}, written)
}

func TestSnappyVsSloppy(t *testing.T) {
	orig :=
[]byte(aliceText) t1 := time.Now() snappyComp := snappy.Encode(nil, orig) t2 := time.Now() sloppyBytes := int(0) f := func(b byte) bool { sloppyBytes++ return false } sl := New(f) t3 := time.Now() sl.Update(orig) t4 := time.Now() // Note: it's expected that sloppy doesn't compress as well as snappy - mainly // because it can only refer to copies 4k away, but also because it will // sacrifice a slightly longer copy in order to refer less far away. fmt.Println("Original", len([]byte(aliceText))) fmt.Println("Snappy", len(snappyComp), t2.Sub(t1)) fmt.Println("Sloppy", sloppyBytes, t4.Sub(t3)) assert.Equal(t, int(43104), sloppyBytes) } var aliceText = `Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, ‘and what is the use of a book,’ thought Alice ‘without pictures or conversations?’ So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. There was nothing so very remarkable in that; nor did Alice think it so very much out of the way to hear the Rabbit say to itself, ‘Oh dear! Oh dear! I shall be late!’ (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually took a watch out of its waistcoat-pocket, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge. 
In another moment down went Alice after it, never once considering how in the world she was to get out again. The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well. Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled ‘ORANGE MARMALADE’, but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. ‘Well!’ thought Alice to herself, ‘after such a fall as this, I shall think nothing of tumbling down stairs! How brave they’ll all think me at home! Why, I wouldn’t say anything about it, even if I fell off the top of the house!’ (Which was very likely true.) Down, down, down. Would the fall never come to an end! ‘I wonder how many miles I’ve fallen by this time?’ she said aloud. ‘I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think—’ (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a very good opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) ‘—yes, that’s about the right distance—but then I wonder what Latitude or Longitude I’ve got to?’ (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.) 
Presently she began again. ‘I wonder if I shall fall right through the earth! How funny it’ll seem to come out among the people that walk with their heads downward! The Antipathies, I think—’ (she was rather glad there was no one listening, this time, as it didn’t sound at all the right word) ‘—but I shall have to ask them what the name of the country is, you know. Please, Ma’am, is this New Zealand or Australia?’ (and she tried to curtsey as she spoke—fancy curtseying as you’re falling through the air! Do you think you could manage it?) ‘And what an ignorant little girl she’ll think me for asking! No, it’ll never do to ask: perhaps I shall see it written up somewhere.’ Down, down, down. There was nothing else to do, so Alice soon began talking again. ‘Dinah’ll miss me very much to-night, I should think!’ (Dinah was the cat.) ‘I hope they’ll remember her saucer of milk at tea-time. Dinah my dear! I wish you were down here with me! There are no mice in the air, I’m afraid, but you might catch a bat, and that’s very like a mouse, you know. But do cats eat bats, I wonder?’ And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, ‘Do cats eat bats? Do cats eat bats?’ and sometimes, ‘Do bats eat cats?’ for, you see, as she couldn’t answer either question, it didn’t much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and saying to her very earnestly, ‘Now, Dinah, tell me the truth: did you ever eat a bat?’ when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over. Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. 
There was not a moment to be lost: away went Alice like the wind, and was just in time to hear it say, as it turned a corner, ‘Oh my ears and whiskers, how late it’s getting!’ She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof. There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wondering how she was ever to get out again. Suddenly she came upon a little three-legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice’s first thought was that it might belong to one of the doors of the hall; but, alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inches high: she tried the little golden key in the lock, and to her great delight it fitted! Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest garden you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; ‘and even if my head would go through,’ thought poor Alice, ‘it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only knew how to begin.’ For, you see, so many out-of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible. 
There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this time she found a little bottle on it, (‘which certainly was not here before,’ said Alice,) and round the neck of the bottle was a paper label, with the words ‘DRINK ME’ beautifully printed on it in large letters. It was all very well to say ‘Drink me,’ but the wise little Alice was not going to do that in a hurry. ‘No, I’ll look first,’ she said, ‘and see whether it’s marked “poison” or not’; for she had read several nice little histories about children who had got burnt, and eaten up by wild beasts and other unpleasant things, all because they would not remember the simple rules their friends had taught them: such as, that a red-hot poker will burn you if you hold it too long; and that if you cut your finger very deeply with a knife, it usually bleeds; and she had never forgotten that, if you drink much from a bottle marked ‘poison,’ it is almost certain to disagree with you, sooner or later. However, this bottle was not marked ‘poison,’ so Alice ventured to taste it, and finding it very nice, (it had, in fact, a sort of mixed flavour of cherry-tart, custard, pine-apple, roast turkey, toffee, and hot buttered toast,) she very soon finished it off. * * * * * * * * * * * * * * * * * * * * ‘What a curious feeling!’ said Alice; ‘I must be shutting up like a telescope.’ And so it was indeed: she was now only ten inches high, and her face brightened up at the thought that she was now the right size for going through the little door into that lovely garden. First, however, she waited for a few minutes to see if she was going to shrink any further: she felt a little nervous about this; ‘for it might end, you know,’ said Alice to herself, ‘in my going out altogether, like a candle. 
I wonder what I should be like then?’ And she tried to fancy what the flame of a candle is like after the candle is blown out, for she could not remember ever having seen such a thing. After a while, finding that nothing more happened, she decided on going into the garden at once; but, alas for poor Alice! when she got to the door, she found she had forgotten the little golden key, and when she went back to the table for it, she found she could not possibly reach it: she could see it quite plainly through the glass, and she tried her best to climb up one of the legs of the table, but it was too slippery; and when she had tired herself out with trying, the poor little thing sat down and cried. ‘Come, there’s no use in crying like that!’ said Alice to herself, rather sharply; ‘I advise you to leave off this minute!’ She generally gave herself very good advice, (though she very seldom followed it), and sometimes she scolded herself so severely as to bring tears into her eyes; and once she remembered trying to box her own ears for having cheated herself in a game of croquet she was playing against herself, for this curious child was very fond of pretending to be two people. ‘But it’s no use now,’ thought poor Alice, ‘to pretend to be two people! Why, there’s hardly enough of me left to make one respectable person!’ Soon her eye fell on a little glass box that was lying under the table: she opened it, and found in it a very small cake, on which the words ‘EAT ME’ were beautifully marked in currants. ‘Well, I’ll eat it,’ said Alice, ‘and if it makes me grow larger, I can reach the key; and if it makes me grow smaller, I can creep under the door; so either way I’ll get into the garden, and I don’t care which happens!’ She ate a little bit, and said anxiously to herself, ‘Which way? 
Which way?’, holding her hand on the top of her head to feel which way it was growing, and she was quite surprised to find that she remained the same size: to be sure, this generally happens when one eats cake, but Alice had got so much into the way of expecting nothing but out-of-the-way things to happen, that it seemed quite dull and stupid for life to go on in the common way. So she set to work, and very soon finished off the cake. * * * * * * * * * * * * * * * * * * * * CHAPTER II. The Pool of Tears ‘Curiouser and curiouser!’ cried Alice (she was so much surprised, that for the moment she quite forgot how to speak good English); ‘now I’m opening out like the largest telescope that ever was! Good-bye, feet!’ (for when she looked down at her feet, they seemed to be almost out of sight, they were getting so far off). ‘Oh, my poor little feet, I wonder who will put on your shoes and stockings for you now, dears? I’m sure I shan’t be able! I shall be a great deal too far off to trouble myself about you: you must manage the best way you can;—but I must be kind to them,’ thought Alice, ‘or perhaps they won’t walk the way I want to go! Let me see: I’ll give them a new pair of boots every Christmas.’ And she went on planning to herself how she would manage it. ‘They must go by the carrier,’ she thought; ‘and how funny it’ll seem, sending presents to one’s own feet! And how odd the directions will look! Alice’s Right Foot, Esq. Hearthrug, near The Fender, (with Alice’s love). Oh dear, what nonsense I’m talking!’ Just then her head struck against the roof of the hall: in fact she was now more than nine feet high, and she at once took up the little golden key and hurried off to the garden door. Poor Alice! It was as much as she could do, lying down on one side, to look through into the garden with one eye; but to get through was more hopeless than ever: she sat down and began to cry again. 
‘You ought to be ashamed of yourself,’ said Alice, ‘a great girl like you,’ (she might well say this), ‘to go on crying in this way! Stop this moment, I tell you!’ But she went on all the same, shedding gallons of tears, until there was a large pool all round her, about four inches deep and reaching half down the hall. After a time she heard a little pattering of feet in the distance, and she hastily dried her eyes to see what was coming. It was the White Rabbit returning, splendidly dressed, with a pair of white kid gloves in one hand and a large fan in the other: he came trotting along in a great hurry, muttering to himself as he came, ‘Oh! the Duchess, the Duchess! Oh! won’t she be savage if I’ve kept her waiting!’ Alice felt so desperate that she was ready to ask help of any one; so, when the Rabbit came near her, she began, in a low, timid voice, ‘If you please, sir—’ The Rabbit started violently, dropped the white kid gloves and the fan, and skurried away into the darkness as hard as he could go. Alice took up the fan and gloves, and, as the hall was very hot, she kept fanning herself all the time she went on talking: ‘Dear, dear! How queer everything is to-day! And yesterday things went on just as usual. I wonder if I’ve been changed in the night? Let me think: was I the same when I got up this morning? I almost think I can remember feeling a little different. But if I’m not the same, the next question is, Who in the world am I? Ah, that’s the great puzzle!’ And she began thinking over all the children she knew that were of the same age as herself, to see if she could have been changed for any of them. ‘I’m sure I’m not Ada,’ she said, ‘for her hair goes in such long ringlets, and mine doesn’t go in ringlets at all; and I’m sure I can’t be Mabel, for I know all sorts of things, and she, oh! she knows such a very little! Besides, she’s she, and I’m I, and—oh dear, how puzzling it all is! I’ll try if I know all the things I used to know. 
Let me see: four times five is twelve, and four times six is thirteen, and four times seven is—oh dear! I shall never get to twenty at that rate! However, the Multiplication Table doesn’t signify: let’s try Geography. London is the capital of Paris, and Paris is the capital of Rome, and Rome—no, that’s all wrong, I’m certain! I must have been changed for Mabel! I’ll try and say “How doth the little—“’ and she crossed her hands on her lap as if she were saying lessons, and began to repeat it, but her voice sounded hoarse and strange, and the words did not come the same as they used to do:— ‘How doth the little crocodile Improve his shining tail, And pour the waters of the Nile On every golden scale! ‘How cheerfully he seems to grin, How neatly spread his claws, And welcome little fishes in With gently smiling jaws!’ ‘I’m sure those are not the right words,’ said poor Alice, and her eyes filled with tears again as she went on, ‘I must be Mabel after all, and I shall have to go and live in that poky little house, and have next to no toys to play with, and oh! ever so many lessons to learn! No, I’ve made up my mind about it; if I’m Mabel, I’ll stay down here! It’ll be no use their putting their heads down and saying “Come up again, dear!” I shall only look up and say “Who am I then? Tell me that first, and then, if I like being that person, I’ll come up: if not, I’ll stay down here till I’m somebody else”—but, oh dear!’ cried Alice, with a sudden burst of tears, ‘I do wish they would put their heads down! I am so very tired of being all alone here!’ As she said this she looked down at her hands, and was surprised to see that she had put on one of the Rabbit’s little white kid gloves while she was talking. ‘How can I have done that?’ she thought. 
‘I must be growing small again.’ She got up and went to the table to measure herself by it, and found that, as nearly as she could guess, she was now about two feet high, and was going on shrinking rapidly: she soon found out that the cause of this was the fan she was holding, and she dropped it hastily, just in time to avoid shrinking away altogether. ‘That was a narrow escape!’ said Alice, a good deal frightened at the sudden change, but very glad to find herself still in existence; ‘and now for the garden!’ and she ran with all speed back to the little door: but, alas! the little door was shut again, and the little golden key was lying on the glass table as before, ‘and things are worse than ever,’ thought the poor child, ‘for I never was so small as this before, never! And I declare it’s too bad, that it is!’ As she said these words her foot slipped, and in another moment, splash! she was up to her chin in salt water. Her first idea was that she had somehow fallen into the sea, ‘and in that case I can go back by railway,’ she said to herself. (Alice had been to the seaside once in her life, and had come to the general conclusion, that wherever you go to on the English coast you find a number of bathing machines in the sea, some children digging in the sand with wooden spades, then a row of lodging houses, and behind them a railway station.) However, she soon made out that she was in the pool of tears which she had wept when she was nine feet high. ‘I wish I hadn’t cried so much!’ said Alice, as she swam about, trying to find her way out. ‘I shall be punished for it now, I suppose, by being drowned in my own tears! That will be a queer thing, to be sure! 
However, everything is queer to-day.’ Just then she heard something splashing about in the pool a little way off, and she swam nearer to make out what it was: at first she thought it must be a walrus or hippopotamus, but then she remembered how small she was now, and she soon made out that it was only a mouse that had slipped in like herself. ‘Would it be of any use, now,’ thought Alice, ‘to speak to this mouse? Everything is so out-of-the-way down here, that I should think very likely it can talk: at any rate, there’s no harm in trying.’ So she began: ‘O Mouse, do you know the way out of this pool? I am very tired of swimming about here, O Mouse!’ (Alice thought this must be the right way of speaking to a mouse: she had never done such a thing before, but she remembered having seen in her brother’s Latin Grammar, ‘A mouse—of a mouse—to a mouse—a mouse—O mouse!’) The Mouse looked at her rather inquisitively, and seemed to her to wink with one of its little eyes, but it said nothing. ‘Perhaps it doesn’t understand English,’ thought Alice; ‘I daresay it’s a French mouse, come over with William the Conqueror.’ (For, with all her knowledge of history, Alice had no very clear notion how long ago anything had happened.) So she began again: ‘Ou est ma chatte?’ which was the first sentence in her French lesson-book. The Mouse gave a sudden leap out of the water, and seemed to quiver all over with fright. ‘Oh, I beg your pardon!’ cried Alice hastily, afraid that she had hurt the poor animal’s feelings. ‘I quite forgot you didn’t like cats.’ ‘Not like cats!’ cried the Mouse, in a shrill, passionate voice. ‘Would you like cats if you were me?’ ‘Well, perhaps not,’ said Alice in a soothing tone: ‘don’t be angry about it. And yet I wish I could show you our cat Dinah: I think you’d take a fancy to cats if you could only see her. 
She is such a dear quiet thing,’ Alice went on, half to herself, as she swam lazily about in the pool, ‘and she sits purring so nicely by the fire, licking her paws and washing her face—and she is such a nice soft thing to nurse—and she’s such a capital one for catching mice—oh, I beg your pardon!’ cried Alice again, for this time the Mouse was bristling all over, and she felt certain it must be really offended. ‘We won’t talk about her any more if you’d rather not.’ ‘We indeed!’ cried the Mouse, who was trembling down to the end of his tail. ‘As if I would talk on such a subject! Our family always hated cats: nasty, low, vulgar things! Don’t let me hear the name again!’ ‘I won’t indeed!’ said Alice, in a great hurry to change the subject of conversation. ‘Are you—are you fond—of—of dogs?’ The Mouse did not answer, so Alice went on eagerly: ‘There is such a nice little dog near our house I should like to show you! A little bright-eyed terrier, you know, with oh, such long curly brown hair! And it’ll fetch things when you throw them, and it’ll sit up and beg for its dinner, and all sorts of things—I can’t remember half of them—and it belongs to a farmer, you know, and he says it’s so useful, it’s worth a hundred pounds! He says it kills all the rats and—oh dear!’ cried Alice in a sorrowful tone, ‘I’m afraid I’ve offended it again!’ For the Mouse was swimming away from her as hard as it could go, and making quite a commotion in the pool as it went. So she called softly after it, ‘Mouse dear! 
Do come back again, and we won’t talk about cats or dogs either, if you don’t like them!’ When the Mouse heard this, it turned round and swam slowly back to her: its face was quite pale (with passion, Alice thought), and it said in a low trembling voice, ‘Let us get to the shore, and then I’ll tell you my history, and you’ll understand why it is I hate cats and dogs.’ It was high time to go, for the pool was getting quite crowded with the birds and animals that had fallen into it: there were a Duck and a Dodo, a Lory and an Eaglet, and several other curious creatures. Alice led the way, and the whole party swam to the shore. CHAPTER III. A Caucus-Race and a Long Tale They were indeed a queer-looking party that assembled on the bank—the birds with draggled feathers, the animals with their fur clinging close to them, and all dripping wet, cross, and uncomfortable. The first question of course was, how to get dry again: they had a consultation about this, and after a few minutes it seemed quite natural to Alice to find herself talking familiarly with them, as if she had known them all her life. Indeed, she had quite a long argument with the Lory, who at last turned sulky, and would only say, ‘I am older than you, and must know better’; and this Alice would not allow without knowing how old it was, and, as the Lory positively refused to tell its age, there was no more to be said. At last the Mouse, who seemed to be a person of authority among them, called out, ‘Sit down, all of you, and listen to me! I’ll soon make you dry enough!’ They all sat down at once, in a large ring, with the Mouse in the middle. Alice kept her eyes anxiously fixed on it, for she felt sure she would catch a bad cold if she did not get dry very soon. ‘Ahem!’ said the Mouse with an important air, ‘are you all ready? This is the driest thing I know. Silence all round, if you please! 
“William the Conqueror, whose cause was favoured by the pope, was soon submitted to by the English, who wanted leaders, and had been of late much accustomed to usurpation and conquest. Edwin and Morcar, the earls of Mercia and Northumbria—“’ ‘Ugh!’ said the Lory, with a shiver. ‘I beg your pardon!’ said the Mouse, frowning, but very politely: ‘Did you speak?’ ‘Not I!’ said the Lory hastily. ‘I thought you did,’ said the Mouse. ‘—I proceed. “Edwin and Morcar, the earls of Mercia and Northumbria, declared for him: and even Stigand, the patriotic archbishop of Canterbury, found it advisable—“’ ‘Found what?’ said the Duck. ‘Found it,’ the Mouse replied rather crossly: ‘of course you know what “it” means.’ ‘I know what “it” means well enough, when I find a thing,’ said the Duck: ‘it’s generally a frog or a worm. The question is, what did the archbishop find?’ The Mouse did not notice this question, but hurriedly went on, ‘“—found it advisable to go with Edgar Atheling to meet William and offer him the crown. William’s conduct at first was moderate. But the insolence of his Normans—” How are you getting on now, my dear?’ it continued, turning to Alice as it spoke. ‘As wet as ever,’ said Alice in a melancholy tone: ‘it doesn’t seem to dry me at all.’ ‘In that case,’ said the Dodo solemnly, rising to its feet, ‘I move that the meeting adjourn, for the immediate adoption of more energetic remedies—’ ‘Speak English!’ said the Eaglet. ‘I don’t know the meaning of half those long words, and, what’s more, I don’t believe you do either!’ And the Eaglet bent down its head to hide a smile: some of the other birds tittered audibly. ‘What I was going to say,’ said the Dodo in an offended tone, ‘was, that the best thing to get us dry would be a Caucus-race.’ ‘What is a Caucus-race?’ said Alice; not that she wanted much to know, but the Dodo had paused as if it thought that somebody ought to speak, and no one else seemed inclined to say anything. 
‘Why,’ said the Dodo, ‘the best way to explain it is to do it.’ (And, as you might like to try the thing yourself, some winter day, I will tell you how the Dodo managed it.) First it marked out a race-course, in a sort of circle, (‘the exact shape doesn’t matter,’ it said,) and then all the party were placed along the course, here and there. There was no ‘One, two, three, and away,’ but they began running when they liked, and left off when they liked, so that it was not easy to know when the race was over. However, when they had been running half an hour or so, and were quite dry again, the Dodo suddenly called out ‘The race is over!’ and they all crowded round it, panting, and asking, ‘But who has won?’ This question the Dodo could not answer without a great deal of thought, and it sat for a long time with one finger pressed upon its forehead (the position in which you usually see Shakespeare, in the pictures of him), while the rest waited in silence. At last the Dodo said, ‘Everybody has won, and all must have prizes.’ ‘But who is to give the prizes?’ quite a chorus of voices asked. ‘Why, she, of course,’ said the Dodo, pointing to Alice with one finger; and the whole party at once crowded round her, calling out in a confused way, ‘Prizes! Prizes!’ Alice had no idea what to do, and in despair she put her hand in her pocket, and pulled out a box of comfits, (luckily the salt water had not got into it), and handed them round as prizes. There was exactly one a-piece all round. ‘But she must have a prize herself, you know,’ said the Mouse. ‘Of course,’ the Dodo replied very gravely. ‘What else have you got in your pocket?’ he went on, turning to Alice. ‘Only a thimble,’ said Alice sadly. ‘Hand it over here,’ said the Dodo. Then they all crowded round her once more, while the Dodo solemnly presented the thimble, saying ‘We beg your acceptance of this elegant thimble’; and, when it had finished this short speech, they all cheered. 
Alice thought the whole thing very absurd, but they all looked so grave that she did not dare to laugh; and, as she could not think of anything to say, she simply bowed, and took the thimble, looking as solemn as she could. The next thing was to eat the comfits: this caused some noise and confusion, as the large birds complained that they could not taste theirs, and the small ones choked and had to be patted on the back. However, it was over at last, and they sat down again in a ring, and begged the Mouse to tell them something more. ‘You promised to tell me your history, you know,’ said Alice, ‘and why it is you hate—C and D,’ she added in a whisper, half afraid that it would be offended again. ‘Mine is a long and a sad tale!’ said the Mouse, turning to Alice, and sighing. ‘It is a long tail, certainly,’ said Alice, looking down with wonder at the Mouse’s tail; ‘but why do you call it sad?’ And she kept on puzzling about it while the Mouse was speaking, so that her idea of the tale was something like this:— ‘Fury said to a mouse, That he met in the house, “Let us both go to law: I will prosecute you.—Come, I’ll take no denial; We must have a trial: For really this morning I’ve nothing to do.” Said the mouse to the cur, “Such a trial, dear Sir, With no jury or judge, would be wasting our breath.” “I’ll be judge, I’ll be jury,” Said cunning old Fury: “I’ll try the whole cause, and condemn you to death.”’ ‘You are not attending!’ said the Mouse to Alice severely. ‘What are you thinking of?’ ‘I beg your pardon,’ said Alice very humbly: ‘you had got to the fifth bend, I think?’ ‘I had not!’ cried the Mouse, sharply and very angrily. ‘A knot!’ said Alice, always ready to make herself useful, and looking anxiously about her. ‘Oh, do let me help to undo it!’ ‘I shall do nothing of the sort,’ said the Mouse, getting up and walking away. ‘You insult me by talking such nonsense!’ ‘I didn’t mean it!’ pleaded poor Alice. 
‘But you’re so easily offended, you know!’ The Mouse only growled in reply. ‘Please come back and finish your story!’ Alice called after it; and the others all joined in chorus, ‘Yes, please do!’ but the Mouse only shook its head impatiently, and walked a little quicker. ‘What a pity it wouldn’t stay!’ sighed the Lory, as soon as it was quite out of sight; and an old Crab took the opportunity of saying to her daughter ‘Ah, my dear! Let this be a lesson to you never to lose your temper!’ ‘Hold your tongue, Ma!’ said the young Crab, a little snappishly. ‘You’re enough to try the patience of an oyster!’ ‘I wish I had our Dinah here, I know I do!’ said Alice aloud, addressing nobody in particular. ‘She’d soon fetch it back!’ ‘And who is Dinah, if I might venture to ask the question?’ said the Lory. Alice replied eagerly, for she was always ready to talk about her pet: ‘Dinah’s our cat. And she’s such a capital one for catching mice you can’t think! And oh, I wish you could see her after the birds! Why, she’ll eat a little bird as soon as look at it!’ This speech caused a remarkable sensation among the party. Some of the birds hurried off at once: one old Magpie began wrapping itself up very carefully, remarking, ‘I really must be getting home; the night-air doesn’t suit my throat!’ and a Canary called out in a trembling voice to its children, ‘Come away, my dears! It’s high time you were all in bed!’ On various pretexts they all moved off, and Alice was soon left alone. ‘I wish I hadn’t mentioned Dinah!’ she said to herself in a melancholy tone. ‘Nobody seems to like her, down here, and I’m sure she’s the best cat in the world! Oh, my dear Dinah! I wonder if I shall ever see you any more!’ And here poor Alice began to cry again, for she felt very lonely and low-spirited. 
In a little while, however, she again heard a little pattering of footsteps in the distance, and she looked up eagerly, half hoping that the Mouse had changed his mind, and was coming back to finish his story. CHAPTER IV. The Rabbit Sends in a Little Bill It was the White Rabbit, trotting slowly back again, and looking anxiously about as it went, as if it had lost something; and she heard it muttering to itself ‘The Duchess! The Duchess! Oh my dear paws! Oh my fur and whiskers! She’ll get me executed, as sure as ferrets are ferrets! Where can I have dropped them, I wonder?’ Alice guessed in a moment that it was looking for the fan and the pair of white kid gloves, and she very good-naturedly began hunting about for them, but they were nowhere to be seen—everything seemed to have changed since her swim in the pool, and the great hall, with the glass table and the little door, had vanished completely. Very soon the Rabbit noticed Alice, as she went hunting about, and called out to her in an angry tone, ‘Why, Mary Ann, what are you doing out here? Run home this moment, and fetch me a pair of gloves and a fan! Quick, now!’ And Alice was so much frightened that she ran off at once in the direction it pointed to, without trying to explain the mistake it had made. ‘He took me for his housemaid,’ she said to herself as she ran. ‘How surprised he’ll be when he finds out who I am! But I’d better take him his fan and gloves—that is, if I can find them.’ As she said this, she came upon a neat little house, on the door of which was a bright brass plate with the name ‘W. RABBIT’ engraved upon it. She went in without knocking, and hurried upstairs, in great fear lest she should meet the real Mary Ann, and be turned out of the house before she had found the fan and gloves. ‘How queer it seems,’ Alice said to herself, ‘to be going messages for a rabbit! I suppose Dinah’ll be sending me on messages next!’ And she began fancying the sort of thing that would happen: ‘“Miss Alice! 
Come here directly, and get ready for your walk!” “Coming in a minute, nurse! But I’ve got to see that the mouse doesn’t get out.” Only I don’t think,’ Alice went on, ‘that they’d let Dinah stop in the house if it began ordering people about like that!’ By this time she had found her way into a tidy little room with a table in the window, and on it (as she had hoped) a fan and two or three pairs of tiny white kid gloves: she took up the fan and a pair of the gloves, and was just going to leave the room, when her eye fell upon a little bottle that stood near the looking-glass. There was no label this time with the words ‘DRINK ME,’ but nevertheless she uncorked it and put it to her lips. ‘I know something interesting is sure to happen,’ she said to herself, ‘whenever I eat or drink anything; so I’ll just see what this bottle does. I do hope it’ll make me grow large again, for really I’m quite tired of being such a tiny little thing!’ It did so indeed, and much sooner than she had expected: before she had drunk half the bottle, she found her head pressing against the ceiling, and had to stoop to save her neck from being broken. She hastily put down the bottle, saying to herself ‘That’s quite enough—I hope I shan’t grow any more—As it is, I can’t get out at the door—I do wish I hadn’t drunk quite so much!’ Alas! it was too late to wish that! She went on growing, and growing, and very soon had to kneel down on the floor: in another minute there was not even room for this, and she tried the effect of lying down with one elbow against the door, and the other arm curled round her head. Still she went on growing, and, as a last resource, she put one arm out of the window, and one foot up the chimney, and said to herself ‘Now I can do no more, whatever happens. 
What will become of me?’ Luckily for Alice, the little magic bottle had now had its full effect, and she grew no larger: still it was very uncomfortable, and, as there seemed to be no sort of chance of her ever getting out of the room again, no wonder she felt unhappy. ‘It was much pleasanter at home,’ thought poor Alice, ‘when one wasn’t always growing larger and smaller, and being ordered about by mice and rabbits. I almost wish I hadn’t gone down that rabbit-hole—and yet—and yet—it’s rather curious, you know, this sort of life! I do wonder what can have happened to me! When I used to read fairy-tales, I fancied that kind of thing never happened, and now here I am in the middle of one! There ought to be a book written about me, that there ought! And when I grow up, I’ll write one—but I’m grown up now,’ she added in a sorrowful tone; ‘at least there’s no room to grow up any more here.’ ‘But then,’ thought Alice, ‘shall I never get any older than I am now? That’ll be a comfort, one way—never to be an old woman—but then—always to have lessons to learn! Oh, I shouldn’t like that!’ ‘Oh, you foolish Alice!’ she answered herself. ‘How can you learn lessons in here? Why, there’s hardly room for you, and no room at all for any lesson-books!’ And so she went on, taking first one side and then the other, and making quite a conversation of it altogether; but after a few minutes she heard a voice outside, and stopped to listen. ‘Mary Ann! Mary Ann!’ said the voice. ‘Fetch me my gloves this moment!’ Then came a little pattering of feet on the stairs. Alice knew it was the Rabbit coming to look for her, and she trembled till she shook the house, quite forgetting that she was now about a thousand times as large as the Rabbit, and had no reason to be afraid of it. Presently the Rabbit came up to the door, and tried to open it; but, as the door opened inwards, and Alice’s elbow was pressed hard against it, that attempt proved a failure. 
Alice heard it say to itself ‘Then I’ll go round and get in at the window.’ ‘That you won’t’ thought Alice, and, after waiting till she fancied she heard the Rabbit just under the window, she suddenly spread out her hand, and made a snatch in the air. She did not get hold of anything, but she heard a little shriek and a fall, and a crash of broken glass, from which she concluded that it was just possible it had fallen into a cucumber-frame, or something of the sort. Next came an angry voice—the Rabbit’s—‘Pat! Pat! Where are you?’ And then a voice she had never heard before, ‘Sure then I’m here! Digging for apples, yer honour!’ ‘Digging for apples, indeed!’ said the Rabbit angrily. ‘Here! Come and help me out of this!’ (Sounds of more broken glass.) ‘Now tell me, Pat, what’s that in the window?’ ‘Sure, it’s an arm, yer honour!’ (He pronounced it ‘arrum.’) ‘An arm, you goose! Who ever saw one that size? Why, it fills the whole window!’ ‘Sure, it does, yer honour: but it’s an arm for all that.’ ‘Well, it’s got no business there, at any rate: go and take it away!’ There was a long silence after this, and Alice could only hear whispers now and then; such as, ‘Sure, I don’t like it, yer honour, at all, at all!’ ‘Do as I tell you, you coward!’ and at last she spread out her hand again, and made another snatch in the air. This time there were two little shrieks, and more sounds of broken glass. ‘What a number of cucumber-frames there must be!’ thought Alice. ‘I wonder what they’ll do next! As for pulling me out of the window, I only wish they could! I’m sure I don’t want to stay in here any longer!’ She waited for some time without hearing anything more: at last came a rumbling of little cartwheels, and the sound of a good many voices all talking together: she made out the words: ‘Where’s the other ladder?—Why, I hadn’t to bring but one; Bill’s got the other—Bill! 
fetch it here, lad!—Here, put ‘em up at this corner—No, tie ‘em together first—they don’t reach half high enough yet—Oh! they’ll do well enough; don’t be particular—Here, Bill! catch hold of this rope—Will the roof bear?—Mind that loose slate—Oh, it’s coming down! Heads below!’ (a loud crash)—‘Now, who did that?—It was Bill, I fancy—Who’s to go down the chimney?—Nay, I shan’t! You do it!—That I won’t, then!—Bill’s to go down—Here, Bill! the master says you’re to go down the chimney!’ ‘Oh! So Bill’s got to come down the chimney, has he?’ said Alice to herself. ‘Shy, they seem to put everything upon Bill! I wouldn’t be in Bill’s place for a good deal: this fireplace is narrow, to be sure; but I think I can kick a little!’ She drew her foot as far down the chimney as she could, and waited till she heard a little animal (she couldn’t guess of what sort it was) scratching and scrambling about in the chimney close above her: then, saying to herself ‘This is Bill,’ she gave one sharp kick, and waited to see what would happen next. The first thing she heard was a general chorus of ‘There goes Bill!’ then the Rabbit’s voice along—‘Catch him, you by the hedge!’ then silence, and then another confusion of voices—‘Hold up his head—Brandy now—Don’t choke him—How was it, old fellow? What happened to you? Tell us all about it!’ Last came a little feeble, squeaking voice, (‘That’s Bill,’ thought Alice,) ‘Well, I hardly know—No more, thank ye; I’m better now—but I’m a deal too flustered to tell you—all I know is, something comes at me like a Jack-in-the-box, and up I goes like a sky-rocket!’ ‘So you did, old fellow!’ said the others. ‘We must burn the house down!’ said the Rabbit’s voice; and Alice called out as loud as she could, ‘If you do. I’ll set Dinah at you!’ There was a dead silence instantly, and Alice thought to herself, ‘I wonder what they will do next! 
If they had any sense, they’d take the roof off.’ After a minute or two, they began moving about again, and Alice heard the Rabbit say, ‘A barrowful will do, to begin with.’ ‘A barrowful of what?’ thought Alice; but she had not long to doubt, for the next moment a shower of little pebbles came rattling in at the window, and some of them hit her in the face. ‘I’ll put a stop to this,’ she said to herself, and shouted out, ‘You’d better not do that again!’ which produced another dead silence. Alice noticed with some surprise that the pebbles were all turning into little cakes as they lay on the floor, and a bright idea came into her head. ‘If I eat one of these cakes,’ she thought, ‘it’s sure to make some change in my size; and as it can’t possibly make me larger, it must make me smaller, I suppose.’ So she swallowed one of the cakes, and was delighted to find that she began shrinking directly. As soon as she was small enough to get through the door, she ran out of the house, and found quite a crowd of little animals and birds waiting outside. The poor little Lizard, Bill, was in the middle, being held up by two guinea-pigs, who were giving it something out of a bottle. They all made a rush at Alice the moment she appeared; but she ran off as hard as she could, and soon found herself safe in a thick wood. ‘The first thing I’ve got to do,’ said Alice to herself, as she wandered about in the wood, ‘is to grow to my right size again; and the second thing is to find my way into that lovely garden. I think that will be the best plan.’ It sounded an excellent plan, no doubt, and very neatly and simply arranged; the only difficulty was, that she had not the smallest idea how to set about it; and while she was peering about anxiously among the trees, a little sharp bark just over her head made her look up in a great hurry. An enormous puppy was looking down at her with large round eyes, and feebly stretching out one paw, trying to touch her. 
‘Poor little thing!’ said Alice, in a coaxing tone, and she tried hard to whistle to it; but she was terribly frightened all the time at the thought that it might be hungry, in which case it would be very likely to eat her up in spite of all her coaxing. Hardly knowing what she did, she picked up a little bit of stick, and held it out to the puppy; whereupon the puppy jumped into the air off all its feet at once, with a yelp of delight, and rushed at the stick, and made believe to worry it; then Alice dodged behind a great thistle, to keep herself from being run over; and the moment she appeared on the other side, the puppy made another rush at the stick, and tumbled head over heels in its hurry to get hold of it; then Alice, thinking it was very like having a game of play with a cart-horse, and expecting every moment to be trampled under its feet, ran round the thistle again; then the puppy began a series of short charges at the stick, running a very little way forwards each time and a long way back, and barking hoarsely all the while, till at last it sat down a good way off, panting, with its tongue hanging out of its mouth, and its great eyes half shut. This seemed to Alice a good opportunity for making her escape; so she set off at once, and ran till she was quite tired and out of breath, and till the puppy’s bark sounded quite faint in the distance. ‘And yet what a dear little puppy it was!’ said Alice, as she leant against a buttercup to rest herself, and fanned herself with one of the leaves: ‘I should have liked teaching it tricks very much, if—if I’d only been the right size to do it! Oh dear! I’d nearly forgotten that I’ve got to grow up again! Let me see—how is it to be managed? I suppose I ought to eat or drink something or other; but the great question is, what?’ The great question certainly was, what? 
Alice looked all round her at the flowers and the blades of grass, but she did not see anything that looked like the right thing to eat or drink under the circumstances. There was a large mushroom growing near her, about the same height as herself; and when she had looked under it, and on both sides of it, and behind it, it occurred to her that she might as well look and see what was on the top of it. She stretched herself up on tiptoe, and peeped over the edge of the mushroom, and her eyes immediately met those of a large caterpillar, that was sitting on the top with its arms folded, quietly smoking a long hookah, and taking not the smallest notice of her or of anything else. CHAPTER V. Advice from a Caterpillar The Caterpillar and Alice looked at each other for some time in silence: at last the Caterpillar took the hookah out of its mouth, and addressed her in a languid, sleepy voice. ‘Who are you?’ said the Caterpillar. This was not an encouraging opening for a conversation. Alice replied, rather shyly, ‘I—I hardly know, sir, just at present—at least I know who I was when I got up this morning, but I think I must have been changed several times since then.’ ‘What do you mean by that?’ said the Caterpillar sternly. ‘Explain yourself!’ ‘I can’t explain myself, I’m afraid, sir’ said Alice, ‘because I’m not myself, you see.’ ‘I don’t see,’ said the Caterpillar. ‘I’m afraid I can’t put it more clearly,’ Alice replied very politely, ‘for I can’t understand it myself to begin with; and being so many different sizes in a day is very confusing.’ ‘It isn’t,’ said the Caterpillar. ‘Well, perhaps you haven’t found it so yet,’ said Alice; ‘but when you have to turn into a chrysalis—you will some day, you know—and then after that into a butterfly, I should think you’ll feel it a little queer, won’t you?’ ‘Not a bit,’ said the Caterpillar. 
‘Well, perhaps your feelings may be different,’ said Alice; ‘all I know is, it would feel very queer to me.’ ‘You!’ said the Caterpillar contemptuously. ‘Who are you?’ Which brought them back again to the beginning of the conversation. Alice felt a little irritated at the Caterpillar’s making such very short remarks, and she drew herself up and said, very gravely, ‘I think, you ought to tell me who you are, first.’ ‘Why?’ said the Caterpillar. Here was another puzzling question; and as Alice could not think of any good reason, and as the Caterpillar seemed to be in a very unpleasant state of mind, she turned away. ‘Come back!’ the Caterpillar called after her. ‘I’ve something important to say!’ This sounded promising, certainly: Alice turned and came back again. ‘Keep your temper,’ said the Caterpillar. ‘Is that all?’ said Alice, swallowing down her anger as well as she could. ‘No,’ said the Caterpillar. Alice thought she might as well wait, as she had nothing else to do, and perhaps after all it might tell her something worth hearing. For some minutes it puffed away without speaking, but at last it unfolded its arms, took the hookah out of its mouth again, and said, ‘So you think you’re changed, do you?’ ‘I’m afraid I am, sir,’ said Alice; ‘I can’t remember things as I used—and I don’t keep the same size for ten minutes together!’ ‘Can’t remember what things?’ said the Caterpillar. ‘Well, I’ve tried to say “How doth the little busy bee,” but it all came different!’ Alice replied in a very melancholy voice. ‘Repeat, “You are old, Father William,”’ said the Caterpillar. 
Alice folded her hands, and began:— ‘You are old, Father William,’ the young man said, ‘And your hair has become very white; And yet you incessantly stand on your head— Do you think, at your age, it is right?’ ‘In my youth,’ Father William replied to his son, ‘I feared it might injure the brain; But, now that I’m perfectly sure I have none, Why, I do it again and again.’ ‘You are old,’ said the youth, ‘as I mentioned before, And have grown most uncommonly fat; Yet you turned a back-somersault in at the door— Pray, what is the reason of that?’ ‘In my youth,’ said the sage, as he shook his grey locks, ‘I kept all my limbs very supple By the use of this ointment—one shilling the box— Allow me to sell you a couple?’ ‘You are old,’ said the youth, ‘and your jaws are too weak For anything tougher than suet; Yet you finished the goose, with the bones and the beak— Pray how did you manage to do it?’ ‘In my youth,’ said his father, ‘I took to the law, And argued each case with my wife; And the muscular strength, which it gave to my jaw, Has lasted the rest of my life.’ ‘You are old,’ said the youth, ‘one would hardly suppose That your eye was as steady as ever; Yet you balanced an eel on the end of your nose— What made you so awfully clever?’ ‘I have answered three questions, and that is enough,’ Said his father; ‘don’t give yourself airs! Do you think I can listen all day to such stuff? Be off, or I’ll kick you down stairs!’ ‘That is not said right,’ said the Caterpillar. ‘Not quite right, I’m afraid,’ said Alice, timidly; ‘some of the words have got altered.’ ‘It is wrong from beginning to end,’ said the Caterpillar decidedly, and there was silence for some minutes. The Caterpillar was the first to speak. ‘What size do you want to be?’ it asked. ‘Oh, I’m not particular as to size,’ Alice hastily replied; ‘only one doesn’t like changing so often, you know.’ ‘I don’t know,’ said the Caterpillar. 
Alice said nothing: she had never been so much contradicted in her life before, and she felt that she was losing her temper. ‘Are you content now?’ said the Caterpillar. ‘Well, I should like to be a little larger, sir, if you wouldn’t mind,’ said Alice: ‘three inches is such a wretched height to be.’ ‘It is a very good height indeed!’ said the Caterpillar angrily, rearing itself upright as it spoke (it was exactly three inches high). ‘But I’m not used to it!’ pleaded poor Alice in a piteous tone. And she thought of herself, ‘I wish the creatures wouldn’t be so easily offended!’ ‘You’ll get used to it in time,’ said the Caterpillar; and it put the hookah into its mouth and began smoking again. This time Alice waited patiently until it chose to speak again. In a minute or two the Caterpillar took the hookah out of its mouth and yawned once or twice, and shook itself. Then it got down off the mushroom, and crawled away in the grass, merely remarking as it went, ‘One side will make you grow taller, and the other side will make you grow shorter.’ ‘One side of what? The other side of what?’ thought Alice to herself. ‘Of the mushroom,’ said the Caterpillar, just as if she had asked it aloud; and in another moment it was out of sight. Alice remained looking thoughtfully at the mushroom for a minute, trying to make out which were the two sides of it; and as it was perfectly round, she found this a very difficult question. However, at last she stretched her arms round it as far as they would go, and broke off a bit of the edge with each hand. ‘And now which is which?’ she said to herself, and nibbled a little of the right-hand bit to try the effect: the next moment she felt a violent blow underneath her chin: it had struck her foot! She was a good deal frightened by this very sudden change, but she felt that there was no time to be lost, as she was shrinking rapidly; so she set to work at once to eat some of the other bit. 
Her chin was pressed so closely against her foot, that there was hardly room to open her mouth; but she did it at last, and managed to swallow a morsel of the lefthand bit. * * * * * * * * * * * * * * * * * * * * ‘Come, my head’s free at last!’ said Alice in a tone of delight, which changed into alarm in another moment, when she found that her shoulders were nowhere to be found: all she could see, when she looked down, was an immense length of neck, which seemed to rise like a stalk out of a sea of green leaves that lay far below her. ‘What can all that green stuff be?’ said Alice. ‘And where have my shoulders got to? And oh, my poor hands, how is it I can’t see you?’ She was moving them about as she spoke, but no result seemed to follow, except a little shaking among the distant green leaves. As there seemed to be no chance of getting her hands up to her head, she tried to get her head down to them, and was delighted to find that her neck would bend about easily in any direction, like a serpent. She had just succeeded in curving it down into a graceful zigzag, and was going to dive in among the leaves, which she found to be nothing but the tops of the trees under which she had been wandering, when a sharp hiss made her draw back in a hurry: a large pigeon had flown into her face, and was beating her violently with its wings. ‘Serpent!’ screamed the Pigeon. ‘I’m not a serpent!’ said Alice indignantly. ‘Let me alone!’ ‘Serpent, I say again!’ repeated the Pigeon, but in a more subdued tone, and added with a kind of sob, ‘I’ve tried every way, and nothing seems to suit them!’ ‘I haven’t the least idea what you’re talking about,’ said Alice. ‘I’ve tried the roots of trees, and I’ve tried banks, and I’ve tried hedges,’ the Pigeon went on, without attending to her; ‘but those serpents! There’s no pleasing them!’ Alice was more and more puzzled, but she thought there was no use in saying anything more till the Pigeon had finished. 
‘As if it wasn’t trouble enough hatching the eggs,’ said the Pigeon; ‘but I must be on the look-out for serpents night and day! Why, I haven’t had a wink of sleep these three weeks!’ ‘I’m very sorry you’ve been annoyed,’ said Alice, who was beginning to see its meaning. ‘And just as I’d taken the highest tree in the wood,’ continued the Pigeon, raising its voice to a shriek, ‘and just as I was thinking I should be free of them at last, they must needs come wriggling down from the sky! Ugh, Serpent!’ ‘But I’m not a serpent, I tell you!’ said Alice. ‘I’m a—I’m a—’ ‘Well! What are you?’ said the Pigeon. ‘I can see you’re trying to invent something!’ ‘I—I’m a little girl,’ said Alice, rather doubtfully, as she remembered the number of changes she had gone through that day. ‘A likely story indeed!’ said the Pigeon in a tone of the deepest contempt. ‘I’ve seen a good many little girls in my time, but never one with such a neck as that! No, no! You’re a serpent; and there’s no use denying it. I suppose you’ll be telling me next that you never tasted an egg!’ ‘I have tasted eggs, certainly,’ said Alice, who was a very truthful child; ‘but little girls eat eggs quite as much as serpents do, you know.’ ‘I don’t believe it,’ said the Pigeon; ‘but if they do, why then they’re a kind of serpent, that’s all I can say.’ This was such a new idea to Alice, that she was quite silent for a minute or two, which gave the Pigeon the opportunity of adding, ‘You’re looking for eggs, I know that well enough; and what does it matter to me whether you’re a little girl or a serpent?’ ‘It matters a good deal to me,’ said Alice hastily; ‘but I’m not looking for eggs, as it happens; and if I was, I shouldn’t want yours: I don’t like them raw.’ ‘Well, be off, then!’ said the Pigeon in a sulky tone, as it settled down again into its nest. 
Alice crouched down among the trees as well as she could, for her neck kept getting entangled among the branches, and every now and then she had to stop and untwist it. After a while she remembered that she still held the pieces of mushroom in her hands, and she set to work very carefully, nibbling first at one and then at the other, and growing sometimes taller and sometimes shorter, until she had succeeded in bringing herself down to her usual height. It was so long since she had been anything near the right size, that it felt quite strange at first; but she got used to it in a few minutes, and began talking to herself, as usual. ‘Come, there’s half my plan done now! How puzzling all these changes are! I’m never sure what I’m going to be, from one minute to another! However, I’ve got back to my right size: the next thing is, to get into that beautiful garden—how is that to be done, I wonder?’ As she said this, she came suddenly upon an open place, with a little house in it about four feet high. ‘Whoever lives there,’ thought Alice, ‘it’ll never do to come upon them this size: why, I should frighten them out of their wits!’ So she began nibbling at the righthand bit again, and did not venture to go near the house till she had brought herself down to nine inches high. ` ================================================ FILE: go/spec/absolute_path.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package spec import ( "errors" "fmt" "regexp" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/hash" "github.com/attic-labs/noms/go/types" ) var datasetCapturePrefixRe = regexp.MustCompile("^(" + datas.DatasetRe.String() + ")") // AbsolutePath describes the location of a Value within a Noms database. // // To locate a value relative to some other value, see Path. To locate a value // globally, see Spec. 
// // For more on paths, absolute paths, and specs, see: // https://github.com/attic-labs/noms/blob/master/doc/spelling.md. type AbsolutePath struct { // Dataset is the dataset this AbsolutePath is rooted at. Only one of // Dataset and Hash should be set. Dataset string // Hash is the hash this AbsolutePath is rooted at. Only one of Dataset and // Hash should be set. Hash hash.Hash // Path is the relative path from Dataset or Hash. This can be empty. In // that case, the AbsolutePath describes the value at either Dataset or // Hash. Path types.Path } // NewAbsolutePath attempts to parse 'str' and return an AbsolutePath. func NewAbsolutePath(str string) (AbsolutePath, error) { if len(str) == 0 { return AbsolutePath{}, errors.New("Empty path") } var h hash.Hash var dataset string var pathStr string if str[0] == '#' { tail := str[1:] if len(tail) < hash.StringLen { return AbsolutePath{}, errors.New("Invalid hash: " + tail) } hashStr := tail[:hash.StringLen] if h2, ok := hash.MaybeParse(hashStr); ok { h = h2 } else { return AbsolutePath{}, errors.New("Invalid hash: " + hashStr) } pathStr = tail[hash.StringLen:] } else { datasetParts := datasetCapturePrefixRe.FindStringSubmatch(str) if datasetParts == nil { return AbsolutePath{}, fmt.Errorf("Invalid dataset name: %s", str) } dataset = datasetParts[1] pathStr = str[len(dataset):] } if len(pathStr) == 0 { return AbsolutePath{Hash: h, Dataset: dataset}, nil } path, err := types.ParsePath(pathStr) if err != nil { return AbsolutePath{}, err } return AbsolutePath{Hash: h, Dataset: dataset, Path: path}, nil } // Resolve returns the Value reachable by 'p' in 'db'. 
func (p AbsolutePath) Resolve(db datas.Database) (val types.Value) { if len(p.Dataset) > 0 { var ok bool ds := db.GetDataset(p.Dataset) if val, ok = ds.MaybeHead(); !ok { val = nil } } else if !p.Hash.IsEmpty() { val = db.ReadValue(p.Hash) } else { panic("Unreachable") } if val != nil && p.Path != nil { val = p.Path.Resolve(val, db) } return } func (p AbsolutePath) IsEmpty() bool { return p.Dataset == "" && p.Hash.IsEmpty() } func (p AbsolutePath) String() (str string) { if p.IsEmpty() { return "" } if len(p.Dataset) > 0 { str = p.Dataset } else if !p.Hash.IsEmpty() { str = "#" + p.Hash.String() } else { panic("Unreachable") } return str + p.Path.String() } // ReadAbsolutePaths attempts to parse each path in 'paths' and resolve them. // If any path fails to parse correctly or if any path can be resolved to an // existing Noms Value, then this function returns (nil, error). func ReadAbsolutePaths(db datas.Database, paths ...string) ([]types.Value, error) { r := make([]types.Value, 0, len(paths)) for _, ps := range paths { p, err := NewAbsolutePath(ps) if err != nil { return nil, fmt.Errorf("Invalid input path '%s'", ps) } v := p.Resolve(db) if v == nil { return nil, fmt.Errorf("Input path '%s' does not exist in database", ps) } r = append(r, v) } return r, nil } ================================================ FILE: go/spec/absolute_path_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package spec

import (
	"fmt"
	"strings"
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/datas"
	"github.com/attic-labs/noms/go/hash"
	"github.com/attic-labs/noms/go/types"
	"github.com/stretchr/testify/assert"
)

// TestAbsolutePathToAndFromString checks that parsing a path string and
// rendering it again with String() round-trips, for both a dataset-rooted and
// a hash-rooted path.
func TestAbsolutePathToAndFromString(t *testing.T) {
	assert := assert.New(t)

	test := func(str string) {
		p, err := NewAbsolutePath(str)
		assert.NoError(err)
		assert.Equal(str, p.String())
	}

	h := types.Number(42).Hash() // arbitrary hash
	test(fmt.Sprintf("foo.bar[#%s]", h.String()))
	test(fmt.Sprintf("#%s.bar[42]", h.String()))
}

// TestAbsolutePaths resolves dataset-rooted and hash-rooted paths against an
// in-memory database holding one commit whose value is a two-element list.
func TestAbsolutePaths(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}
	db := datas.NewDatabase(storage.NewView())

	s0, s1 := types.String("foo"), types.String("bar")
	list := types.NewList(db, s0, s1)
	emptySet := types.NewSet(db)

	// Write every value individually so each is addressable by its own hash.
	db.WriteValue(s0)
	db.WriteValue(s1)
	db.WriteValue(list)
	db.WriteValue(emptySet)

	var err error
	ds := db.GetDataset("ds")
	ds, err = db.CommitValue(ds, list)
	assert.NoError(err)
	head := ds.Head()

	// resolvesTo parses str and asserts that it resolves to exp; a nil exp
	// means the path is expected not to resolve.
	resolvesTo := func(exp types.Value, str string) {
		p, err := NewAbsolutePath(str)
		assert.NoError(err)
		act := p.Resolve(db)
		if exp == nil {
			assert.Nil(act)
		} else {
			assert.True(exp.Equals(act), "%s Expected %s Actual %s", str, types.EncodedValue(exp), types.EncodedValue(act))
		}
	}

	// Paths rooted at the committed dataset.
	resolvesTo(head, "ds")
	resolvesTo(emptySet, "ds.parents")
	resolvesTo(list, "ds.value")
	resolvesTo(s0, "ds.value[0]")
	resolvesTo(s1, "ds.value[1]")
	// Paths rooted at a hash.
	resolvesTo(head, "#"+head.Hash().String())
	resolvesTo(list, "#"+list.Hash().String())
	resolvesTo(s0, "#"+s0.Hash().String())
	resolvesTo(s1, "#"+s1.Hash().String())
	resolvesTo(s0, "#"+list.Hash().String()+"[0]")
	resolvesTo(s1, "#"+list.Hash().String()+"[1]")

	// A dataset that was never committed and a hash that was never written
	// both resolve to nil.
	resolvesTo(nil, "foo")
	resolvesTo(nil, "foo.parents")
	resolvesTo(nil, "foo.value")
	resolvesTo(nil, "foo.value[0]")
	resolvesTo(nil, "#"+types.String("baz").Hash().String())
	resolvesTo(nil, "#"+types.String("baz").Hash().String()+"[0]")
}

// TestReadAbsolutePaths covers the success case of ReadAbsolutePaths and its
// two error messages: an unparseable path and a path that does not resolve.
func TestReadAbsolutePaths(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.MemoryStorage{}
	db := datas.NewDatabase(storage.NewView())

	s0, s1 := types.String("foo"), types.String("bar")
	list := types.NewList(db, s0, s1)

	ds := db.GetDataset("ds")
	_, err := db.CommitValue(ds, list)
	assert.NoError(err)

	vals, err := ReadAbsolutePaths(db, "ds.value[0]", "ds.value[1]")
	assert.NoError(err)

	assert.Equal(2, len(vals))
	assert.Equal("foo", string(vals[0].(types.String)))
	assert.Equal("bar", string(vals[1].(types.String)))

	// Parse failure.
	vals, err = ReadAbsolutePaths(db, "!!#")
	assert.Nil(vals)
	assert.Equal("Invalid input path '!!#'", err.Error())

	// Parses, but the dataset does not exist.
	vals, err = ReadAbsolutePaths(db, "invalid.monkey")
	assert.Nil(vals)
	assert.Equal("Input path 'invalid.monkey' does not exist in database", err.Error())
}

// TestAbsolutePathParseErrors pins the exact error messages produced by
// NewAbsolutePath and checks that the zero AbsolutePath is returned on error.
func TestAbsolutePathParseErrors(t *testing.T) {
	assert := assert.New(t)

	test := func(path, errMsg string) {
		p, err := NewAbsolutePath(path)
		assert.Equal(AbsolutePath{}, p)
		assert.Error(err)
		assert.Equal(errMsg, err.Error())
	}

	test("", "Empty path")
	test(".foo", "Invalid dataset name: .foo")
	test(".foo.bar.baz", "Invalid dataset name: .foo.bar.baz")
	test("#", "Invalid hash: ")
	test("#abc", "Invalid hash: abc")
	// Right length, but expected to be rejected by hash.MaybeParse.
	invHash := strings.Repeat("z", hash.StringLen)
	test("#"+invHash, "Invalid hash: "+invHash)
}

================================================
FILE: go/spec/commit_meta.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package spec import ( "errors" "fmt" "strings" "time" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/types" ) const CommitMetaDateFormat = time.RFC3339 var ( commitMetaDate string commitMetaMessage string commitMetaKeyValueStrings string commitMetaKeyValuePaths string ) // CreateCommitMetaStruct creates and returns a Noms struct suitable for use in CommitOptions.Meta. // It returns types.EmptyStruct and an error if any issues are encountered. // Database is used only if commitMetaKeyValuePaths are provided on the command line and values need to be resolved. // Date should be ISO 8601 format (see CommitMetaDateFormat), if empty the current date is used. // The values passed as command line arguments (if any) are merged with the values provided as function arguments. func CreateCommitMetaStruct(db datas.Database, date, message string, keyValueStrings map[string]string, keyValuePaths map[string]types.Value) (types.Struct, error) { metaValues := types.StructData{} resolvePathFunc := func(path string) (types.Value, error) { absPath, err := NewAbsolutePath(path) if err != nil { return nil, errors.New(fmt.Sprintf("Bad path for meta-p: %s", path)) } return absPath.Resolve(db), nil } parseMetaStrings := func(param string, resolveAsPaths bool) error { if param == "" { return nil } ms := strings.Split(param, ",") for _, m := range ms { kv := strings.Split(m, "=") if len(kv) != 2 { return errors.New(fmt.Sprintf("Unable to parse meta value: %s", m)) } if !types.IsValidStructFieldName(kv[0]) { return errors.New(fmt.Sprintf("Invalid meta key: %s", kv[0])) } if resolveAsPaths { v, err := resolvePathFunc(kv[1]) if err != nil { return err } metaValues[kv[0]] = v } else { metaValues[kv[0]] = types.String(kv[1]) } } return nil } if err := parseMetaStrings(commitMetaKeyValueStrings, false); err != nil { return types.EmptyStruct, err } if err := 
parseMetaStrings(commitMetaKeyValuePaths, true); err != nil { return types.EmptyStruct, err } for k, v := range keyValueStrings { if !types.IsValidStructFieldName(k) { return types.EmptyStruct, errors.New(fmt.Sprintf("Invalid meta key: %s", k)) } metaValues[k] = types.String(v) } for k, v := range keyValuePaths { if !types.IsValidStructFieldName(k) { return types.EmptyStruct, errors.New(fmt.Sprintf("Invalid meta key: %s", k)) } metaValues[k] = v } if date == "" { date = commitMetaDate } if date == "" { date = time.Now().UTC().Format(CommitMetaDateFormat) } else { _, err := time.Parse(CommitMetaDateFormat, date) if err != nil { return types.EmptyStruct, errors.New(fmt.Sprintf("Unable to parse date: %s, error: %s", date, err)) } } metaValues["date"] = types.String(date) if message != "" { metaValues["message"] = types.String(message) } else if commitMetaMessage != "" { metaValues["message"] = types.String(commitMetaMessage) } return types.NewStruct("Meta", metaValues), nil } ================================================ FILE: go/spec/commit_meta_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package spec

import (
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/attic-labs/noms/go/types"
	"github.com/stretchr/testify/assert"
)

// isEmptyStruct reports whether s equals types.EmptyStruct, the value
// CreateCommitMetaStruct returns alongside an error.
func isEmptyStruct(s types.Struct) bool {
	return s.Equals(types.EmptyStruct)
}

// TestCreateCommitMetaStructBasic checks that with no flags and no arguments
// the meta struct contains only a generated date.
func TestCreateCommitMetaStructBasic(t *testing.T) {
	assert := assert.New(t)

	meta, err := CreateCommitMetaStruct(nil, "", "", nil, nil)
	assert.NoError(err)
	assert.False(isEmptyStruct(meta))
	assert.Equal("Struct Meta {\n date: String,\n}", types.TypeOf(meta).Describe())
}

// TestCreateCommitMetaStructFromFlags checks that date, message and key/value
// strings set through the package-level flag variables end up in the struct.
func TestCreateCommitMetaStructFromFlags(t *testing.T) {
	assert := assert.New(t)

	setCommitMetaFlags(time.Now().UTC().Format(CommitMetaDateFormat), "this is a message", "k1=v1,k2=v2,k3=v3")
	defer resetCommitMetaFlags()

	meta, err := CreateCommitMetaStruct(nil, "", "", nil, nil)
	assert.NoError(err)
	assert.Equal("Struct Meta {\n date: String,\n k1: String,\n k2: String,\n k3: String,\n message: String,\n}",
		types.TypeOf(meta).Describe())
	assert.Equal(types.String(commitMetaDate), meta.Get("date"))
	assert.Equal(types.String(commitMetaMessage), meta.Get("message"))
	assert.Equal(types.String("v1"), meta.Get("k1"))
	assert.Equal(types.String("v2"), meta.Get("k2"))
	assert.Equal(types.String("v3"), meta.Get("k3"))
}

// TestCreateCommitMetaStructFromArgs checks that date, message and key/value
// strings passed as function arguments end up in the struct.
func TestCreateCommitMetaStructFromArgs(t *testing.T) {
	assert := assert.New(t)

	dateArg := time.Now().UTC().Format(CommitMetaDateFormat)
	messageArg := "this is a message"
	keyValueArg := map[string]string{"k1": "v1", "k2": "v2", "k3": "v3"}
	meta, err := CreateCommitMetaStruct(nil, dateArg, messageArg, keyValueArg, nil)
	assert.NoError(err)
	assert.Equal("Struct Meta {\n date: String,\n k1: String,\n k2: String,\n k3: String,\n message: String,\n}",
		types.TypeOf(meta).Describe())
	assert.Equal(types.String(dateArg), meta.Get("date"))
	assert.Equal(types.String(messageArg), meta.Get("message"))
	assert.Equal(types.String("v1"), meta.Get("k1"))
	assert.Equal(types.String("v2"), meta.Get("k2"))
	assert.Equal(types.String("v3"), meta.Get("k3"))
}

// TestCreateCommitMetaStructFromFlagsAndArgs checks the merge of flags and
// arguments: keys present in both take the argument's value, keys present in
// only one source are kept.
func TestCreateCommitMetaStructFromFlagsAndArgs(t *testing.T) {
	assert := assert.New(t)

	setCommitMetaFlags(time.Now().UTC().Format(CommitMetaDateFormat), "this is a message", "k1=v1p1,k2=v2p2,k4=v4p4")
	defer resetCommitMetaFlags()

	dateArg := time.Now().UTC().Add(time.Hour * -24).Format(CommitMetaDateFormat)
	messageArg := "this is a message"
	keyValueArg := map[string]string{"k1": "v1", "k2": "v2", "k3": "v3"}

	// args passed in should win over the ones in the flags
	meta, err := CreateCommitMetaStruct(nil, dateArg, messageArg, keyValueArg, nil)
	assert.NoError(err)
	assert.Equal("Struct Meta {\n date: String,\n k1: String,\n k2: String,\n k3: String,\n k4: String,\n message: String,\n}",
		types.TypeOf(meta).Describe())
	assert.Equal(types.String(dateArg), meta.Get("date"))
	assert.Equal(types.String(messageArg), meta.Get("message"))
	assert.Equal(types.String("v1"), meta.Get("k1"))
	assert.Equal(types.String("v2"), meta.Get("k2"))
	assert.Equal(types.String("v3"), meta.Get("k3"))
	assert.Equal(types.String("v4p4"), meta.Get("k4"))
}

// TestCreateCommitMetaStructBadDate checks that dates not in
// CommitMetaDateFormat are rejected whether they arrive through the flag, the
// argument, or both.
func TestCreateCommitMetaStructBadDate(t *testing.T) {
	assert := assert.New(t)

	testBadDates := func(cliDateString, argDateString string) {
		setCommitMetaFlags(cliDateString, "", "")
		defer resetCommitMetaFlags()

		meta, err := CreateCommitMetaStruct(nil, argDateString, "", nil, nil)
		assert.Error(err)
		assert.True(strings.HasPrefix(err.Error(), "Unable to parse date: "))
		assert.True(isEmptyStruct(meta))
	}
	testBadDateMultipleWays := func(dateString string) {
		testBadDates(dateString, "")
		testBadDates("", dateString)
		testBadDates(dateString, dateString)
	}

	testBadDateMultipleWays(time.Now().UTC().Format("Jan _2 15:04:05 2006"))
	testBadDateMultipleWays(time.Now().UTC().Format("Mon Jan _2 15:04:05 2006"))
	testBadDateMultipleWays(time.Now().UTC().Format("2006-01-02T15:04:05"))
}

// TestCreateCommitMetaStructBadMetaStrings checks the errors for key/value
// strings with a wrong separator and for keys that are not valid struct field
// names, supplied through the flag and through the argument map.
func TestCreateCommitMetaStructBadMetaStrings(t *testing.T) {
	assert := assert.New(t)

	testBadMetaSeparator := func(k, v, sep string) {
		setCommitMetaFlags("", "", fmt.Sprintf("%s%s%s", k, sep, v))
		defer resetCommitMetaFlags()

		meta, err := CreateCommitMetaStruct(nil, "", "", nil, nil)
		assert.Error(err)
		assert.True(strings.HasPrefix(err.Error(), "Unable to parse meta value: "))
		assert.True(isEmptyStruct(meta))
	}

	testBadMetaKeys := func(k, v string) {
		testBadMetaSeparator(k, v, ":")
		testBadMetaSeparator(k, v, "-")

		// Bad key supplied through the flag.
		setCommitMetaFlags("", "", fmt.Sprintf("%s=%s", k, v))

		meta, err := CreateCommitMetaStruct(nil, "", "", nil, nil)
		assert.Error(err)
		assert.True(strings.HasPrefix(err.Error(), "Invalid meta key: "))
		assert.True(isEmptyStruct(meta))

		// Clear the flag so only the argument map carries the bad key.
		resetCommitMetaFlags()

		metaValues := map[string]string{k: v}
		meta, err = CreateCommitMetaStruct(nil, "", "", metaValues, nil)
		assert.Error(err)
		assert.True(strings.HasPrefix(err.Error(), "Invalid meta key: "))
		assert.True(isEmptyStruct(meta))
	}

	// Valid names must start with `a-zA-Z` and after that `a-zA-Z0-9_`.
	testBadMetaKeys("_name", "value")
	testBadMetaKeys("99problems", "now 100")
	testBadMetaKeys("one-hundred-bottles", "take one down")
	testBadMetaKeys("👀", "who watches the watchers?")
	testBadMetaKeys("key:", "value")
}

// setCommitMetaFlags overwrites the package-level date, message and key/value
// string settings, standing in for command line flags.
func setCommitMetaFlags(date, message, kvStrings string) {
	commitMetaDate = date
	commitMetaMessage = message
	commitMetaKeyValueStrings = kvStrings
}

// resetCommitMetaFlags clears the settings changed by setCommitMetaFlags.
func resetCommitMetaFlags() {
	setCommitMetaFlags("", "", "")
}

================================================
FILE: go/spec/spec.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// Package spec provides builders and parsers for spelling Noms databases,
// datasets and values.
package spec

import (
	"errors"
	"fmt"
	"net/url"
	"os"
	"regexp"
	"strings"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/datas"
	"github.com/attic-labs/noms/go/nbs"
	"github.com/attic-labs/noms/go/types"
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/dynamodb"
	"github.com/aws/aws-sdk-go/service/s3"
)

// Separator divides the database part of a spec from its dataset or path
// part, as in "http://example.com/db::dataset".
const Separator = "::"

// datasetRe matches a string that consists of a dataset name and nothing else.
var datasetRe = regexp.MustCompile("^" + datas.DatasetRe.String() + "$")

// GetAWSSession supplies the AWS session used for "aws" specs. It is a
// variable so that it can be replaced; the default creates a session for the
// us-west-2 region.
var GetAWSSession func() *session.Session = func() *session.Session {
	cfg := aws.NewConfig().WithRegion("us-west-2")
	return session.Must(session.NewSession(cfg))
}

// ProtocolImpl is the hook for protocols that are not built in: it creates
// the ChunkStore or Database for a Spec that uses the protocol.
type ProtocolImpl interface {
	NewChunkStore(sp Spec) (chunks.ChunkStore, error)
	NewDatabase(sp Spec) (datas.Database, error)
}

// ExternalProtocols maps a protocol name to its implementation. It is
// consulted for any protocol that is not one of the built-in ones.
var ExternalProtocols = map[string]ProtocolImpl{}

// SpecOptions customize Spec behavior.
type SpecOptions struct {
	// Authorization token for requests. For example, if the database is HTTP
	// this will used for an `Authorization: Bearer ${authorization}` header.
	Authorization string
}

// Spec locates a Noms database, dataset, or value globally. Spec caches
// its database instance so it therefore does not reflect new commits in
// the db, by (legacy) design.
type Spec struct {
	// Protocol is one of the built-in protocols "mem", "nbs", "aws", "http"
	// or "https", or a key of ExternalProtocols.
	Protocol string

	// DatabaseName is the name of the Spec's database, which is the string after
	// "protocol:". http/https specs include their leading "//" characters.
	DatabaseName string

	// Options are the SpecOptions that the Spec was constructed with.
	Options SpecOptions

	// Path is the empty AbsolutePath for database specs. ForDataset and
	// ForPath fill it in from the text after Separator.
	Path AbsolutePath

	// db is lazily created, so it needs to be a pointer to a Database.
	db *datas.Database
}

// newSpec builds a Spec with an empty Path from the database part of a spec
// string. The database itself is not opened here; only the slot that will
// hold it is allocated.
func newSpec(dbSpec string, opts SpecOptions) (Spec, error) {
	proto, name, err := parseDatabaseSpec(dbSpec)
	if err != nil {
		return Spec{}, err
	}

	sp := Spec{
		Protocol:     proto,
		DatabaseName: name,
		Options:      opts,
		db:           new(datas.Database),
	}
	return sp, nil
}

// ForDatabase parses a spec for a Database, using default SpecOptions.
func ForDatabase(spec string) (Spec, error) {
	return ForDatabaseOpts(spec, SpecOptions{})
}

// ForDatabaseOpts parses a spec for a Database.
func ForDatabaseOpts(spec string, opts SpecOptions) (Spec, error) {
	return newSpec(spec, opts)
}

// ForDataset parses a spec for a Dataset, using default SpecOptions.
func ForDataset(spec string) (Spec, error) {
	return ForDatasetOpts(spec, SpecOptions{})
}

// ForDatasetOpts parses a spec for a Dataset. The part after Separator must
// be a dataset name with no trailing path; a hash root is rejected because it
// leaves the dataset name empty.
func ForDatasetOpts(spec string, opts SpecOptions) (Spec, error) {
	dbPart, dsPart, err := splitDatabaseSpec(spec)
	if err != nil {
		return Spec{}, err
	}

	result, err := newSpec(dbPart, opts)
	if err != nil {
		return Spec{}, err
	}

	absPath, err := NewAbsolutePath(dsPart)
	if err != nil {
		return Spec{}, err
	}

	switch {
	case absPath.Dataset == "":
		return Spec{}, errors.New("dataset name required for dataset spec")
	case !absPath.Path.IsEmpty():
		return Spec{}, errors.New("path is not allowed for dataset spec")
	}

	result.Path = absPath
	return result, nil
}

// ForPath parses a spec for a path to a Value, using default SpecOptions.
func ForPath(spec string) (Spec, error) {
	return ForPathOpts(spec, SpecOptions{})
}

// ForPathOpts parses a spec for a path to a Value.
func ForPathOpts(spec string, opts SpecOptions) (Spec, error) { dbSpec, pathStr, err := splitDatabaseSpec(spec) if err != nil { return Spec{}, err } var path AbsolutePath if pathStr != "" { path, err = NewAbsolutePath(pathStr) if err != nil { return Spec{}, err } } sp, err := newSpec(dbSpec, opts) if err != nil { return Spec{}, err } sp.Path = path return sp, nil } func (sp Spec) String() string { s := sp.Protocol if s != "mem" { s += ":" + sp.DatabaseName } p := sp.Path.String() if p != "" { s += Separator + p } return s } // GetDatabase returns the Database instance that this Spec's DatabaseName // describes. The same Database instance is returned every time, unless Close // is called. If the Spec is closed, it is re-opened with a new Database. func (sp Spec) GetDatabase() datas.Database { if *sp.db == nil { *sp.db = sp.createDatabase() } return *sp.db } // GetDataset returns the current Dataset instance for this Spec's Database. // GetDataset is live, so if Commit is called on this Spec's Database later, a // new up-to-date Dataset will returned on the next call to GetDataset. If // this is not a Dataset spec, returns nil. func (sp Spec) GetDataset() (ds datas.Dataset) { if sp.Path.Dataset != "" { ds = sp.GetDatabase().GetDataset(sp.Path.Dataset) } return } // GetValue returns the Value at this Spec's Path within its Database, or nil // if this isn't a Path Spec or if that path isn't found. func (sp Spec) GetValue() (val types.Value) { if !sp.Path.IsEmpty() { val = sp.Path.Resolve(sp.GetDatabase()) } return } // Href treats the Protocol and DatabaseName as a URL, and returns its href. // For example, the spec http://example.com/path::ds returns // "http://example.com/path". If the Protocol is not "http" or "http", returns // an empty string. 
func (sp Spec) Href() string { switch proto := sp.Protocol; proto { case "http", "https", "aws": return proto + ":" + sp.DatabaseName default: return "" } } // Pin returns a Spec in which the dataset component, if any, has been replaced // with the hash of the HEAD of that dataset. This "pins" the path to the state // of the database at the current moment in time. Returns itself if the // PathSpec is already "pinned". func (sp Spec) Pin() (Spec, bool) { var ds datas.Dataset if !sp.Path.IsEmpty() { if !sp.Path.Hash.IsEmpty() { // Spec is already pinned. return sp, true } ds = sp.GetDatabase().GetDataset(sp.Path.Dataset) } else { ds = sp.GetDataset() } commit, ok := ds.MaybeHead() if !ok { return Spec{}, false } r := sp r.Path.Hash = commit.Hash() r.Path.Dataset = "" return r, true } func (sp Spec) Close() error { db := *sp.db if db == nil { return nil } *sp.db = nil return db.Close() } func (sp Spec) createDatabase() datas.Database { switch sp.Protocol { case "http", "https", "aws", "nbs", "mem": return datas.NewDatabase(sp.NewChunkStore()) default: impl, ok := ExternalProtocols[sp.Protocol] if !ok { d.PanicIfError(fmt.Errorf("Unknown protocol: %s", sp.Protocol)) } r, err := impl.NewDatabase(sp) d.PanicIfError(err) return r } } // NewChunkStore returns a new ChunkStore instance that this Spec's // DatabaseName describes. It's unusual to call this method, GetDatabase is // more useful. 
func (sp Spec) NewChunkStore() chunks.ChunkStore { switch sp.Protocol { case "http", "https": return datas.NewHTTPChunkStore(sp.Href(), sp.Options.Authorization) case "aws": parts := strings.SplitN(sp.DatabaseName, "/", 3) // table/bucket/ns d.PanicIfFalse(len(parts) >= 3) // parse should have ensured this was true sess := GetAWSSession() return nbs.NewAWSStore(parts[0], parts[2], parts[1], s3.New(sess), dynamodb.New(sess), 1<<28) case "nbs": os.MkdirAll(sp.DatabaseName, 0777) return nbs.NewLocalStore(sp.DatabaseName, 1<<28) case "mem": storage := &chunks.MemoryStorage{} return storage.NewView() default: impl, ok := ExternalProtocols[sp.Protocol] if !ok { d.PanicIfError(fmt.Errorf("Unknown protocol: %s", sp.Protocol)) } r, err := impl.NewChunkStore(sp) d.PanicIfError(err) return r } } func parseDatabaseSpec(spec string) (protocol, name string, err error) { if len(spec) == 0 { err = fmt.Errorf("Empty spec") return } parts := strings.SplitN(spec, ":", 2) // [protocol] [, path]? // If there was no ":" then this is either a mem spec, or a filesystem path. // This is ambiguous if the file system path is "mem" but that just means the // path needs to be explicitly "nbs:mem". 
if len(parts) == 1 { if spec == "mem" { protocol = "mem" } else { protocol, name = "nbs", spec } return } if _, ok := ExternalProtocols[parts[0]]; ok { fmt.Println("found external spec", parts[0]) protocol, name = parts[0], parts[1] return } switch parts[0] { case "nbs": protocol, name = parts[0], parts[1] case "aws": p, n := parts[0], parts[1] pattern := regexp.MustCompile("^[^/]+/[^/]+/.*$") if !pattern.MatchString(n) { err = errors.New("aws spec must match pattern aws:" + pattern.String()) } protocol, name = p, n return case "http", "https": u, perr := url.Parse(spec) if perr != nil { err = perr } else if u.Host == "" { err = fmt.Errorf("%s has empty host", spec) } else { protocol, name = parts[0], parts[1] } case "mem": err = fmt.Errorf(`In-memory database must be specified as "mem", not "mem:"`) default: err = fmt.Errorf("Invalid database protocol %s in %s", protocol, spec) } return } func splitDatabaseSpec(spec string) (string, string, error) { lastIdx := strings.LastIndex(spec, Separator) if lastIdx == -1 { return "", "", fmt.Errorf("Missing %s after database in %s", Separator, spec) } return spec[:lastIdx], spec[lastIdx+len(Separator):], nil } ================================================ FILE: go/spec/spec_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package spec

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"os"
	"path"
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/datas"
	"github.com/attic-labs/noms/go/nbs"
	"github.com/attic-labs/noms/go/types"
	"github.com/stretchr/testify/assert"
)

// TestMemDatabaseSpec checks that "mem" parses to a database spec with an
// empty name and path, and that its database can write and read back a value.
func TestMemDatabaseSpec(t *testing.T) {
	assert := assert.New(t)

	spec, err := ForDatabase("mem")
	assert.NoError(err)
	defer spec.Close()

	assert.Equal("mem", spec.Protocol)
	assert.Equal("", spec.DatabaseName)
	assert.True(spec.Path.IsEmpty())

	s := types.String("hello")
	db := spec.GetDatabase()
	db.WriteValue(s)
	assert.Equal(s, db.ReadValue(s.Hash()))
}

// TestMemDatasetSpec checks that "mem::test" parses to a dataset spec, that
// the dataset starts without a head value, and that a commit through the
// spec's database is visible on the returned dataset.
func TestMemDatasetSpec(t *testing.T) {
	assert := assert.New(t)

	spec, err := ForDataset("mem::test")
	assert.NoError(err)
	defer spec.Close()

	assert.Equal("mem", spec.Protocol)
	assert.Equal("", spec.DatabaseName)
	assert.Equal("test", spec.Path.Dataset)
	assert.True(spec.Path.Path.IsEmpty())

	ds := spec.GetDataset()
	_, ok := spec.GetDataset().MaybeHeadValue()
	assert.False(ok)

	s := types.String("hello")
	ds, err = spec.GetDatabase().CommitValue(ds, s)
	assert.NoError(err)
	assert.Equal(s, ds.HeadValue())
}

// TestMemHashPathSpec checks that a hash-rooted path spec resolves to the
// value once that value has been written to the spec's database.
func TestMemHashPathSpec(t *testing.T) {
	assert := assert.New(t)

	s := types.String("hello")

	spec, err := ForPath("mem::#" + s.Hash().String())
	assert.NoError(err)
	defer spec.Close()

	assert.Equal("mem", spec.Protocol)
	assert.Equal("", spec.DatabaseName)
	assert.False(spec.Path.IsEmpty())

	// This is a reasonable check but it causes the next GetValue to return nil:
	// assert.Nil(spec.GetValue())

	spec.GetDatabase().WriteValue(s)
	assert.Equal(s, spec.GetValue())
}

// TestMemDatasetPathSpec checks that a dataset-rooted path spec resolves to
// nil before the dataset has a commit and to the addressed element afterwards.
func TestMemDatasetPathSpec(t *testing.T) {
	assert := assert.New(t)

	spec, err := ForPath("mem::test.value[0]")
	assert.NoError(err)
	defer spec.Close()

	assert.Equal("mem", spec.Protocol)
	assert.Equal("", spec.DatabaseName)
	assert.False(spec.Path.IsEmpty())

	assert.Nil(spec.GetValue())

	db := spec.GetDatabase()
	ds := db.GetDataset("test")
	_, err = db.CommitValue(ds, types.NewList(db, types.Number(42)))
	assert.NoError(err)

	assert.Equal(types.Number(42), spec.GetValue())
}

// TestNBSDatabaseSpec checks local-store specs, written both as a bare
// directory path and with an explicit "nbs:" prefix.
func TestNBSDatabaseSpec(t *testing.T) {
	assert := assert.New(t)

	run := func(prefix string) {
		tmpDir, err := ioutil.TempDir("", "spec_test")
		assert.NoError(err)
		defer os.RemoveAll(tmpDir)

		s := types.String("string")

		// Values already stored in an existing database can be read through
		// the spec.
		store1 := path.Join(tmpDir, "store1")
		os.Mkdir(store1, 0777)
		// Populate the store directly and close it before opening it again
		// through a spec.
		func() {
			db := datas.NewDatabase(nbs.NewLocalStore(store1, 8*(1<<20)))
			defer db.Close()
			r := db.WriteValue(s)
			_, err = db.CommitValue(db.GetDataset("datasetID"), r)
			assert.NoError(err)
		}()

		spec1, err := ForDatabase(prefix + store1)
		assert.NoError(err)
		defer spec1.Close()

		assert.Equal("nbs", spec1.Protocol)
		assert.Equal(store1, spec1.DatabaseName)

		assert.Equal(s, spec1.GetDatabase().ReadValue(s.Hash()))

		// New databases can be created and read/written from.
		store2 := path.Join(tmpDir, "store2")
		os.Mkdir(store2, 0777)
		spec2, err := ForDatabase(prefix + store2)
		assert.NoError(err)
		defer spec2.Close()

		assert.Equal("nbs", spec2.Protocol)
		assert.Equal(store2, spec2.DatabaseName)

		db := spec2.GetDatabase()
		db.WriteValue(s)
		r := db.WriteValue(s)
		_, err = db.CommitValue(db.GetDataset("datasetID"), r)
		assert.NoError(err)
		assert.Equal(s, db.ReadValue(s.Hash()))
	}

	run("")
	run("nbs:")
}

// Skip LDB dataset and path tests: the database behaviour is tested in
// TestLDBDatabaseSpec, TestMemDatasetSpec/TestMem*PathSpec cover general
// dataset/path behaviour, and ForDataset/ForPath test LDB parsing.
func TestCloseSpecWithoutOpen(t *testing.T) { s, err := ForDatabase("mem") assert.NoError(t, err) s.Close() } func TestHref(t *testing.T) { assert := assert.New(t) sp, _ := ForDatabase("http://localhost") assert.Equal("http://localhost", sp.Href()) sp, _ = ForDatabase("http://localhost/foo/bar/baz") assert.Equal("http://localhost/foo/bar/baz", sp.Href()) sp, _ = ForDatabase("https://my.example.com/foo/bar/baz") assert.Equal("https://my.example.com/foo/bar/baz", sp.Href()) sp, _ = ForDataset("https://my.example.com/foo/bar/baz::myds") assert.Equal("https://my.example.com/foo/bar/baz", sp.Href()) sp, _ = ForDataset("https://my.example.com:8080/foo/bar/baz::myds") assert.Equal("https://my.example.com:8080/foo/bar/baz", sp.Href()) sp, _ = ForPath("https://my.example.com/foo/bar/baz::myds.my.path") assert.Equal("https://my.example.com/foo/bar/baz", sp.Href()) sp, _ = ForDatabase("aws:table/bucket/ns") assert.Equal("aws:table/bucket/ns", sp.Href()) sp, _ = ForDataset("aws:table/bucket/ns::myds") assert.Equal("aws:table/bucket/ns", sp.Href()) sp, _ = ForPath("aws:table/bucket/ns::myds.my.path") assert.Equal("aws:table/bucket/ns", sp.Href()) sp, err := ForPath("mem::myds.my.path") assert.NoError(err) assert.Equal("", sp.Href()) } func TestForDatabase(t *testing.T) { assert := assert.New(t) badSpecs := []string{ "mem:stuff", "mem::", "mem:", "http:", "http://", "http://%", "https:", "https://", "https://%", "http://::192.30.252.154", "http://0:0:0:0:0:ffff:c01e:fc9a", "http://::ffff:c01e:fc9a", "http://::ffff::1e::9a", "ldb:", "random:", "random:random", "/file/ba:d", "aws:", "aws:t", "aws:t/b", "aws://table/bucket/db", } for _, spec := range badSpecs { _, err := ForDatabase(spec) assert.Error(err, spec) } tmpDir, err := ioutil.TempDir("", "spec_test") assert.NoError(err) defer os.RemoveAll(tmpDir) testCases := []struct { spec, protocol, databaseName, canonicalSpecIfAny string }{ {"http://localhost:8000", "http", "//localhost:8000", ""}, {"http://localhost:8000/fff", 
"http", "//localhost:8000/fff", ""}, {"https://local.attic.io/john/doe", "https", "//local.attic.io/john/doe", ""}, {"mem", "mem", "", ""}, {tmpDir, "nbs", tmpDir, "nbs:" + tmpDir}, {"nbs:" + tmpDir, "nbs", tmpDir, ""}, {"http://server.com/john/doe?access_token=jane", "http", "//server.com/john/doe?access_token=jane", ""}, {"https://server.com/john/doe/?arg=2&qp1=true&access_token=jane", "https", "//server.com/john/doe/?arg=2&qp1=true&access_token=jane", ""}, {"http://some/::/one", "http", "//some/::/one", ""}, {"http://::1", "http", "//::1", ""}, {"http://192.30.252.154", "http", "//192.30.252.154", ""}, {"aws:table/bucket/db", "aws", "table/bucket/db", ""}, {"aws:table/bucket/db/other/random/crap", "aws", "table/bucket/db/other/random/crap", ""}, } for _, tc := range testCases { spec, err := ForDatabase(tc.spec) assert.NoError(err, tc.spec) defer spec.Close() assert.Equal(tc.protocol, spec.Protocol) assert.Equal(tc.databaseName, spec.DatabaseName) assert.True(spec.Path.IsEmpty()) if tc.canonicalSpecIfAny == "" { assert.Equal(tc.spec, spec.String()) } else { assert.Equal(tc.canonicalSpecIfAny, spec.String()) } } } func TestForDataset(t *testing.T) { assert := assert.New(t) badSpecs := []string{ "mem", "mem:", "mem:::ds", "http", "http:", "http://foo", "monkey", "monkey:balls", "http:::dsname", "mem:/a/bogus/path:dsname", "http://localhost:8000/one", "http://::192.30.252.154::foo", "http://0:0:0:0:0:ffff:c01e:fc9a::foo", "http://::ffff::1e::9a::foo", "nbs:", "nbs:hello", "aws://table:bucket/db::ds", "aws:t::ds", "aws:t/b::ds", "aws://t/b/foo::ds", "mem::foo.value::ds", } for _, spec := range badSpecs { _, err := ForDataset(spec) assert.Error(err, spec) } invalidDatasetNames := []string{" ", "", "$", "#", ":", "\n", "💩"} for _, s := range invalidDatasetNames { _, err := ForDataset("mem::" + s) assert.Error(err) } validDatasetNames := []string{"a", "Z", "0", "/", "-", "_"} for _, s := range validDatasetNames { _, err := ForDataset("mem::" + s) assert.NoError(err) } 
tmpDir, err := ioutil.TempDir("", "spec_test") assert.NoError(err) defer os.RemoveAll(tmpDir) testCases := []struct { spec, protocol, databaseName, datasetName, canonicalSpecIfAny string }{ {"http://localhost:8000::ds1", "http", "//localhost:8000", "ds1", ""}, {"http://localhost:8000/john/doe/::ds2", "http", "//localhost:8000/john/doe/", "ds2", ""}, {"https://local.attic.io/john/doe::ds3", "https", "//local.attic.io/john/doe", "ds3", ""}, {"http://local.attic.io/john/doe::ds1", "http", "//local.attic.io/john/doe", "ds1", ""}, {"nbs:" + tmpDir + "::ds/one", "nbs", tmpDir, "ds/one", ""}, {tmpDir + "::ds/one", "nbs", tmpDir, "ds/one", "nbs:" + tmpDir + "::ds/one"}, {"http://localhost:8000/john/doe?access_token=abc::ds/one", "http", "//localhost:8000/john/doe?access_token=abc", "ds/one", ""}, {"https://localhost:8000?qp1=x&access_token=abc&qp2=y::ds/one", "https", "//localhost:8000?qp1=x&access_token=abc&qp2=y", "ds/one", ""}, {"http://192.30.252.154::foo", "http", "//192.30.252.154", "foo", ""}, {"http://::1::foo", "http", "//::1", "foo", ""}, {"aws:table/bucket/db::ds", "aws", "table/bucket/db", "ds", ""}, } for _, tc := range testCases { spec, err := ForDataset(tc.spec) assert.NoError(err, tc.spec) defer spec.Close() assert.Equal(tc.protocol, spec.Protocol) assert.Equal(tc.databaseName, spec.DatabaseName) assert.Equal(tc.datasetName, spec.Path.Dataset) if tc.canonicalSpecIfAny == "" { assert.Equal(tc.spec, spec.String()) } else { assert.Equal(tc.canonicalSpecIfAny, spec.String()) } } } func TestForPath(t *testing.T) { assert := assert.New(t) badSpecs := []string{ "mem::#", "mem::#s", "mem::#foobarbaz", "mem::#wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww", "http://::192.30.252.154::baz[42]", "http://0:0:0:0:0:ffff:c01e:fc9a::foo[42].bar", "http://::ffff:c01e:fc9a::foo.foo", "http://::ffff::1e::9a::hello[\"world\"]", "aws://table:bucket/db::foo.foo", "aws://table/db/foo.foo", } for _, bs := range badSpecs { _, err := ForPath(bs) assert.Error(err) } tmpDir, err := 
ioutil.TempDir("", "spec_test") assert.NoError(err) defer os.RemoveAll(tmpDir) testCases := []struct { spec, protocol, databaseName, pathString, canonicalSpecIfAny string }{ {"http://local.attic.io/john/doe::#0123456789abcdefghijklmnopqrstuv", "http", "//local.attic.io/john/doe", "#0123456789abcdefghijklmnopqrstuv", ""}, {tmpDir + "::#0123456789abcdefghijklmnopqrstuv", "nbs", tmpDir, "#0123456789abcdefghijklmnopqrstuv", "nbs:" + tmpDir + "::#0123456789abcdefghijklmnopqrstuv"}, {"nbs:" + tmpDir + "::#0123456789abcdefghijklmnopqrstuv", "nbs", tmpDir, "#0123456789abcdefghijklmnopqrstuv", ""}, {"mem::#0123456789abcdefghijklmnopqrstuv", "mem", "", "#0123456789abcdefghijklmnopqrstuv", ""}, {"http://local.attic.io/john/doe::#0123456789abcdefghijklmnopqrstuv", "http", "//local.attic.io/john/doe", "#0123456789abcdefghijklmnopqrstuv", ""}, {"http://localhost:8000/john/doe/::ds1", "http", "//localhost:8000/john/doe/", "ds1", ""}, {"http://192.30.252.154::foo.bar", "http", "//192.30.252.154", "foo.bar", ""}, {"http://::1::foo.bar.baz", "http", "//::1", "foo.bar.baz", ""}, {"aws:table/bucket/db::foo.foo", "aws", "table/bucket/db", "foo.foo", ""}, } for _, tc := range testCases { spec, err := ForPath(tc.spec) assert.NoError(err) defer spec.Close() assert.Equal(tc.protocol, spec.Protocol) assert.Equal(tc.databaseName, spec.DatabaseName) assert.Equal(tc.pathString, spec.Path.String()) if tc.canonicalSpecIfAny == "" { assert.Equal(tc.spec, spec.String()) } else { assert.Equal(tc.canonicalSpecIfAny, spec.String()) } } } func TestPinPathSpec(t *testing.T) { assert := assert.New(t) unpinned, err := ForPath("mem::foo.value") assert.NoError(err) defer unpinned.Close() db := unpinned.GetDatabase() db.CommitValue(db.GetDataset("foo"), types.Number(42)) pinned, ok := unpinned.Pin() assert.True(ok) defer pinned.Close() head := db.GetDataset("foo").Head() assert.Equal(head.Hash(), pinned.Path.Hash) assert.Equal(fmt.Sprintf("mem::#%s.value", head.Hash().String()), pinned.String()) 
assert.Equal(types.Number(42), pinned.GetValue()) assert.Equal(types.Number(42), unpinned.GetValue()) db.CommitValue(db.GetDataset("foo"), types.Number(43)) assert.Equal(types.Number(42), pinned.GetValue()) assert.Equal(types.Number(43), unpinned.GetValue()) } func TestPinDatasetSpec(t *testing.T) { assert := assert.New(t) unpinned, err := ForDataset("mem::foo") assert.NoError(err) defer unpinned.Close() db := unpinned.GetDatabase() db.CommitValue(db.GetDataset("foo"), types.Number(42)) pinned, ok := unpinned.Pin() assert.True(ok) defer pinned.Close() head := db.GetDataset("foo").Head() commitValue := func(val types.Value) types.Value { return val.(types.Struct).Get(datas.ValueField) } assert.Equal(head.Hash(), pinned.Path.Hash) assert.Equal(fmt.Sprintf("mem::#%s", head.Hash().String()), pinned.String()) assert.Equal(types.Number(42), commitValue(pinned.GetValue())) assert.Equal(types.Number(42), unpinned.GetDataset().HeadValue()) db.CommitValue(db.GetDataset("foo"), types.Number(43)) assert.Equal(types.Number(42), commitValue(pinned.GetValue())) assert.Equal(types.Number(43), unpinned.GetDataset().HeadValue()) } func TestAlreadyPinnedPathSpec(t *testing.T) { assert := assert.New(t) unpinned, err := ForPath("mem::#imgp9mp1h3b9nv0gna6mri53dlj9f4ql.value") assert.NoError(err) pinned, ok := unpinned.Pin() assert.True(ok) assert.Equal(unpinned, pinned) } func TestMultipleSpecsSameNBS(t *testing.T) { assert := assert.New(t) tmpDir, err := ioutil.TempDir("", "spec_test") assert.NoError(err) defer os.RemoveAll(tmpDir) spec1, err1 := ForDatabase(tmpDir) spec2, err2 := ForDatabase(tmpDir) assert.NoError(err1) assert.NoError(err2) s := types.String("hello") db := spec1.GetDatabase() r := db.WriteValue(s) _, err = db.CommitValue(db.GetDataset("datasetID"), r) assert.NoError(err) assert.Equal(s, spec2.GetDatabase().ReadValue(s.Hash())) } func TestAcccessingInvalidSpec(t *testing.T) { assert := assert.New(t) test := func(spec string) { sp, err := ForDatabase(spec) 
assert.Error(err) assert.Equal("", sp.Href()) assert.Panics(func() { sp.GetDatabase() }) assert.Panics(func() { sp.GetDatabase() }) assert.Panics(func() { sp.NewChunkStore() }) assert.Panics(func() { sp.NewChunkStore() }) assert.Panics(func() { sp.Close() }) assert.Panics(func() { sp.Close() }) // Spec was created with ForDatabase, so dataset/path related functions // should just fail not panic. _, ok := sp.Pin() assert.False(ok) assert.Equal(datas.Dataset{}, sp.GetDataset()) assert.Nil(sp.GetValue()) } test("") test("invalid:spec") test("💩:spec") test("http:") test("http:💩:") } type testProtocol struct { name string } func (t *testProtocol) NewChunkStore(sp Spec) (chunks.ChunkStore, error) { t.name = sp.DatabaseName return chunks.NewMemoryStoreFactory().CreateStore(""), nil } func (t *testProtocol) NewDatabase(sp Spec) (datas.Database, error) { t.name = sp.DatabaseName cs, err := t.NewChunkStore(sp) d.PanicIfError(err) return datas.NewDatabase(cs), nil } func TestExternalProtocol(t *testing.T) { assert := assert.New(t) tp := testProtocol{} ExternalProtocols["test"] = &tp sp, err := ForDataset("test:foo::bar") assert.NoError(err) assert.Equal("test", sp.Protocol) assert.Equal("foo", sp.DatabaseName) cs := sp.NewChunkStore() assert.Equal("foo", tp.name) c := chunks.NewChunk([]byte("hi!")) cs.Put(c) assert.True(cs.Has(c.Hash())) tp.name = "" ds := sp.GetDataset() assert.Equal("foo", tp.name) ds, err = ds.Database().CommitValue(ds, types.String("hi!")) d.PanicIfError(err) assert.True(types.String("hi!").Equals(ds.HeadValue())) } func TestMkDirAll(t *testing.T) { assert := assert.New(t) td, err := ioutil.TempDir("", "") assert.NoError(err) p := path.Join(td, "foo", "bar", "baz") sp, err := ForDatabase(p) assert.NoError(err) _ = sp.NewChunkStore() } func TestNetworkError(t *testing.T) { assert := assert.New(t) svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusForbidden) w.Write([]byte("monkey\n")) })) sp, 
err := ForDatabase(svr.URL) assert.NoError(err) err = d.Try(func() { sp.GetDatabase() }) assert.Equal("Unexpected response: Forbidden: monkey", err.(d.WrappedError).Cause().Error()) } ================================================ FILE: go/spec/util.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package spec import ( "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" ) func CreateDatabaseSpecString(protocol, db string) string { return Spec{Protocol: protocol, DatabaseName: db}.String() } func CreateValueSpecString(protocol, db, path string) string { p, err := NewAbsolutePath(path) d.Chk.NoError(err) return Spec{Protocol: protocol, DatabaseName: db, Path: p}.String() } func CreateHashSpecString(protocol, db string, h hash.Hash) string { return Spec{Protocol: protocol, DatabaseName: db, Path: AbsolutePath{Hash: h}}.String() } ================================================ FILE: go/types/blob.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "errors" "io" "sync" "runtime" "github.com/attic-labs/noms/go/d" ) // Blob represents a list of Blobs. type Blob struct { sequence } func newBlob(seq sequence) Blob { return Blob{seq} } func NewEmptyBlob(vrw ValueReadWriter) Blob { return Blob{newBlobLeafSequence(vrw, []byte{})} } func (b Blob) Edit() *BlobEditor { return NewBlobEditor(b) } // ReadAt implements the ReaderAt interface. Eagerly loads requested byte-range from the blob p-tree. func (b Blob) ReadAt(p []byte, off int64) (n int, err error) { // TODO: Support negative off? 
d.PanicIfTrue(off < 0) startIdx := uint64(off) if startIdx >= b.Len() { return 0, io.EOF } endIdx := startIdx + uint64(len(p)) if endIdx > b.Len() { endIdx = b.Len() } if endIdx == b.Len() { err = io.EOF } if startIdx == endIdx { return } leaves, localStart := LoadLeafNodes([]Collection{b}, startIdx, endIdx) endIdx = localStart + endIdx - startIdx startIdx = localStart for _, leaf := range leaves { bl := leaf.asSequence().(blobLeafSequence) localEnd := endIdx data := bl.data() leafLength := uint64(len(data)) if localEnd > leafLength { localEnd = leafLength } src := data[startIdx:localEnd] copy(p[n:], src) n += len(src) endIdx -= localEnd startIdx = 0 } return } func (b Blob) Reader() *BlobReader { return &BlobReader{b, 0} } func (b Blob) Copy(w io.Writer) (n int64) { return b.CopyReadAhead(w, 1<<23 /* 8MB */, 6) } // CopyReadAhead copies the entire contents of |b| to |w|, and attempts to stay // |concurrency| |chunkSize| blocks of bytes ahead of the last byte written to // |w|. func (b Blob) CopyReadAhead(w io.Writer, chunkSize uint64, concurrency int) (n int64) { bChan := make(chan chan []byte, concurrency) go func() { for idx, len := uint64(0), b.Len(); idx < len; { bc := make(chan []byte) bChan <- bc start := idx blockLength := b.Len() - start if blockLength > chunkSize { blockLength = chunkSize } idx += blockLength go func() { buff := make([]byte, blockLength) b.ReadAt(buff, int64(start)) bc <- buff }() } close(bChan) }() // Ensure read-ahead goroutines can exit defer func() { for range bChan { } }() for b := range bChan { ln, err := w.Write(<-b) n += int64(ln) if err != nil { return } } return } // Concat returns a new Blob comprised of this joined with other. It only needs // to visit the rightmost prolly tree chunks of this Blob, and the leftmost // prolly tree chunks of other, so it's efficient. 
func (b Blob) Concat(other Blob) Blob { seq := concat(b.sequence, other.sequence, func(cur *sequenceCursor, vrw ValueReadWriter) *sequenceChunker { return b.newChunker(cur, vrw) }) return newBlob(seq) } func (b Blob) newChunker(cur *sequenceCursor, vrw ValueReadWriter) *sequenceChunker { return newSequenceChunker(cur, 0, vrw, makeBlobLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(BlobKind, vrw), hashValueByte) } func (b Blob) asSequence() sequence { return b.sequence } // Value interface func (b Blob) Value() Value { return b } func (b Blob) WalkValues(cb ValueCallback) { } type BlobReader struct { b Blob pos int64 } func (cbr *BlobReader) Read(p []byte) (n int, err error) { n, err = cbr.b.ReadAt(p, cbr.pos) cbr.pos += int64(n) return } func (cbr *BlobReader) Seek(offset int64, whence int) (int64, error) { abs := int64(cbr.pos) switch whence { case 0: abs = offset case 1: abs += offset case 2: abs = int64(cbr.b.Len()) + offset default: return 0, errors.New("Blob.Reader.Seek: invalid whence") } if abs < 0 { return 0, errors.New("Blob.Reader.Seek: negative position") } cbr.pos = int64(abs) return abs, nil } func makeBlobLeafChunkFn(vrw ValueReadWriter) makeChunkFn { return func(level uint64, items []sequenceItem) (Collection, orderedKey, uint64) { d.PanicIfFalse(level == 0) buff := make([]byte, len(items)) for i, v := range items { buff[i] = v.(byte) } return chunkBlobLeaf(vrw, buff) } } func chunkBlobLeaf(vrw ValueReadWriter, buff []byte) (Collection, orderedKey, uint64) { blob := newBlob(newBlobLeafSequence(vrw, buff)) return blob, orderedKeyFromInt(len(buff)), uint64(len(buff)) } // NewBlob creates a Blob by reading from every Reader in rs and // concatenating the result. NewBlob uses one goroutine per Reader. func NewBlob(vrw ValueReadWriter, rs ...io.Reader) Blob { return readBlobsP(vrw, rs...) 
} func readBlobsP(vrw ValueReadWriter, rs ...io.Reader) Blob { switch len(rs) { case 0: return NewEmptyBlob(vrw) case 1: return readBlob(rs[0], vrw) } blobs := make([]Blob, len(rs)) wg := &sync.WaitGroup{} wg.Add(len(rs)) for i, r := range rs { i2, r2 := i, r go func() { blobs[i2] = readBlob(r2, vrw) wg.Done() }() } wg.Wait() b := blobs[0] for i := 1; i < len(blobs); i++ { b = b.Concat(blobs[i]) } return b } func readBlob(r io.Reader, vrw ValueReadWriter) Blob { sc := newEmptySequenceChunker(vrw, makeBlobLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(BlobKind, vrw), func(item sequenceItem, rv *rollingValueHasher) { rv.HashByte(item.(byte)) }) // TODO: The code below is temporary. It's basically a custom leaf-level chunker for blobs. There are substational perf gains by doing it this way as it avoids the cost of boxing every single byte which is chunked. chunkBuff := [8192]byte{} chunkBytes := chunkBuff[:] rv := newRollingValueHasher(0) offset := 0 addByte := func(b byte) bool { if offset >= len(chunkBytes) { tmp := make([]byte, len(chunkBytes)*2) copy(tmp, chunkBytes) chunkBytes = tmp } chunkBytes[offset] = b offset++ rv.HashByte(b) return rv.crossedBoundary } mtChan := make(chan chan metaTuple, runtime.NumCPU()) makeChunk := func() { rv.Reset() cp := make([]byte, offset) copy(cp, chunkBytes[0:offset]) ch := make(chan metaTuple) mtChan <- ch go func(ch chan metaTuple, cp []byte) { col, key, numLeaves := chunkBlobLeaf(vrw, cp) ch <- newMetaTuple(vrw.WriteValue(col), key, numLeaves) }(ch, cp) offset = 0 } go func() { readBuff := [8192]byte{} for { n, err := r.Read(readBuff[:]) for i := 0; i < n; i++ { if addByte(readBuff[i]) { makeChunk() } } if err != nil { if err != io.EOF { panic(err) } if offset > 0 { makeChunk() } close(mtChan) break } } }() for ch := range mtChan { mt := <-ch if sc.parent == nil { sc.createParent() } sc.parent.Append(mt) } return newBlob(sc.Done()) } ================================================ FILE: go/types/blob_editor.go 
================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "errors" "io" "sync" "github.com/attic-labs/noms/go/d" ) type BlobEditor struct { b Blob edits *blobEdit pos int64 } func NewBlobEditor(b Blob) *BlobEditor { return &BlobEditor{b, nil, 0} } func (be *BlobEditor) Kind() NomsKind { return BlobKind } func (be *BlobEditor) Value() Value { return be.Blob() } func (be *BlobEditor) Blob() Blob { if be.edits == nil { return be.b // no edits } seq := be.b.sequence vrw := seq.valueReadWriter() curs := make([]chan *sequenceCursor, 0) for edit := be.edits; edit != nil; edit = edit.next { edit := edit // TODO: Use ReadMany cc := make(chan *sequenceCursor, 1) curs = append(curs, cc) go func() { cc <- newCursorAtIndex(seq, edit.idx) }() } var ch *sequenceChunker idx := 0 for edit := be.edits; edit != nil; edit = edit.next { cur := <-curs[idx] idx++ if ch == nil { ch = newSequenceChunker(cur, 0, vrw, makeBlobLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(BlobKind, vrw), hashValueByte) } else { ch.advanceTo(cur) } dc := edit.removed for dc > 0 { ch.Skip() dc-- } for _, v := range edit.inserted { ch.Append(v) } } return newBlob(ch.Done()) } func collapseBlobEdit(newEdit, edit *blobEdit) bool { if newEdit.idx+newEdit.removed < edit.idx || edit.idx+uint64(len(edit.inserted)) < newEdit.idx { return false } collapsed := &blobEdit{} if newEdit.idx <= edit.idx { collapsed.idx = newEdit.idx overlap := newEdit.removed - (edit.idx - newEdit.idx) // number of leading N values removed from edit.inserted if overlap < uint64(len(edit.inserted)) { // newEdit doesn't remove all of edit.inserted collapsed.inserted = append(newEdit.inserted, edit.inserted[overlap:]...) 
collapsed.removed = newEdit.removed + edit.removed - overlap } else { // newEdit removes all of edit.inserted collapsed.inserted = newEdit.inserted collapsed.removed = newEdit.removed + edit.removed - uint64(len(edit.inserted)) } } else { // edit.idx < newEdit.idx collapsed.idx = edit.idx editInsertedLen := uint64(len(edit.inserted)) beginEditRemovePoint := newEdit.idx - edit.idx if beginEditRemovePoint == editInsertedLen { // newEdit took place at the position immediately after the last element of edit.inserted collapsed.inserted = append(edit.inserted, newEdit.inserted...) collapsed.removed = edit.removed + newEdit.removed } else { // newEdit takes place within edit.inserted collapsed.inserted = append(collapsed.inserted, edit.inserted[:beginEditRemovePoint]...) collapsed.inserted = append(collapsed.inserted, newEdit.inserted...) endEditRemovePoint := beginEditRemovePoint + newEdit.removed if endEditRemovePoint < editInsertedLen { // elements of edit.inserted remain beyond newEdit.removed collapsed.removed = edit.removed collapsed.inserted = append(collapsed.inserted, edit.inserted[endEditRemovePoint:]...) 
} else { collapsed.removed = edit.removed + endEditRemovePoint - editInsertedLen } } } *newEdit = *collapsed return true } func (be *BlobEditor) Len() uint64 { delta := int64(0) for edit := be.edits; edit != nil; edit = edit.next { delta += -int64(edit.removed) + int64(len(edit.inserted)) } return uint64(int64(be.b.Len()) + delta) } func (be *BlobEditor) Splice(idx uint64, deleteCount uint64, insert []byte) *BlobEditor { ne := &blobEdit{idx, deleteCount, insert, nil} var last *blobEdit edit := be.edits for edit != nil { if collapseBlobEdit(ne, edit) { if last == nil { be.edits = edit.next } else { last.next = edit.next } edit = edit.next continue } if edit.idx > ne.idx { break } ne.idx = adjustBlobIdx(ne.idx, edit) last = edit edit = edit.next } if ne.removed == 0 && len(ne.inserted) == 0 { return be // effectively removed 1 or more existing slices } if ne.idx > be.b.Len() { d.Panic("Index Out Of Bounds") } if ne.idx == be.b.Len() && ne.removed > 0 { d.Panic("Index Out Of Bounds") } if last == nil { // Insert |ne| in first position ne.next = be.edits be.edits = ne } else { ne.next = last.next last.next = ne } return be } func (be *BlobEditor) Seek(offset int64, whence int) (int64, error) { abs := int64(be.pos) switch whence { case 0: abs = offset case 1: abs += offset case 2: abs = int64(be.Len()) + offset default: return 0, errors.New("BlobEditor.Seek: invalid whence") } if abs < 0 { return 0, errors.New("BlobEditor.Seek: negative position") } if uint64(abs) > be.Len() { return 0, errors.New("BlobEditor.Seek: sparse blobs not supported") } be.pos = int64(abs) return abs, nil } func (be *BlobEditor) Read(p []byte) (n int, err error) { startIdx := uint64(be.pos) endIdx := startIdx + uint64(len(p)) if endIdx > be.Len() { endIdx = be.Len() } n = int(endIdx - startIdx) if endIdx == be.Len() { err = io.EOF } wg := &sync.WaitGroup{} asyncReadAt := func(length uint64) { idx := int64(startIdx) to := p[:length] wg.Add(1) go func() { be.b.ReadAt(to, idx) wg.Done() }() 
startIdx += length p = p[length:] } edit := be.edits for edit != nil && startIdx < endIdx { if edit.idx > startIdx { // ReadAt the bytes before the current edit end := endIdx if endIdx > edit.idx { end = edit.idx } asyncReadAt(end - startIdx) continue } insertedLength := uint64(len(edit.inserted)) if edit.idx <= startIdx && startIdx < (edit.idx+insertedLength) { // Copy bytes within the current edit start := startIdx - edit.idx end := endIdx - edit.idx if end > insertedLength { end = insertedLength } copy(p, edit.inserted[start:end]) p = p[end-start:] startIdx += end - start continue } startIdx = adjustBlobIdx(startIdx, edit) endIdx = adjustBlobIdx(endIdx, edit) edit = edit.next } if endIdx > startIdx { // ReadAt any bytes beyond the final edit asyncReadAt(endIdx - startIdx) } wg.Wait() return } func (be *BlobEditor) Write(p []byte) (n int, err error) { removeCount := uint64(len(p)) remaining := be.Len() - uint64(be.pos) if remaining < removeCount { removeCount = remaining } be.Splice(uint64(be.pos), removeCount, p) return len(p), nil } func adjustBlobIdx(idx uint64, e *blobEdit) uint64 { return idx + e.removed - uint64(len(e.inserted)) } type blobEdit struct { idx uint64 removed uint64 inserted []byte next *blobEdit } ================================================ FILE: go/types/blob_editor_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "math/rand" "testing" "io/ioutil" "bytes" "io" "github.com/attic-labs/noms/go/chunks" "github.com/stretchr/testify/assert" ) func TestBlobReadWriteFuzzer(t *testing.T) { rounds := 1024 operations := 512 flushEvery := 16 maxInsertCount := uint64(64) ts := &chunks.TestStorage{} cs := ts.NewView() vs := newValueStoreWithCacheAndPending(cs, 0, 0) r := rand.New(rand.NewSource(0)) nextRandInt := func(from, to uint64) uint64 { return from + uint64(float64(to-from)*r.Float64()) } for i := 0; i < rounds; i++ { b := NewBlob(vs) f, _ := ioutil.TempFile("", "buff") be := b.Edit() for j := 0; j < operations; j++ { if j%2 == 1 { // random read idx := nextRandInt(0, be.Len()) l := nextRandInt(0, be.Len()-idx) f.Seek(int64(idx), 0) be.Seek(int64(idx), 0) ex := make([]byte, l) ac := make([]byte, l) f.Read(ex) be.Read(ac) assert.True(t, bytes.Compare(ex, ac) == 0) } else { // randon write idx := nextRandInt(0, be.Len()) f.Seek(int64(idx), 0) be.Seek(int64(idx), 0) l := nextRandInt(0, maxInsertCount) data, err := ioutil.ReadAll(&io.LimitedReader{R: r, N: int64(l)}) assert.NoError(t, err) f.Write(data) be.Write(data) } if j%flushEvery == 0 { // Flush b = be.Blob() be = b.Edit() } } f.Sync() b = be.Blob() f.Seek(0, 0) info, err := f.Stat() assert.NoError(t, err) assert.True(t, uint64(info.Size()) == b.Len()) expect, err := ioutil.ReadAll(f) assert.NoError(t, err) actual := make([]byte, b.Len()) b.ReadAt(actual, 0) assert.True(t, bytes.Compare(expect, actual) == 0) } } ================================================ FILE: go/types/blob_leaf_sequence.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import "github.com/attic-labs/noms/go/d" type blobLeafSequence struct { leafSequence } func newBlobLeafSequence(vrw ValueReadWriter, data []byte) sequence { d.PanicIfTrue(vrw == nil) offsets := make([]uint32, sequencePartValues+1) w := newBinaryNomsWriter() offsets[sequencePartKind] = w.offset BlobKind.writeTo(&w) offsets[sequencePartLevel] = w.offset w.writeCount(0) // level offsets[sequencePartCount] = w.offset count := uint64(len(data)) w.writeCount(count) offsets[sequencePartValues] = w.offset w.writeBytes(data) return blobLeafSequence{newLeafSequence(vrw, w.data(), offsets, count)} } func (bl blobLeafSequence) writeTo(w nomsWriter) { w.writeRaw(bl.buff) } // sequence interface func (bl blobLeafSequence) data() []byte { offset := bl.offsets[sequencePartValues] - bl.offsets[sequencePartKind] return bl.buff[offset:] } func (bl blobLeafSequence) getCompareFn(other sequence) compareFn { offsetStart := int(bl.offsets[sequencePartValues] - bl.offsets[sequencePartKind]) obl := other.(blobLeafSequence) otherOffsetStart := int(obl.offsets[sequencePartValues] - obl.offsets[sequencePartKind]) return func(idx, otherIdx int) bool { return bl.buff[offsetStart+idx] == obl.buff[otherOffsetStart+otherIdx] } } func (bl blobLeafSequence) getItem(idx int) sequenceItem { offset := bl.offsets[sequencePartValues] - bl.offsets[sequencePartKind] + uint32(idx) return bl.buff[offset] } func (bl blobLeafSequence) typeOf() *Type { return BlobType } ================================================ FILE: go/types/blob_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "io" "io/ioutil" "math/rand" "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) func randomBuff(powOfTwo uint) []byte { length := 1 << powOfTwo rr := rand.New(rand.NewSource(int64(powOfTwo))) buff := make([]byte, length) rr.Read(buff) return buff } type blobTestSuite struct { collectionTestSuite buff []byte } func newBlobTestSuite(size uint, expectChunkCount int, expectPrependChunkDiff int, expectAppendChunkDiff int) *blobTestSuite { vrw := newTestValueStore() length := 1 << size buff := randomBuff(size) blob := NewBlob(vrw, bytes.NewReader(buff)) return &blobTestSuite{ collectionTestSuite: collectionTestSuite{ col: blob, expectType: BlobType, expectLen: uint64(length), expectChunkCount: expectChunkCount, expectPrependChunkDiff: expectPrependChunkDiff, expectAppendChunkDiff: expectAppendChunkDiff, validate: func(v2 Collection) bool { b2 := v2.(Blob) outBuff := &bytes.Buffer{} b2.Copy(outBuff) return bytes.Compare(outBuff.Bytes(), buff) == 0 }, prependOne: func() Collection { dup := make([]byte, length+1) dup[0] = 0 copy(dup[1:], buff) return NewBlob(vrw, bytes.NewReader(dup)) }, appendOne: func() Collection { dup := make([]byte, length+1) copy(dup, buff) dup[len(dup)-1] = 0 return NewBlob(vrw, bytes.NewReader(dup)) }, }, buff: buff, } } func TestBlobSuite4K(t *testing.T) { suite.Run(t, newBlobTestSuite(12, 2, 2, 2)) } func TestBlobSuite64K(t *testing.T) { suite.Run(t, newBlobTestSuite(16, 15, 2, 2)) } func TestBlobSuite256K(t *testing.T) { suite.Run(t, newBlobTestSuite(18, 64, 2, 2)) } func TestBlobSuite1M(t *testing.T) { suite.Run(t, newBlobTestSuite(20, 245, 2, 2)) } // Checks the first 1/2 of the bytes, then 1/2 of the remainder, then 1/2 of the remainder, etc... 
func (suite *blobTestSuite) TestRandomRead() { buffReader := bytes.NewReader(suite.buff) blobReader := suite.col.(Blob).Reader() readByteRange := func(r io.ReadSeeker, start, rel, count int64) []byte { bytes := make([]byte, count) n, err := r.Seek(start, 0) suite.NoError(err) suite.Equal(start, n) n2, err := r.Seek(rel, 1) suite.NoError(err) suite.Equal(start+rel, n2) n3, err := io.ReadFull(r, bytes) suite.NoError(err) suite.Equal(int(count), n3) return bytes } readByteRangeFromEnd := func(r io.ReadSeeker, length, offset, count int64) []byte { bytes := make([]byte, count) n, err := r.Seek(offset, 2) suite.NoError(err) suite.Equal(length+offset, n) n2, err := io.ReadFull(r, bytes) suite.NoError(err) suite.Equal(int(count), n2) return bytes } checkByteRange := func(start, rel, count int64) { expect := readByteRange(buffReader, start, rel, count) actual := readByteRange(blobReader, start, rel, count) suite.Equal(expect, actual) } checkByteRangeFromEnd := func(length, offset, count int64) { expect := readByteRangeFromEnd(buffReader, length, offset, count) actual := readByteRangeFromEnd(blobReader, length, offset, count) suite.Equal(expect, actual) } length := int64(len(suite.buff)) start := int64(0) count := int64(length / 2) for count > 2 { checkByteRange(start, 0, count) checkByteRange(0, start, count) checkByteRange(start/2, start-(start/2), count) checkByteRangeFromEnd(length, start-length, count) start = start + count count = (length - start) / 2 } } type testReader struct { readCount int buf *bytes.Buffer } func (r *testReader) Read(p []byte) (n int, err error) { r.readCount++ switch r.readCount { case 1: for i := 0; i < len(p); i++ { p[i] = 0x01 } io.Copy(r.buf, bytes.NewReader(p)) return len(p), nil case 2: p[0] = 0x02 r.buf.WriteByte(p[0]) return 1, io.EOF default: return 0, io.EOF } } func TestBlobFromReaderThatReturnsDataAndError(t *testing.T) { // See issue #264. 
// This tests the case of building a Blob from a reader who returns both data and an error for the final Read() call. assert := assert.New(t) vrw := newTestValueStore() tr := &testReader{buf: &bytes.Buffer{}} b := NewBlob(vrw, tr) actual := &bytes.Buffer{} io.Copy(actual, b.Reader()) assert.True(bytes.Equal(actual.Bytes(), tr.buf.Bytes())) assert.Equal(byte(2), actual.Bytes()[len(actual.Bytes())-1]) } func TestBlobSplice(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() blob := NewEmptyBlob(vrw) buf := new(bytes.Buffer) blob = blob.Edit().Splice(0, 0, []byte("I'll do anything")).Blob() buf.Reset() buf.ReadFrom(blob.Reader()) assert.Equal(buf.String(), "I'll do anything") blob = blob.Edit().Splice(16, 0, []byte(" for arv")).Blob() buf.Reset() buf.ReadFrom(blob.Reader()) assert.Equal(buf.String(), "I'll do anything for arv") blob = blob.Edit().Splice(0, 0, []byte("Yes, ")).Blob() buf.Reset() buf.ReadFrom(blob.Reader()) assert.Equal(buf.String(), "Yes, I'll do anything for arv") blob = blob.Edit().Splice(5, 20, []byte("it's hard to satisfy")).Blob() buf.Reset() buf.ReadFrom(blob.Reader()) assert.Equal(buf.String(), "Yes, it's hard to satisfy arv") } func TestBlobConcat(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() reload := func(b Blob) Blob { return vs.ReadValue(vs.WriteValue(b).TargetHash()).(Blob) } split := func(b Blob, at int64) (Blob, Blob) { read1, read2 := b.Reader(), b.Reader() b1 := NewBlob(vs, &io.LimitedReader{R: read1, N: at}) read2.Seek(at, 0) b2 := NewBlob(vs, read2) return reload(b1), reload(b2) } // Random 1MB Blob. // Note that List.Concat is exhaustively tested, don't worry here. 
r := rand.New(rand.NewSource(0)) b := NewBlob(vs, &io.LimitedReader{R: r, N: 1e6}) b = reload(b) b1 := NewEmptyBlob(vs).Concat(b) assert.True(b.Equals(b1)) b2 := b.Concat(NewEmptyBlob(vs)) assert.True(b.Equals(b2)) b3, b4 := split(b, 10) assert.True(b.Equals(b3.Concat(b4))) b5, b6 := split(b, 1e6-10) assert.True(b.Equals(b5.Concat(b6))) b7, b8 := split(b, 1e6/2) assert.True(b.Equals(b7.Concat(b8))) } func TestBlobNewParallel(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() readAll := func(b Blob) []byte { data, err := ioutil.ReadAll(b.Reader()) assert.NoError(err) return data } b := NewBlob(vrw) assert.True(b.Len() == 0) b = NewBlob(vrw, strings.NewReader("abc")) assert.Equal("abc", string(readAll(b))) b = NewBlob(vrw, strings.NewReader("abc"), strings.NewReader("def")) assert.Equal("abcdef", string(readAll(b))) p, size := 100, 1024 r := rand.New(rand.NewSource(0)) data := make([]byte, p*size) _, err := r.Read(data) assert.NoError(err) readers := make([]io.Reader, p) for i := range readers { readers[i] = bytes.NewBuffer(data[i*size : (i+1)*size]) } b = NewBlob(vrw, readers...) assert.Equal(data, readAll(b)) } func TestStreamingParallelBlob(t *testing.T) { assert := assert.New(t) buff := randomBuff(1 << 26 /* 64MB */) chunks := 4 readers := make([]io.Reader, chunks) chunkSize := len(buff) / chunks for i := 0; i < len(readers); i++ { readers[i] = bytes.NewReader(buff[i*chunkSize : (i+1)*chunkSize]) } vs := newTestValueStore() blob := NewBlob(vs, readers...) outBuff := &bytes.Buffer{} blob.Copy(outBuff) assert.True(bytes.Compare(buff, outBuff.Bytes()) == 0) } ================================================ FILE: go/types/bool.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "github.com/attic-labs/noms/go/hash" ) // Bool is a Noms Value wrapper around the primitive bool type. type Bool bool // Value interface func (b Bool) Value() Value { return b } func (b Bool) Equals(other Value) bool { return b == other } func (b Bool) Less(other Value) bool { if b2, ok := other.(Bool); ok { return !bool(b) && bool(b2) } return true } func (b Bool) Hash() hash.Hash { return getHash(b) } func (b Bool) WalkValues(cb ValueCallback) { } func (b Bool) WalkRefs(cb RefCallback) { } func (b Bool) typeOf() *Type { return BoolType } func (b Bool) Kind() NomsKind { return BoolKind } func (b Bool) valueReadWriter() ValueReadWriter { return nil } func (b Bool) writeTo(w nomsWriter) { BoolKind.writeTo(w) w.writeBool(bool(b)) } func (b Bool) valueBytes() []byte { if bool(b) { return []byte{byte(BoolKind), 1} } return []byte{byte(BoolKind), 0} } ================================================ FILE: go/types/codec.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "encoding/binary" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" ) const initialBufferSize = 2048 type valueBytes interface { valueBytes() []byte } func EncodeValue(v Value) chunks.Chunk { switch v := v.(type) { case valueBytes: return chunks.NewChunk(v.valueBytes()) case *Type: w := newBinaryNomsWriter() v.writeTo(&w) return chunks.NewChunk(w.data()) } panic("unreachable") } func DecodeFromBytes(data []byte, vrw ValueReadWriter) Value { dec := newValueDecoder(data, vrw) v := dec.readValue() d.PanicIfFalse(dec.pos() == uint32(len(data))) return v } func decodeFromBytesWithValidation(data []byte, vrw ValueReadWriter) Value { r := binaryNomsReader{data, 0} dec := newValueDecoderWithValidation(r, vrw) v := dec.readValue() d.PanicIfFalse(dec.pos() == uint32(len(data))) return v } // DecodeValue decodes a value from a chunk source. It is an error to provide an empty chunk. 
func DecodeValue(c chunks.Chunk, vrw ValueReadWriter) Value { d.PanicIfTrue(c.IsEmpty()) return DecodeFromBytes(c.Data(), vrw) } type nomsWriter interface { writeBool(b bool) writeBytes(v []byte) writeCount(count uint64) writeHash(h hash.Hash) writeNumber(v Number) writeString(v string) writeUint8(v uint8) writeRaw(buff []byte) } type binaryNomsReader struct { buff []byte offset uint32 } func (b *binaryNomsReader) pos() uint32 { return b.offset } func (b *binaryNomsReader) readUint8() uint8 { v := uint8(b.buff[b.offset]) b.offset++ return v } func (b *binaryNomsReader) peekUint8() uint8 { return uint8(b.buff[b.offset]) } func (b *binaryNomsReader) skipUint8() { b.offset++ } func (b *binaryNomsReader) peekKind() NomsKind { return NomsKind(b.peekUint8()) } func (b *binaryNomsReader) readKind() NomsKind { return NomsKind(b.readUint8()) } func (b *binaryNomsReader) skipKind() { b.skipUint8() } func (b *binaryNomsReader) readCount() uint64 { v, count := binary.Uvarint(b.buff[b.offset:]) b.offset += uint32(count) return v } func (b *binaryNomsReader) skipCount() { _, count := binary.Uvarint(b.buff[b.offset:]) b.offset += uint32(count) } func (b *binaryNomsReader) readNumber() Number { // b.assertCanRead(binary.MaxVarintLen64 * 2) i, count := binary.Varint(b.buff[b.offset:]) b.offset += uint32(count) exp, count2 := binary.Varint(b.buff[b.offset:]) b.offset += uint32(count2) return Number(fracExpToFloat(i, int(exp))) } func (b *binaryNomsReader) skipNumber() { _, count := binary.Varint(b.buff[b.offset:]) b.offset += uint32(count) _, count2 := binary.Varint(b.buff[b.offset:]) b.offset += uint32(count2) } func (b *binaryNomsReader) readBool() bool { return b.readUint8() == 1 } func (b *binaryNomsReader) skipBool() { b.skipUint8() } func (b *binaryNomsReader) readString() string { size := uint32(b.readCount()) v := string(b.buff[b.offset : b.offset+size]) b.offset += size return v } func (b *binaryNomsReader) skipString() { size := uint32(b.readCount()) b.offset += size } 
func (b *binaryNomsReader) readHash() hash.Hash { h := hash.Hash{} copy(h[:], b.buff[b.offset:b.offset+hash.ByteLen]) b.offset += hash.ByteLen return h } func (b *binaryNomsReader) skipHash() { b.offset += hash.ByteLen } func (b *binaryNomsReader) byteSlice(start, end uint32) []byte { return b.buff[start:end] } type binaryNomsWriter struct { buff []byte offset uint32 } func newBinaryNomsWriter() binaryNomsWriter { return binaryNomsWriter{make([]byte, initialBufferSize), 0} } func (b *binaryNomsWriter) data() []byte { return b.buff[0:b.offset] } func (b *binaryNomsWriter) reset() { b.offset = 0 } func (b *binaryNomsWriter) ensureCapacity(n uint32) { length := uint32(len(b.buff)) if b.offset+n <= length { return } old := b.buff for b.offset+n > length { length = length * 2 } b.buff = make([]byte, length, length) copy(b.buff, old) } func (b *binaryNomsWriter) writeBytes(v []byte) { size := uint32(len(v)) b.ensureCapacity(size) copy(b.buff[b.offset:], v) b.offset += size } func (b *binaryNomsWriter) writeUint8(v uint8) { b.ensureCapacity(1) b.buff[b.offset] = byte(v) b.offset++ } func (b *binaryNomsWriter) writeCount(v uint64) { b.ensureCapacity(binary.MaxVarintLen64) count := binary.PutUvarint(b.buff[b.offset:], v) b.offset += uint32(count) } func (b *binaryNomsWriter) writeNumber(v Number) { b.ensureCapacity(binary.MaxVarintLen64 * 2) i, exp := float64ToIntExp(float64(v)) count := binary.PutVarint(b.buff[b.offset:], i) b.offset += uint32(count) count = binary.PutVarint(b.buff[b.offset:], int64(exp)) b.offset += uint32(count) } func (b *binaryNomsWriter) writeBool(v bool) { if v { b.writeUint8(uint8(1)) } else { b.writeUint8(uint8(0)) } } func (b *binaryNomsWriter) writeString(v string) { size := uint32(len(v)) b.writeCount(uint64(size)) b.ensureCapacity(size) copy(b.buff[b.offset:], v) b.offset += size } func (b *binaryNomsWriter) writeHash(h hash.Hash) { b.ensureCapacity(hash.ByteLen) copy(b.buff[b.offset:], h[:]) b.offset += hash.ByteLen } func (b 
*binaryNomsWriter) writeRaw(buff []byte) { b.ensureCapacity(uint32(len(buff))) copy(b.buff[b.offset:], buff) b.offset += uint32(len(buff)) } ================================================ FILE: go/types/codec_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "testing" "github.com/stretchr/testify/assert" ) func TestCodecWriteNumber(t *testing.T) { test := func(f float64, exp []byte) { w := newBinaryNomsWriter() w.writeNumber(Number(f)) assert.Equal(t, exp, w.data()) } // We use zigzag encoding for the signed bit. For positive n we do 2*n and for negative we do 2*-n - 1 test(0, []byte{0, 0}) // 0 * 2 ** 0 test(1, []byte{1 * 2, 0}) // 1 * 2 ** 0 test(2, []byte{1 * 2, 1 * 2}) // 1 * 2 ** 1 test(-2, []byte{(1 * 2) - 1, 1 * 2}) // -1 * 2 ** 1 test(.5, []byte{1 * 2, 1*2 - 1}) // 1 * 2 ** -1 test(-.5, []byte{1*2 - 1, 1*2 - 1}) // -1 * 2 ** -1 test(.25, []byte{1 * 2, 2*2 - 1}) // 1 * 2 ** -2 test(3, []byte{3 * 2, 0}) // 0b11 * 2 ** 0 test(15, []byte{15 * 2, 0}) // 0b1111 * 2**0 test(256, []byte{1 * 2, 8 * 2}) // 1 * 2*8 test(-15, []byte{15*2 - 1, 0}) // -15 * 2*0 } func TestCodecReadNumber(t *testing.T) { test := func(data []byte, exp float64) { r := binaryNomsReader{buff: data} n := r.readNumber() assert.Equal(t, exp, float64(n)) assert.Equal(t, len(data), int(r.offset)) } test([]byte{0, 0}, 0) // 0 * 2 ** 0 test([]byte{1 * 2, 0}, 1) // 1 * 2 ** 0 test([]byte{1 * 2, 1 * 2}, 2) // 1 * 2 ** 1 test([]byte{1*2 - 1, 1 + 1}, -2) // -1 * 2 ** 1 test([]byte{1 * 2, 1*2 - 1}, .5) // 1 * 2 ** -1 test([]byte{1*2 - 1, 1*2 - 1}, -.5) // -1 * 2 ** -1 test([]byte{1 * 2, 2*2 - 1}, .25) // 1 * 2 ** -2 test([]byte{3 * 2, 0}, 3) // 0b11 * 2 ** 0 test([]byte{15 * 2, 0}, 15) // 0b1111 * 2**0 test([]byte{1 * 2, 8 * 2}, 256) // 1 * 2*8 test([]byte{15*2 - 1, 0}, -15) // -15 * 2*0 } 
================================================
FILE: go/types/collection.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

// Collection is a Value that has a length and is backed by a sequence.
type Collection interface {
	Value
	Empty() bool
	Len() uint64
	asSequence() sequence
}

================================================
FILE: go/types/collection_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import "github.com/stretchr/testify/suite"

// collectionTestSuite holds a collection under test together with the
// expectations and mutation hooks the shared Test* methods below check.
type collectionTestSuite struct {
	suite.Suite
	col                    Collection
	expectType             *Type
	expectLen              uint64
	expectChunkCount       int
	expectPrependChunkDiff int
	expectAppendChunkDiff  int
	validate               validateFn
	prependOne             deltaFn
	appendOne              deltaFn
}

// validateFn reports whether v2 holds the content the suite was built from.
type validateFn func(v2 Collection) bool

// deltaFn builds a variant of the suite's collection (e.g. one element prepended or appended).
type deltaFn func() Collection

func (suite *collectionTestSuite) TestType() {
	suite.True(suite.expectType.Equals(TypeOf(suite.col)))
}

func (suite *collectionTestSuite) TestLen() {
	suite.Equal(suite.expectLen, suite.col.Len())
	suite.Equal(suite.col.Empty(), suite.expectLen == 0)
}

func (suite *collectionTestSuite) TestEquals() {
	v2 := suite.col
	suite.True(suite.col.Equals(v2))
	suite.True(v2.Equals(suite.col))
}

func (suite *collectionTestSuite) TestChunkCountAndType() {
	suite.Equal(suite.expectChunkCount, leafCount(suite.col), "chunk count")
	refType := MakeRefType(suite.expectType)
	suite.col.WalkRefs(func(r Ref) {
		suite.True(refType.Equals(TypeOf(r)))
	})
}

func (suite *collectionTestSuite) TestRoundTripAndValidate() {
	suite.True(suite.validate(suite.col))
}

func (suite *collectionTestSuite) TestPrependChunkDiff() {
	v2 := suite.prependOne()
	suite.Equal(suite.expectPrependChunkDiff, leafDiffCount(suite.col, v2), "prepend count")
}

func (suite *collectionTestSuite) TestAppendChunkDiff() {
	v2 := suite.appendOne()
	suite.Equal(suite.expectAppendChunkDiff, leafDiffCount(suite.col, v2), "append count")
}

// deriveCollectionHeight returns the tree level of c's underlying sequence.
func deriveCollectionHeight(c Collection) uint64 {
	return c.asSequence().treeLevel()
}

// getRefHeightOfCollection returns the height of the ref held by the first
// item of c's sequence. The item is type-asserted to metaTuple, so this
// panics for a sequence whose first item is anything else.
func getRefHeightOfCollection(c Collection) uint64 {
	return c.asSequence().getItem(0).(metaTuple).ref().Height()
}

================================================
FILE: go/types/common_supertype.go
================================================
package types

import "github.com/attic-labs/noms/go/d"

// ContainCommonSupertype returns true if it's possible to synthesize
// a non-trivial (i.e. not empty) supertype from types |a| and |b|.
//
// It is useful for determining whether a subset of values can be extracted
// from one object to produce another object.
//
// The rules for determining whether |a| and |b| intersect are:
//    - if either type is Value, return true
//    - if either type is Union, return true iff at least one variant of |a| intersects with one variant of |b|
//    - if |a| & |b| are not the same kind, return false
//    - else
//      - if both are structs, return true iff their names are equal or one name is "", they share a field name
//        and the type of that field intersects
//      - if both are refs, sets or lists, return true iff the element type intersects
//      - if both are maps, return true iff they have a key with the same type and value types that intersect
//      - else return true
func ContainCommonSupertype(a, b *Type) bool {
	// Avoid cycles internally.
	return containCommonSupertypeImpl(a, b, nil, nil)
}

// containCommonSupertypeImpl applies the rules documented on
// ContainCommonSupertype. aVisited and bVisited are passed through to the
// per-kind helpers; structsIntersect is the one that extends and consults them.
func containCommonSupertypeImpl(a, b *Type, aVisited, bVisited []*Type) bool {
	if a.TargetKind() == ValueKind || b.TargetKind() == ValueKind {
		return true
	}
	if a.TargetKind() == UnionKind || b.TargetKind() == UnionKind {
		return unionsIntersect(a, b, aVisited, bVisited)
	}
	if a.TargetKind() != b.TargetKind() {
		return false
	}
	switch k := a.TargetKind(); k {
	case StructKind:
		return structsIntersect(a, b, aVisited, bVisited)
	case ListKind, SetKind, RefKind:
		return containersIntersect(k, a, b, aVisited, bVisited)
	case MapKind:
		return mapsIntersect(a, b, aVisited, bVisited)
	default:
		return true
	}

}

// Checks for intersection between types that may be unions. If either or
// both is a union, tests all pairs of member types for intersection.
func unionsIntersect(a, b *Type, aVisited, bVisited []*Type) bool {
	aTypes, bTypes := typeList(a), typeList(b)
	for _, t := range aTypes {
		for _, u := range bTypes {
			if containCommonSupertypeImpl(t, u, aVisited, bVisited) {
				return true
			}
		}
	}
	return false
}

// if |t| is a union, returns all types represented; otherwise returns |t|
func typeList(t *Type) typeSlice {
	if t.Desc.Kind() == UnionKind {
		return t.Desc.(CompoundDesc).ElemTypes
	}
	return typeSlice{t}
}

// containersIntersect compares the first element type of two compound types
// of the same kind (the caller passes list, set or ref).
func containersIntersect(kind NomsKind, a, b *Type, aVisited, bVisited []*Type) bool {
	d.Chk.True(kind == a.Desc.Kind() && kind == b.Desc.Kind())
	return containCommonSupertypeImpl(a.Desc.(CompoundDesc).ElemTypes[0], b.Desc.(CompoundDesc).ElemTypes[0], aVisited, bVisited)
}

// mapsIntersect requires the key types (ElemTypes[0]) to share an equal type
// and the value types (ElemTypes[1]) to intersect.
func mapsIntersect(a, b *Type, aVisited, bVisited []*Type) bool {
	// true if a and b are the same or (if either is a union) there is
	// common type between them.
	hasCommonType := func(a, b *Type) bool {
		aTypes, bTypes := typeList(a), typeList(b)
		for _, t := range aTypes {
			for _, u := range bTypes {
				if t.Equals(u) {
					return true
				}
			}
		}
		return false
	}

	d.Chk.True(MapKind == a.Desc.Kind() && MapKind == b.Desc.Kind())

	aDesc, bDesc := a.Desc.(CompoundDesc), b.Desc.(CompoundDesc)
	if !hasCommonType(aDesc.ElemTypes[0], bDesc.ElemTypes[0]) {
		return false
	}
	return containCommonSupertypeImpl(aDesc.ElemTypes[1], bDesc.ElemTypes[1], aVisited, bVisited)
}

// structsIntersect reports whether two struct types have compatible names
// and at least one same-named field whose types intersect.
func structsIntersect(a, b *Type, aVisited, bVisited []*Type) bool {
	// Both structs are already on their respective visited lists: treat the
	// pair as intersecting rather than recursing again.
	_, aFound := indexOfType(a, aVisited)
	_, bFound := indexOfType(b, bVisited)

	if aFound && bFound {
		return true
	}

	d.Chk.True(StructKind == a.TargetKind() && StructKind == b.TargetKind())
	aDesc := a.Desc.(StructDesc)
	bDesc := b.Desc.(StructDesc)
	// must be either the same name or one has no name
	if aDesc.Name != bDesc.Name && !(aDesc.Name == "" || bDesc.Name == "") {
		return false
	}
	// Walk both field lists in step, advancing whichever side has the smaller
	// name. NOTE(review): this relies on both field lists being sorted by
	// name - confirm against StructDesc.
	for i, j := 0, 0; i < len(aDesc.fields) && j < len(bDesc.fields); {
		aName, bName := aDesc.fields[i].Name, bDesc.fields[j].Name
		if aName < bName {
			i++
		} else if bName < aName {
			j++
		} else if !containCommonSupertypeImpl(aDesc.fields[i].Type, bDesc.fields[j].Type, append(aVisited, a), append(bVisited, b)) {
			i++
			j++
		} else {
			return true
		}
	}
	return false
}

================================================
FILE: go/types/common_supertype_test.go
================================================
package types

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestContainCommonSupertype(t *testing.T) {
	cases := []struct {
		a, b *Type
		out  bool
	}{
		// Value & String -> true
		{ValueType, StringType, true},

		// Ref<Bool> & Ref<Bool> -> true
		{MakeRefType(BoolType), MakeRefType(BoolType), true},
		// Ref<Number> & Ref<String> -> false
		{MakeRefType(NumberType), MakeRefType(StringType), false},

		// Set<Bool> & Set<Bool> -> true
		{MakeSetType(BoolType), MakeSetType(BoolType), true},
		// Set<Bool> & Set<String> -> false
		{MakeSetType(BoolType), MakeSetType(StringType), false},

		// List<Blob> & List<Blob> -> true
		{MakeListType(BlobType), MakeListType(BlobType), true},
		// List<Blob> & List<String> -> false
		{MakeListType(BlobType), MakeListType(StringType), false},
		// List<Blob|String|Number> & List<String|Bool> -> true
		{MakeListType(MakeUnionType(BlobType, StringType, NumberType)), MakeListType(MakeUnionType(StringType, BoolType)), true},
		// List<Blob|String> & List<Number|Bool> -> false
		{MakeListType(MakeUnionType(BlobType, StringType)), MakeListType(MakeUnionType(NumberType, BoolType)), false},

		// Map<Bool,Bool> & Map<Bool,Bool> -> true
		{MakeMapType(BoolType, BoolType), MakeMapType(BoolType, BoolType), true},
		// Map<Bool,Bool> & Map<Bool,String> -> false
		{MakeMapType(BoolType, BoolType), MakeMapType(BoolType, StringType), false},
		// Map<Bool,Bool> & Map<String,Bool> -> false
		{MakeMapType(BoolType, BoolType), MakeMapType(StringType, BoolType), false},
		// Map<Bool,Bool> & Map<String,Bool> -> false
		{MakeMapType(BoolType, BoolType), MakeMapType(StringType, BoolType), false},
		// Map<struct{foo:String},Bool> & Map<struct{foo:String,bar:String},Bool> -> false
		{MakeMapType(MakeStructTypeFromFields("", FieldMap{"foo": StringType}), BoolType),
			MakeMapType(MakeStructTypeFromFields("", FieldMap{"foo": StringType, "bar": StringType}), BoolType), false},
		// Map<String|Blob,String> & Map<Number|String,String> -> true
		{MakeMapType(MakeUnionType(StringType, BlobType), StringType),
			MakeMapType(MakeUnionType(NumberType, StringType), StringType), true},
		// Map<Blob|Bool,String> & Map<Number|String,String> -> false
		{MakeMapType(MakeUnionType(BlobType, BoolType), StringType),
			MakeMapType(MakeUnionType(NumberType, StringType), StringType), false},

		// bool & string|bool|blob -> true
		{BoolType, MakeUnionType(StringType, BoolType, BlobType), true},
		// string|bool|blob & blob -> true
		{MakeUnionType(StringType, BoolType, BlobType), BlobType, true},
		// string|bool|blob & number|blob|string -> true
		{MakeUnionType(StringType, BoolType, BlobType), MakeUnionType(NumberType, BlobType, StringType), true},

		// struct{foo:bool} & struct{foo:bool} -> true
		{MakeStructTypeFromFields("", FieldMap{"foo": BoolType}),
			MakeStructTypeFromFields("", FieldMap{"foo": BoolType}), true},
		// struct{foo:bool} & struct{foo:string} -> false
		{MakeStructTypeFromFields("", FieldMap{"foo": BoolType}),
			MakeStructTypeFromFields("", FieldMap{"foo": StringType}), false},
		// struct{foo:bool} & struct{foo:bool,bar:number} -> true
		{MakeStructTypeFromFields("", FieldMap{"foo": BoolType}),
			MakeStructTypeFromFields("", FieldMap{"foo": BoolType, "bar": NumberType}), true},
		// struct{foo:ref<bool>} & struct{foo:ref<number>} -> false
		{MakeStructTypeFromFields("", FieldMap{"foo": MakeRefType(BoolType)}),
			MakeStructTypeFromFields("", FieldMap{"foo": MakeRefType(NumberType)}), false},
		// struct{foo:ref<bool>} & struct{foo:ref<number|bool>} -> true
		{MakeStructTypeFromFields("", FieldMap{"foo": MakeRefType(BoolType)}),
			MakeStructTypeFromFields("", FieldMap{"foo": MakeRefType(MakeUnionType(NumberType, BoolType))}), true},
		// struct A{foo:bool} & struct A{foo:bool, baz:string} -> true
		{MakeStructTypeFromFields("A", FieldMap{"foo": BoolType}),
			MakeStructTypeFromFields("A", FieldMap{"foo": BoolType, "baz": StringType}), true},

		// struct A{foo:bool, stuff:set<string|blob>} & struct A{foo:bool, stuff:set<string>} -> true
		{MakeStructTypeFromFields("A", FieldMap{"foo": BoolType, "stuff": MakeSetType(MakeUnionType(StringType, BlobType))}),
			MakeStructTypeFromFields("A", FieldMap{"foo": BoolType, "stuff": MakeSetType(StringType)}), true},
		// struct A{foo:bool, stuff:set<string|blob>} & struct A{stuff:set<number>} -> false
		{MakeStructTypeFromFields("A", FieldMap{"foo": BoolType, "stuff": MakeSetType(MakeUnionType(StringType, BlobType))}),
			MakeStructTypeFromFields("A", FieldMap{"stuff": MakeSetType(NumberType)}), false},

		// struct A{foo:bool} & struct {foo:bool} -> true
		{MakeStructTypeFromFields("A", FieldMap{"foo": BoolType}),
			MakeStructTypeFromFields("", FieldMap{"foo": BoolType}), true},
		// struct {foo:bool} & struct A{foo:bool} -> true
		{MakeStructTypeFromFields("", FieldMap{"foo": BoolType}),
			MakeStructTypeFromFields("A", FieldMap{"foo": BoolType}), true},
		// struct A{foo:bool} & struct B{foo:bool} -> false
		{MakeStructTypeFromFields("A", FieldMap{"foo": BoolType}),
			MakeStructTypeFromFields("B", FieldMap{"foo": BoolType}), false},

		// Map<String, struct A{foo:String}> & Map<String, struct A{foo:String, bar:Bool}> -> true
		{MakeMapType(StringType, MakeStructTypeFromFields("A", FieldMap{"foo": StringType})),
			MakeMapType(StringType, MakeStructTypeFromFields("A", FieldMap{"foo": StringType, "bar": BoolType})), true},

		// struct{foo: string} & struct{foo: string|blob} -> true
		{MakeStructTypeFromFields("", FieldMap{"foo": StringType}),
			MakeStructTypeFromFields("", FieldMap{"foo": MakeUnionType(StringType, BlobType)}), true},

		// struct{foo: string}|struct{foo: blob} & struct{foo: string|blob} -> true
		{MakeUnionType(
			MakeStructTypeFromFields("", FieldMap{"foo": StringType}),
			MakeStructTypeFromFields("", FieldMap{"foo": BlobType}),
		), MakeStructTypeFromFields("", FieldMap{"foo": MakeUnionType(StringType, BlobType)}), true},
		// struct{foo: string}|struct{foo: blob} & struct{foo: number|bool} -> false
		{MakeUnionType(
			MakeStructTypeFromFields("", FieldMap{"foo": StringType}),
			MakeStructTypeFromFields("", FieldMap{"foo": BlobType}),
		), MakeStructTypeFromFields("", FieldMap{"foo": MakeUnionType(NumberType, BoolType)}), false},

		// Map<struct{x:Number, y:Number}, struct A{foo:String}> & Map<struct{x:Number, y:Number}, struct A{foo:String, bar:Bool}> -> true
		{
			MakeMapType(
				MakeStructTypeFromFields("", FieldMap{"x": NumberType, "y": NumberType}),
				MakeStructTypeFromFields("A", FieldMap{"foo": StringType})),
			MakeMapType(
				MakeStructTypeFromFields("", FieldMap{"x": NumberType, "y": NumberType}),
				MakeStructTypeFromFields("A", FieldMap{"foo": StringType, "bar": BoolType})),
			true,
		},

		// Same pair as the previous case (duplicate entry).
		{
			MakeMapType(
				MakeStructTypeFromFields("", FieldMap{"x": NumberType, "y": NumberType}),
				MakeStructTypeFromFields("A", FieldMap{"foo": StringType})),
			MakeMapType(
				MakeStructTypeFromFields("", FieldMap{"x": NumberType, "y": NumberType}),
				MakeStructTypeFromFields("A", FieldMap{"foo": StringType, "bar": BoolType})),
			true,
		},

		// struct A{self:Cycle<A>} & struct A{self:Cycle<A>, foo:Number} -> true
		{MakeStructTypeFromFields("A", FieldMap{"self": MakeCycleType("A")}),
			MakeStructTypeFromFields("A", FieldMap{"self": MakeCycleType("A"), "foo": NumberType}), true},

		// struct{b:Bool} & struct{b?:Bool} -> true
		{
			MakeStructType("", StructField{"b", BoolType, false}),
			MakeStructType("", StructField{"b", BoolType, true}),
			true,
		},

		// struct{a?:Bool} & struct{b?:Bool} -> false
		{
			MakeStructType("", StructField{"a", BoolType, true}),
			MakeStructType("", StructField{"b", BoolType, true}),
			false,
		},

		// struct A {a: struct {a: Cycle<A>}} & struct {a: struct A {a: struct {a: Cycle<A>}}} -> true
		{
			MakeStructType("A",
				StructField{"a", MakeStructType("",
					StructField{"a", MakeCycleType("A"), false},
				), false},
			),
			MakeStructType("",
				StructField{"a", MakeStructType("A",
					StructField{"a", MakeStructType("",
						StructField{"a", MakeCycleType("A"), false},
					), false},
				), false},
			),
			true,
		},
	}

	for i, c := range cases {
		act := ContainCommonSupertype(c.a, c.b)
		assert.Equal(t, c.out, act, "Test case at position %d; \n\ta:%s\n\tb:%s", i, c.a.Describe(), c.b.Describe())
	}
}

================================================
FILE: go/types/compare_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"bytes"
	"sort"
	"testing"

	"github.com/stretchr/testify/assert"
)

var prefix = []byte{0x01, 0x02, 0x03, 0x04}

func TestCompareTotalOrdering(t *testing.T) {
	assert := assert.New(t)
	vrw := newTestValueStore()

	// values in increasing order. Some of these are compared by ref so changing the serialization might change the ordering.
	values := []Value{
		Bool(false), Bool(true),
		Number(-10), Number(0), Number(10),
		String("a"), String("b"), String("c"),

		// The order of these are done by the hash.
		NewSet(vrw, Number(0), Number(1), Number(2), Number(3)),
		BoolType,

		// Value - values cannot be value
		// Cycle - values cannot be cycle
		// Union - values cannot be unions
	}

	// Every pair must agree with the slice order: equal on the diagonal,
	// Less above it, and not Less below it.
	for i, vi := range values {
		for j, vj := range values {
			if i == j {
				assert.True(vi.Equals(vj))
			} else if i < j {
				x := vi.Less(vj)
				assert.True(x)
			} else {
				x := vi.Less(vj)
				assert.False(x)
			}
		}
	}
}

func TestCompareEmpties(t *testing.T) {
	assert := assert.New(t)
	comp := opCacheComparer{}
	assert.Equal(-1, comp.Compare(prefix, append(prefix, 0xff)))
	assert.Equal(0, comp.Compare(prefix, prefix))
	assert.Equal(1, comp.Compare(append(prefix, 0xff), prefix))
}

func TestCompareDifferentPrimitiveTypes(t *testing.T) {
	assert := assert.New(t)
	vrw := newTestValueStore()
	defer vrw.Close()

	nums := ValueSlice{Number(1), Number(2), Number(3)}
	words := ValueSlice{String("k1"), String("v1")}

	blob := NewBlob(vrw, bytes.NewBuffer([]byte{1, 2, 3}))
	nList := NewList(vrw, nums...)
	nMap := NewMap(vrw, words...)
	nRef := NewRef(blob)
	nSet := NewSet(vrw, nums...)
	nStruct := NewStruct("teststruct", map[string]Value{"f1": Number(1)})

	vals := ValueSlice{Bool(true), Number(19), String("hellow"), blob, nList, nMap, nRef, nSet, nStruct}
	sort.Sort(vals)

	// After sorting, comparing the encoded graph keys must give the same
	// ordering as comparing slice positions.
	for i, v1 := range vals {
		for j, v2 := range vals {
			iBytes := [1024]byte{}
			jBytes := [1024]byte{}
			res := compareEncodedKey(encodeGraphKey(iBytes[:0], v1), encodeGraphKey(jBytes[:0], v2))
			assert.Equal(compareInts(i, j), res)
		}
	}
}

func TestComparePrimitives(t *testing.T) {
	assert := assert.New(t)

	bools := []Bool{false, true}
	for i, v1 := range bools {
		for j, v2 := range bools {
			res := compareEncodedNomsValues(encode(v1), encode(v2))
			assert.Equal(compareInts(i, j), res)
		}
	}

	nums := []Number{-1111.29, -23, 0, 4.2345, 298}
	for i, v1 := range nums {
		for j, v2 := range nums {
			res := compareEncodedNomsValues(encode(v1), encode(v2))
			assert.Equal(compareInts(i, j), res)
		}
	}

	words := []String{"", "aaa", "another", "another1"}
	for i, v1 := range words {
		for j, v2 := range words {
			res := compareEncodedNomsValues(encode(v1), encode(v2))
			assert.Equal(compareInts(i, j), res)
		}
	}
}

func TestCompareEncodedKeys(t *testing.T) {
	assert := assert.New(t)
	comp := opCacheComparer{}
	vrw := newTestValueStore()
	defer vrw.Close()

	k1 := ValueSlice{String("one"), Number(3)}
	k2 := ValueSlice{String("one"), Number(5)}

	bs1 := [initialBufferSize]byte{}
	bs2 := [initialBufferSize]byte{}

	e1, _ := encodeKeys(bs1[:0], 0x01020304, MapKind, k1)
	e2, _ := encodeKeys(bs2[:0], 0x01020304, MapKind, k2)
	assert.Equal(-1, comp.Compare(e1, e2))
}

// encode returns the bytes v writes into a fresh 128-byte binaryNomsWriter.
func encode(v Value) []byte {
	w := &binaryNomsWriter{make([]byte, 128, 128), 0}
	v.writeTo(w)
	return w.data()
}

// compareInts returns -1, 0 or 1 as i is less than, equal to or greater than j.
func compareInts(i, j int) (res int) {
	if i < j {
		res = -1
	} else if i > j {
		res = 1
	}
	return
}

================================================
FILE: go/types/edit_distance.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types // ported from edit-distance.js, itself a port with minor modifications of // https://github.com/Polymer/observe-js/blob/master/src/observe.js#L1309. import ( "fmt" "math" ) const ( DEFAULT_MAX_SPLICE_MATRIX_SIZE = 2e7 SPLICE_UNASSIGNED = math.MaxUint64 UNCHANGED = 0 UPDATED = 1 INSERTED = 2 REMOVED = 3 ) // Read a Splice as "at SpAt (in the previous state), SpRemoved elements were removed and SpAdded // elements were inserted, which can be found starting at SpFrom in the current state" type Splice struct { SpAt uint64 SpRemoved uint64 SpAdded uint64 SpFrom uint64 } type EditDistanceEqualsFn func(prevIndex uint64, currentIndex uint64) bool func (s Splice) String() string { return fmt.Sprintf("[%d, %d, %d, %d]", s.SpAt, s.SpRemoved, s.SpAdded, s.SpFrom) } func uint64Min(a, b uint64) uint64 { if a < b { return a } return b } func uint64Min3(a, b, c uint64) uint64 { if a < b { if a < c { return a } } else { if b < c { return b } } return c } func reverse(numbers []uint64) []uint64 { newNumbers := make([]uint64, len(numbers)) for i := 0; i < len(numbers); i++ { newNumbers[i] = numbers[len(numbers)-i-1] } return newNumbers } func addSplice(splices []Splice, s Splice) []Splice { if s.SpFrom == SPLICE_UNASSIGNED { s.SpFrom = 0 } splices = append(splices, s) return splices } func calcSplices(previousLength uint64, currentLength uint64, maxSpliceMatrixSize uint64, eqFn EditDistanceEqualsFn) []Splice { minLength := uint64Min(previousLength, currentLength) prefixCount := sharedPrefix(eqFn, minLength) suffixCount := sharedSuffix(eqFn, previousLength, currentLength, minLength-prefixCount) previousStart := prefixCount currentStart := prefixCount previousEnd := previousLength - suffixCount currentEnd := currentLength - suffixCount if (currentEnd-currentStart) == 0 && (previousEnd-previousStart) == 0 { return []Splice{} } if currentStart == currentEnd { return 
[]Splice{{previousStart, previousEnd - previousStart, 0, 0}} } else if previousStart == previousEnd { return []Splice{{previousStart, 0, currentEnd - currentStart, currentStart}} } previousLength = previousEnd - previousStart currentLength = currentEnd - currentStart if previousLength*currentLength > maxSpliceMatrixSize { return []Splice{{0, previousLength, currentLength, 0}} } splices := make([]Splice, 0) distances := calcEditDistances(eqFn, previousStart, previousLength, currentStart, currentLength) ops := operationsFromEditDistances(distances) var splice *Splice index := currentStart previousIndex := previousStart for i := 0; i < len(ops); i++ { switch ops[i] { case UNCHANGED: if splice != nil { splices = addSplice(splices, *splice) splice = nil } index++ previousIndex++ break case UPDATED: if splice == nil { splice = &Splice{index, 0, 0, SPLICE_UNASSIGNED} } if splice.SpFrom == SPLICE_UNASSIGNED { splice.SpFrom = previousIndex } splice.SpRemoved++ splice.SpAdded++ index++ previousIndex++ break case INSERTED: if splice == nil { splice = &Splice{index, 0, 0, SPLICE_UNASSIGNED} } splice.SpAdded++ if splice.SpFrom == SPLICE_UNASSIGNED { splice.SpFrom = previousIndex } previousIndex++ break case REMOVED: if splice == nil { splice = &Splice{index, 0, 0, SPLICE_UNASSIGNED} } splice.SpRemoved++ index++ break } } if splice != nil { splices = addSplice(splices, *splice) } return splices } func calcEditDistances(eqFn EditDistanceEqualsFn, previousStart uint64, previousLen uint64, currentStart uint64, currentLen uint64) [][]uint64 { // "Deletion" columns rowCount := previousLen + 1 columnCount := currentLen + 1 // see https://golang.org/doc/effective_go.html#two_dimensional_slices for below allocation optimization distances := make([][]uint64, rowCount) distance := make([]uint64, rowCount*columnCount) for i := range distances { distances[i], distance = distance[:columnCount], distance[columnCount:] } // "Addition" rows. Initialize null column. 
for i := uint64(0); i < rowCount; i++ { distances[i][0] = i } // Initialize null row for j := uint64(0); j < columnCount; j++ { distances[0][j] = j } for i := uint64(1); i < rowCount; i++ { for j := uint64(1); j < columnCount; j++ { if eqFn(previousStart+i-1, currentStart+j-1) { distances[i][j] = distances[i-1][j-1] } else { north := distances[i-1][j] + 1 west := distances[i][j-1] + 1 distances[i][j] = uint64Min(north, west) } } } return distances } func operationsFromEditDistances(distances [][]uint64) []uint64 { i := len(distances) - 1 j := len(distances[0]) - 1 current := distances[i][j] edits := make([]uint64, 0) for i > 0 || j > 0 { if i == 0 { edits = append(edits, INSERTED) j-- continue } if j == 0 { edits = append(edits, REMOVED) i-- continue } northWest := distances[i-1][j-1] west := distances[i-1][j] north := distances[i][j-1] minValue := uint64Min3(west, north, northWest) if minValue == northWest { if northWest == current { edits = append(edits, UNCHANGED) } else { edits = append(edits, UPDATED) current = northWest } i-- j-- } else if minValue == west { edits = append(edits, REMOVED) i-- current = west } else { edits = append(edits, INSERTED) j-- current = north } } return reverse(edits) } func sharedPrefix(eqFn EditDistanceEqualsFn, searchLength uint64) uint64 { for i := uint64(0); i < searchLength; i++ { if !eqFn(i, i) { return i } } return searchLength } func sharedSuffix(eqFn EditDistanceEqualsFn, previousLength uint64, currentLength uint64, searchLength uint64) uint64 { count := uint64(0) previousLength-- currentLength-- for count < searchLength && eqFn(previousLength, currentLength) { count++ previousLength-- currentLength-- } return count } ================================================ FILE: go/types/edit_distance_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "testing" "github.com/stretchr/testify/assert" ) func assertDiff(assert *assert.Assertions, last []uint64, current []uint64, expect []Splice) { actual := calcSplices(uint64(len(last)), uint64(len(current)), DEFAULT_MAX_SPLICE_MATRIX_SIZE, func(i uint64, j uint64) bool { return last[i] == current[j] }) assert.Equal(expect, actual, "splices are different: \nexpect: %v\nactual: %v\n", expect, actual) } func TestEditDistanceAppend(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2}, []uint64{0, 1, 2, 3, 4, 5}, []Splice{{3, 0, 3, 3}}, ) } func TestEditDistancePrepend(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{3, 4, 5, 6}, []uint64{0, 1, 2, 3, 4, 5, 6}, []Splice{{0, 0, 3, 0}}, ) } func TestEditDistanceChopFromEnd(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5}, []uint64{0, 1, 2}, []Splice{{3, 3, 0, 0}}, ) } func TestEditDistanceChopFromStart(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5}, []uint64{3, 4, 5}, []Splice{{0, 3, 0, 0}}, ) } func TestEditDistanceChopFromMiddle(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5}, []uint64{0, 5}, []Splice{{1, 4, 0, 0}}, ) } func TestEditDistanceA(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8}, []uint64{0, 1, 2, 4, 5, 6, 8}, []Splice{ {3, 1, 0, 0}, {7, 1, 0, 0}, }, ) } func TestEditDistanceRemoveABunch(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []uint64{1, 2, 4, 5, 7, 8, 10}, []Splice{ {0, 1, 0, 0}, {3, 1, 0, 0}, {6, 1, 0, 0}, {9, 1, 0, 0}, }, ) } func TestEditDistanceAddABunch(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []uint64{0, 'a', 1, 2, 3, 'b', 'c', 'd', 4, 5, 6, 7, 'e', 8, 9, 'f', 10, 'g'}, []Splice{ {1, 0, 1, 1}, {4, 0, 3, 5}, {8, 0, 1, 12}, {10, 0, 1, 15}, {11, 0, 1, 17}, }, ) } func TestEditDistanceUpdateABunch(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []uint64{'a', 1, 2, 'b', 'c', 'd', 6, 7, 'e', 9, 10}, []Splice{ {0, 1, 1, 0}, {3, 3, 3, 3}, {8, 1, 1, 8}, }, ) } func TestEditDistanceLeftOverlap(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []uint64{0, 1, 2, 3, 'a', 'b', 8, 9, 10}, []Splice{ {4, 4, 2, 4}, }, ) } func TestEditDistanceRightOverlap(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []uint64{0, 1, 2, 3, 4, 5, 'a', 'b', 10}, []Splice{ {6, 4, 2, 6}, }, ) } func TestEditDistanceWithin(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []uint64{0, 1, 2, 3, 'a', 'b', 10}, []Splice{ {4, 6, 2, 4}, }, ) } func TestEditDistanceWithout(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []uint64{0, 1, 2, 3, 4, 5, 'a', 'b', 'c', 'd', 8, 9, 10}, []Splice{ {6, 2, 4, 6}, }, ) } func TestEditDistanceMix1(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []uint64{0, 'a', 1, 'b', 3, 'c', 4, 6, 7, 'e', 'f', 10}, []Splice{ {1, 0, 1, 1}, {2, 1, 1, 3}, {4, 0, 1, 5}, {5, 1, 0, 0}, {8, 2, 2, 9}, }, ) } func TestEditDistanceReverse(t *testing.T) { t.Parallel() assert := assert.New(t) assertDiff(assert, []uint64{0, 1, 2, 3, 4, 5, 6, 7}, []uint64{7, 6, 5, 4, 3, 2, 1, 0}, []Splice{ {0, 3, 4, 0}, {4, 4, 3, 5}, }, ) } ================================================ FILE: go/types/encode_human_readable.go ================================================ // 
Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "fmt" "io" "strconv" "sync" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/writers" humanize "github.com/dustin/go-humanize" ) // Clients can register a 'commenter' to return a comment that will get appended // to the first line of encoded values. For example, the noms DateTime struct // normally gets encoded as follows: // lastRefresh: DateTime { // secSinceEpoch: 1.501801626877e+09, // } // // By registering a commenter that returns a nicely formatted date, // the struct will be coded with a comment: // lastRefresh: DateTime { // 2017-08-03T16:07:06-07:00 // secSinceEpoch: 1.501801626877e+09, // } // Function type for commenter functions type HRSCommenter interface { Comment(Value) string } var ( commenterRegistry = map[string]map[string]HRSCommenter{} registryLock sync.RWMutex ) // RegisterHRSCommenter is called to with three arguments: // typename: the name of the struct this function will be applied to // unique: an arbitrary string to differentiate functions that should be applied // to different structs that have the same name (e.g. two implementations of // the "Employee" type. // commenter: an interface with a 'Comment()' function that gets called for all // Values with this name. The function should verify the type of the Value // and, if appropriate, return a non-empty string to be appended as the comment func RegisterHRSCommenter(typename, unique string, commenter HRSCommenter) { registryLock.Lock() defer registryLock.Unlock() commenters := commenterRegistry[typename] if commenters == nil { commenters = map[string]HRSCommenter{} commenterRegistry[typename] = commenters } commenters[unique] = commenter } // UnregisterHRSCommenter will remove a commenter function for a specified // typename/unique string combination. 
func UnregisterHRSCommenter(typename, unique string) { registryLock.Lock() defer registryLock.Unlock() r := commenterRegistry[typename] if r == nil { return } delete(r, unique) } // GetHRSCommenters the map of 'unique' strings to HRSCommentFunc for // a specified typename. func GetHRSCommenters(typename string) []HRSCommenter { registryLock.RLock() defer registryLock.RUnlock() // need to copy this value so we can release the lock commenters := []HRSCommenter{} for _, f := range commenterRegistry[typename] { commenters = append(commenters, f) } return commenters } // Human Readable Serialization type hrsWriter struct { ind int w io.Writer lineLength int floatFormat byte err error } func (w *hrsWriter) maybeWriteIndentation() { if w.lineLength == 0 { for i := 0; i < w.ind && w.err == nil; i++ { _, w.err = io.WriteString(w.w, " ") } w.lineLength = 2 * w.ind } } func (w *hrsWriter) write(s string) { if w.err != nil { return } w.maybeWriteIndentation() var n int n, w.err = io.WriteString(w.w, s) w.lineLength += n } func (w *hrsWriter) indent() { w.ind++ } func (w *hrsWriter) outdent() { w.ind-- } func (w *hrsWriter) newLine() { w.write("\n") w.lineLength = 0 } // hexWriter is used to write blob byte data as "00 01 ... 0f\n10 11 .." // hexWriter is an io.Writer that writes to an underlying hrsWriter. 
type hexWriter struct { hrs *hrsWriter count uint sizeWritten bool size uint64 } func (w *hexWriter) Write(p []byte) (n int, err error) { for _, v := range p { if !w.sizeWritten && len(p) > 16 { w.hrs.write(" // ") w.hrs.write(humanize.Bytes(w.size)) w.sizeWritten = true w.hrs.indent() w.hrs.newLine() } if w.count == 16 { w.hrs.newLine() w.count = 0 } else if w.count != 0 { w.hrs.write(" ") } if v < 0x10 { w.hrs.write("0") } w.hrs.write(strconv.FormatUint(uint64(v), 16)) if w.hrs.err != nil { err = w.hrs.err return } n++ w.count++ } if w.sizeWritten { w.hrs.outdent() w.hrs.newLine() } return } func (w *hrsWriter) Write(v Value) { switch v.Kind() { case BoolKind: w.write(strconv.FormatBool(bool(v.(Bool)))) case NumberKind: w.write(strconv.FormatFloat(float64(v.(Number)), w.floatFormat, -1, 64)) case StringKind: w.write(strconv.Quote(string(v.(String)))) case BlobKind: w.write("blob {") blob := v.(Blob) encoder := &hexWriter{hrs: w, size: blob.Len()} _, w.err = io.Copy(encoder, blob.Reader()) w.write("}") case ListKind: w.write("[") w.writeSize(v) w.indent() v.(List).Iter(func(v Value, i uint64) bool { if i == 0 { w.newLine() } w.Write(v) w.write(",") w.newLine() return w.err != nil }) w.outdent() w.write("]") case MapKind: w.write("map {") w.writeSize(v) w.indent() if !v.(Map).Empty() { w.newLine() } v.(Map).Iter(func(key, val Value) bool { w.Write(key) w.write(": ") w.Write(val) w.write(",") w.newLine() return w.err != nil }) w.outdent() w.write("}") case RefKind: w.write("#") w.write(v.(Ref).TargetHash().String()) case SetKind: w.write("set {") w.writeSize(v) w.indent() if !v.(Set).Empty() { w.newLine() } v.(Set).Iter(func(v Value) bool { w.Write(v) w.write(",") w.newLine() return w.err != nil }) w.outdent() w.write("}") case TypeKind: w.writeType(v.(*Type), map[*Type]struct{}{}) case StructKind: w.writeStruct(v.(Struct)) default: panic("unreachable") } } type hrsStructWriter struct { *hrsWriter v Struct } func (w hrsStructWriter) name(n string) { w.write("struct 
") if n != "" { w.write(n) w.write(" ") } w.write("{") commenters := GetHRSCommenters(n) for _, commenter := range commenters { if comment := commenter.Comment(w.v); comment != "" { w.write(" // " + comment) break } } w.indent() } func (w hrsStructWriter) count(c uint64) { if c > 0 { w.newLine() } } func (w hrsStructWriter) fieldName(n string) { w.write(n) w.write(": ") } func (w hrsStructWriter) fieldValue(v Value) { w.Write(v) w.write(",") w.newLine() } func (w hrsStructWriter) end() { w.outdent() w.write("}") } func (w *hrsWriter) writeStruct(v Struct) { v.iterParts(hrsStructWriter{w, v}) } func (w *hrsWriter) writeSize(v Value) { switch v.Kind() { case ListKind, MapKind, SetKind: l := v.(Collection).Len() if l < 4 { return } w.write(fmt.Sprintf(" // %s items", humanize.Comma(int64(l)))) default: panic("unreachable") } } func (w *hrsWriter) writeType(t *Type, seenStructs map[*Type]struct{}) { switch t.TargetKind() { case BlobKind, BoolKind, NumberKind, StringKind, TypeKind, ValueKind: w.write(t.TargetKind().String()) case ListKind, RefKind, SetKind, MapKind: w.write(t.TargetKind().String()) w.write("<") for i, et := range t.Desc.(CompoundDesc).ElemTypes { if et.TargetKind() == UnionKind && len(et.Desc.(CompoundDesc).ElemTypes) == 0 { // If one of the element types is an empty union all the other element types must // also be empty union types. 
break } if i != 0 { w.write(", ") } w.writeType(et, seenStructs) if w.err != nil { break } } w.write(">") case UnionKind: for i, et := range t.Desc.(CompoundDesc).ElemTypes { if i != 0 { w.write(" | ") } w.writeType(et, seenStructs) if w.err != nil { break } } case StructKind: w.writeStructType(t, seenStructs) case CycleKind: name := string(t.Desc.(CycleDesc)) d.PanicIfTrue(name == "") // This can happen for types that have unresolved cyclic refs w.write(fmt.Sprintf("UnresolvedCycle<%s>", name)) if w.err != nil { return } default: panic("unreachable") } } func (w *hrsWriter) writeStructType(t *Type, seenStructs map[*Type]struct{}) { name := t.Desc.(StructDesc).Name if _, ok := seenStructs[t]; ok { w.write(fmt.Sprintf("Cycle<%s>", name)) return } seenStructs[t] = struct{}{} desc := t.Desc.(StructDesc) w.write("Struct ") if desc.Name != "" { w.write(desc.Name + " ") } w.write("{") w.indent() if desc.Len() > 0 { w.newLine() } desc.IterFields(func(name string, t *Type, optional bool) { w.write(name) if optional { w.write("?") } w.write(": ") w.writeType(t, seenStructs) w.write(",") w.newLine() }) w.outdent() w.write("}") } func encodedValueFormatMaxLines(v Value, floatFormat byte, maxLines uint32) string { var buf bytes.Buffer mlw := &writers.MaxLineWriter{Dest: &buf, MaxLines: maxLines} w := &hrsWriter{w: mlw, floatFormat: floatFormat} w.Write(v) if w.err != nil { d.Chk.IsType(writers.MaxLinesError{}, w.err, "Unexpected error: %s", w.err) } return buf.String() } func encodedValueFormat(v Value, floatFormat byte) string { var buf bytes.Buffer w := &hrsWriter{w: &buf, floatFormat: floatFormat} w.Write(v) d.Chk.NoError(w.err) return buf.String() } func EncodedIndexValue(v Value) string { return encodedValueFormat(v, 'f') } // EncodedValue returns a string containing the serialization of a value. func EncodedValue(v Value) string { return encodedValueFormat(v, 'g') } // EncodedValueMaxLines returns a string containing the serialization of a value. 
// The string is truncated at |maxLines|. func EncodedValueMaxLines(v Value, maxLines uint32) string { return encodedValueFormatMaxLines(v, 'g', maxLines) } // WriteEncodedValue writes the serialization of a value func WriteEncodedValue(w io.Writer, v Value) error { hrs := &hrsWriter{w: w, floatFormat: 'g'} hrs.Write(v) return hrs.err } // WriteEncodedValueMaxLines writes the serialization of a value. Writing will be // stopped and an error returned after |maxLines|. func WriteEncodedValueMaxLines(w io.Writer, v Value, maxLines uint32) error { mlw := &writers.MaxLineWriter{Dest: w, MaxLines: maxLines} hrs := &hrsWriter{w: mlw, floatFormat: 'g'} hrs.Write(v) return hrs.err } ================================================ FILE: go/types/encode_human_readable_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "errors" "strings" "testing" "github.com/attic-labs/noms/go/util/test" "github.com/stretchr/testify/assert" ) func assertWriteHRSEqual(t *testing.T, expected string, v Value) { assert := assert.New(t) var buf bytes.Buffer w := &hrsWriter{w: &buf, floatFormat: 'g'} w.Write(v) assert.Equal(test.RemoveHashes(expected), test.RemoveHashes(buf.String())) } func TestWriteHumanReadablePrimitiveValues(t *testing.T) { assertWriteHRSEqual(t, "true", Bool(true)) assertWriteHRSEqual(t, "false", Bool(false)) assertWriteHRSEqual(t, "0", Number(0)) assertWriteHRSEqual(t, "42", Number(42)) assertWriteHRSEqual(t, "-42", Number(-42)) assertWriteHRSEqual(t, "3.1415926535", Number(3.1415926535)) assertWriteHRSEqual(t, "314159.26535", Number(3.1415926535e5)) assertWriteHRSEqual(t, "3.1415926535e+20", Number(3.1415926535e20)) assertWriteHRSEqual(t, `"abc"`, String("abc")) assertWriteHRSEqual(t, `" "`, String(" ")) assertWriteHRSEqual(t, `"\t"`, String("\t")) assertWriteHRSEqual(t, `"\t"`, String(" 
")) assertWriteHRSEqual(t, `"\n"`, String("\n")) assertWriteHRSEqual(t, `"\n"`, String(` `)) assertWriteHRSEqual(t, `"\r"`, String("\r")) assertWriteHRSEqual(t, `"\r\n"`, String("\r\n")) assertWriteHRSEqual(t, `"\xff"`, String("\xff")) assertWriteHRSEqual(t, `"💩"`, String("\xf0\x9f\x92\xa9")) assertWriteHRSEqual(t, `"💩"`, String("💩")) assertWriteHRSEqual(t, `"\a"`, String("\007")) assertWriteHRSEqual(t, `"☺"`, String("\u263a")) } func TestWriteHumanReadableRef(t *testing.T) { vs := newTestValueStore() x := Number(42) rv := vs.WriteValue(x) assertWriteHRSEqual(t, "#0123456789abcdefghijklmnopqrstuv", rv) } func TestWriteHumanReadableCollections(t *testing.T) { vrw := newTestValueStore() l := NewList(vrw, Number(0), Number(1), Number(2), Number(3)) assertWriteHRSEqual(t, "[ // 4 items\n 0,\n 1,\n 2,\n 3,\n]", l) s := NewSet(vrw, Number(0), Number(1), Number(2), Number(3)) assertWriteHRSEqual(t, "set { // 4 items\n 0,\n 1,\n 2,\n 3,\n}", s) m := NewMap(vrw, Number(0), Bool(false), Number(1), Bool(true)) assertWriteHRSEqual(t, "map {\n 0: false,\n 1: true,\n}", m) l2 := NewList(vrw) assertWriteHRSEqual(t, "[]", l2) l3 := NewList(vrw, Number(0)) assertWriteHRSEqual(t, "[\n 0,\n]", l3) nums := make([]Value, 2000) for i := range nums { nums[i] = Number(0) } l4 := NewList(vrw, nums...) 
assertWriteHRSEqual(t, "[ // 2,000 items\n"+strings.Repeat(" 0,\n", 2000)+"]", l4) } func TestWriteHumanReadableNested(t *testing.T) { vrw := newTestValueStore() l := NewList(vrw, Number(0), Number(1)) l2 := NewList(vrw, Number(2), Number(3)) s := NewSet(vrw, String("a"), String("b")) s2 := NewSet(vrw, String("c"), String("d")) m := NewMap(vrw, s, l, s2, l2) assertWriteHRSEqual(t, `map { set { "c", "d", }: [ 2, 3, ], set { "a", "b", }: [ 0, 1, ], }`, m) } func TestWriteHumanReadableStruct(t *testing.T) { str := NewStruct("S1", StructData{ "x": Number(1), "y": Number(2), }) assertWriteHRSEqual(t, "struct S1 {\n x: 1,\n y: 2,\n}", str) } func TestWriteHumanReadableListOfStruct(t *testing.T) { vrw := newTestValueStore() str1 := NewStruct("S3", StructData{ "x": Number(1), }) str2 := NewStruct("S3", StructData{ "x": Number(2), }) str3 := NewStruct("S3", StructData{ "x": Number(3), }) l := NewList(vrw, str1, str2, str3) assertWriteHRSEqual(t, `[ struct S3 { x: 1, }, struct S3 { x: 2, }, struct S3 { x: 3, }, ]`, l) } func TestWriteHumanReadableBlob(t *testing.T) { vrw := newTestValueStore() assertWriteHRSEqual(t, "blob {}", NewEmptyBlob(vrw)) b1 := NewBlob(vrw, bytes.NewBuffer([]byte{0x01})) assertWriteHRSEqual(t, "blob {01}", b1) b2 := NewBlob(vrw, bytes.NewBuffer([]byte{0x01, 0x02})) assertWriteHRSEqual(t, "blob {01 02}", b2) b3 := NewBlob(vrw, bytes.NewBuffer([]byte{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, })) assertWriteHRSEqual(t, "blob {00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f}", b3) b4 := NewBlob(vrw, bytes.NewBuffer([]byte{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, })) assertWriteHRSEqual(t, "blob { // 17 B\n 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f\n 10\n}", b4) bs := make([]byte, 256) for i := range bs { bs[i] = byte(i) } b5 := NewBlob(vrw, bytes.NewBuffer(bs)) assertWriteHRSEqual(t, "blob { // 256 B\n 00 01 02 03 04 05 06 07 08 09 0a 
0b 0c 0d 0e 0f\n 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f\n 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f\n 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f\n 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f\n 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f\n 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f\n 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f\n 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f\n 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f\n a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af\n b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf\n c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf\n d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df\n e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef\n f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff\n}", b5) b6 := NewBlob(vrw, bytes.NewBuffer(make([]byte, 16*100))) row := " " + strings.Repeat("00 ", 15) + "00\n" s := strings.Repeat(row, 100) assertWriteHRSEqual(t, "blob { // 1.6 kB\n"+s+"}", b6) } func TestWriteHumanReadableListOfBlob(t *testing.T) { vrw := newTestValueStore() b1 := NewBlob(vrw, bytes.NewBuffer([]byte{0x01})) b2 := NewBlob(vrw, bytes.NewBuffer([]byte{0x02})) b3 := NewBlob(vrw, bytes.NewBuffer([]byte{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, })) l := NewList(vrw, b1, NewEmptyBlob(vrw), b2, b3) assertWriteHRSEqual(t, "[ // 4 items\n blob {01},\n blob {},\n blob {02},\n blob { // 17 B\n 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f\n 10\n },\n]", l) } func TestWriteHumanReadableType(t *testing.T) { assertWriteHRSEqual(t, "Bool", BoolType) assertWriteHRSEqual(t, "Blob", BlobType) assertWriteHRSEqual(t, "String", StringType) assertWriteHRSEqual(t, "Number", NumberType) assertWriteHRSEqual(t, "List", MakeListType(NumberType)) assertWriteHRSEqual(t, "Set", MakeSetType(NumberType)) assertWriteHRSEqual(t, "Ref", MakeRefType(NumberType)) assertWriteHRSEqual(t, "Map", MakeMapType(NumberType, StringType)) assertWriteHRSEqual(t, "Number | 
String", MakeUnionType(NumberType, StringType)) assertWriteHRSEqual(t, "Bool", MakeUnionType(BoolType)) assertWriteHRSEqual(t, "", MakeUnionType()) assertWriteHRSEqual(t, "List", MakeListType(MakeUnionType(NumberType, StringType))) assertWriteHRSEqual(t, "List<>", MakeListType(MakeUnionType())) } func TestRecursiveStruct(t *testing.T) { // struct A { // b: A // c: List // d: struct D { // e: D // f: A // } // } a := MakeStructType("A", StructField{"b", MakeCycleType("A"), false}, StructField{"c", MakeListType(MakeCycleType("A")), false}, StructField{"d", MakeStructType("D", StructField{"e", MakeCycleType("D"), false}, StructField{"f", MakeCycleType("A"), false}, ), false}, ) assertWriteHRSEqual(t, `Struct A { b: Cycle, c: List>, d: Struct D { e: Cycle, f: Cycle, }, }`, a) d, _ := a.Desc.(StructDesc).Field("d") assertWriteHRSEqual(t, `Struct D { e: Cycle, f: Struct A { b: Cycle, c: List>, d: Cycle, }, }`, d) } func TestUnresolvedRecursiveStruct(t *testing.T) { // struct A { // a: A // b: Cycle<1> (unresolved) // } a := MakeStructType("A", StructField{"a", MakeCycleType("A"), false}, StructField{"b", MakeCycleType("X"), false}, ) assertWriteHRSEqual(t, `Struct A { a: Cycle, b: UnresolvedCycle, }`, a) } type errorWriter struct { err error } func (w *errorWriter) Write(p []byte) (int, error) { return 0, w.err } func TestWriteHumanReadableWriterError(t *testing.T) { assert := assert.New(t) err := errors.New("test") w := &errorWriter{err} assert.Equal(err, WriteEncodedValue(w, Number(42))) } func TestEmptyCollections(t *testing.T) { vrw := newTestValueStore() a := MakeStructType("Nothing") assertWriteHRSEqual(t, "Struct Nothing {}", a) b := NewStruct("Rien", StructData{}) assertWriteHRSEqual(t, "struct Rien {}", b) c := MakeMapType(BlobType, NumberType) assertWriteHRSEqual(t, "Map", c) d := NewMap(vrw) assertWriteHRSEqual(t, "map {}", d) e := MakeSetType(StringType) assertWriteHRSEqual(t, "Set", e) f := NewSet(vrw) assertWriteHRSEqual(t, "set {}", f) } func 
TestEncodedValueMaxLines(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() l1 := NewList(vrw, generateNumbersAsValues(11)...) expected := strings.Join(strings.SplitAfterN(EncodedValue(l1), "\n", 6)[:5], "") assert.Equal(expected, EncodedValueMaxLines(l1, 5)) buf := bytes.Buffer{} WriteEncodedValueMaxLines(&buf, l1, 5) assert.Equal(expected, buf.String()) } func TestWriteHumanReadableStructOptionalFields(t *testing.T) { typ := MakeStructType("S1", StructField{"a", BoolType, false}, StructField{"b", BoolType, true}) assertWriteHRSEqual(t, "Struct S1 {\n a: Bool,\n b?: Bool,\n}", typ) } type TestCommenter struct { prefix string testType *Type } func (c TestCommenter) Comment(v Value) string { if !(v.typeOf().Equals(c.testType)) { return "" } return c.prefix + string(v.(Struct).Get("Name").(String)) } func TestRegisterCommenter(t *testing.T) { a := assert.New(t) tt := NewStruct("TestType1", StructData{"Name": String("abc-123")}) nt := NewStruct("TestType2", StructData{"Name": String("abc-123")}) RegisterHRSCommenter("TestType1", "mylib1", TestCommenter{prefix: "MyTest: ", testType: tt.typeOf()}) s1 := EncodedValue(tt) a.True(strings.Contains(s1, "// MyTest: abc-123")) s1 = EncodedValue(nt) a.False(strings.Contains(s1, "// MyTest: abc-123")) RegisterHRSCommenter("TestType1", "mylib1", TestCommenter{prefix: "MyTest2: ", testType: tt.typeOf()}) s1 = EncodedValue(tt) a.True(strings.Contains(s1, "// MyTest2: abc-123")) UnregisterHRSCommenter("TestType1", "mylib1") s1 = EncodedValue(tt) a.False(strings.Contains(s1, "// MyTest2: abc-123")) } ================================================ FILE: go/types/encoding_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "math" "strconv" "strings" "testing" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) func toBinaryNomsReaderData(data []interface{}) []byte { w := newBinaryNomsWriter() for _, v := range data { switch v := v.(type) { case uint8: w.writeUint8(v) case string: w.writeString(v) case Number: w.writeNumber(v) case uint64: w.writeCount(v) case bool: w.writeBool(v) case hash.Hash: w.writeHash(v) case []byte: w.writeCount(uint64(len(v))) w.writeBytes(v) case NomsKind: w.writeUint8(uint8(v)) default: panic("unreachable") } } return w.data() } func assertEncoding(t *testing.T, expect []interface{}, v Value) { expectedAsByteSlice := toBinaryNomsReaderData(expect) vs := newTestValueStore() w := newBinaryNomsWriter() v.writeTo(&w) assert.EqualValues(t, expectedAsByteSlice, w.data()) dec := newValueDecoder(expectedAsByteSlice, vs) v2 := dec.readValue() assert.True(t, v.Equals(v2)) } func TestRoundTrips(t *testing.T) { vs := newTestValueStore() assertRoundTrips := func(v Value) { out := DecodeValue(EncodeValue(v), vs) assert.True(t, v.Equals(out)) } assertRoundTrips(Bool(false)) assertRoundTrips(Bool(true)) assertRoundTrips(Number(0)) assertRoundTrips(Number(-0)) assertRoundTrips(Number(math.Copysign(0, -1))) intTest := []int64{1, 2, 3, 7, 15, 16, 17, 127, 128, 129, 254, 255, 256, 257, 1023, 1024, 1025, 2048, 4096, 8192, 32767, 32768, 65535, 65536, 4294967295, 4294967296, 9223372036854779, 92233720368547760, } for _, v := range intTest { f := float64(v) assertRoundTrips(Number(f)) f = math.Copysign(f, -1) assertRoundTrips(Number(f)) } floatTest := []float64{1.01, 1.001, 1.0001, 1.00001, 1.000001, 100.01, 1000.000001, 122.411912027329, 0.42} for _, f := range floatTest { assertRoundTrips(Number(f)) f = math.Copysign(f, -1) assertRoundTrips(Number(f)) } // JS Number.MAX_SAFE_INTEGER 
assertRoundTrips(Number(9007199254740991)) // JS Number.MIN_SAFE_INTEGER assertRoundTrips(Number(-9007199254740991)) assertRoundTrips(Number(math.MaxFloat64)) assertRoundTrips(Number(math.Nextafter(1, 2) - 1)) assertRoundTrips(String("")) assertRoundTrips(String("foo")) assertRoundTrips(String("AINT NO THANG")) assertRoundTrips(String("💩")) assertRoundTrips(NewStruct("", StructData{"a": Bool(true), "b": String("foo"), "c": Number(2.3)})) listLeaf := newList(newListLeafSequence(vs, Number(4), Number(5), Number(6), Number(7))) assertRoundTrips(listLeaf) assertRoundTrips(newList(newListMetaSequence(1, []metaTuple{ newMetaTuple(NewRef(listLeaf), orderedKeyFromInt(10), 10), newMetaTuple(NewRef(listLeaf), orderedKeyFromInt(20), 20), }, vs))) } func TestNonFiniteNumbers(tt *testing.T) { t := func(f float64, s string) { v := Number(f) err := d.Try(func() { EncodeValue(v) }) assert.Error(tt, err) assert.Contains(tt, err.Error(), s) } t(math.NaN(), "NaN is not a supported number") t(math.Inf(1), "+Inf is not a supported number") t(math.Inf(-1), "-Inf is not a supported number") } func TestWritePrimitives(t *testing.T) { assertEncoding(t, []interface{}{ BoolKind, true, }, Bool(true)) assertEncoding(t, []interface{}{ BoolKind, false, }, Bool(false)) assertEncoding(t, []interface{}{ NumberKind, Number(0), }, Number(0)) assertEncoding(t, []interface{}{ NumberKind, Number(1000000000000000000), }, Number(1e18)) assertEncoding(t, []interface{}{ NumberKind, Number(10000000000000000000), }, Number(1e19)) assertEncoding(t, []interface{}{ NumberKind, Number(1e+20), }, Number(1e20)) assertEncoding(t, []interface{}{ StringKind, "hi", }, String("hi")) } func TestWriteSimpleBlob(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ BlobKind, uint64(0), []byte{0x00, 0x01}, }, NewBlob(vrw, bytes.NewBuffer([]byte{0x00, 0x01})), ) } func TestWriteList(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ ListKind, uint64(0), uint64(4) /* len */, 
NumberKind, Number(0), NumberKind, Number(1), NumberKind, Number(2), NumberKind, Number(3), }, NewList(vrw, Number(0), Number(1), Number(2), Number(3)), ) } func TestWriteListOfList(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ ListKind, uint64(0), uint64(2), // len ListKind, uint64(0), uint64(1) /* len */, NumberKind, Number(0), ListKind, uint64(0), uint64(3) /* len */, NumberKind, Number(1), NumberKind, Number(2), NumberKind, Number(3), }, NewList(vrw, NewList(vrw, Number(0)), NewList(vrw, Number(1), Number(2), Number(3))), ) } func TestWriteSet(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ SetKind, uint64(0), uint64(4), /* len */ NumberKind, Number(0), NumberKind, Number(1), NumberKind, Number(2), NumberKind, Number(3), }, NewSet(vrw, Number(3), Number(1), Number(2), Number(0)), ) } func TestWriteSetOfSet(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ SetKind, uint64(0), uint64(2), // len SetKind, uint64(0), uint64(3) /* len */, NumberKind, Number(1), NumberKind, Number(2), NumberKind, Number(3), SetKind, uint64(0), uint64(1) /* len */, NumberKind, Number(0), }, NewSet(vrw, NewSet(vrw, Number(0)), NewSet(vrw, Number(1), Number(2), Number(3))), ) } func TestWriteMap(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ MapKind, uint64(0), uint64(2), /* len */ StringKind, "a", BoolKind, false, StringKind, "b", BoolKind, true, }, NewMap(vrw, String("a"), Bool(false), String("b"), Bool(true)), ) } func TestWriteMapOfMap(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ MapKind, uint64(0), uint64(1), // len MapKind, uint64(0), uint64(1) /* len */, StringKind, "a", NumberKind, Number(0), SetKind, uint64(0), uint64(1) /* len */, BoolKind, true, }, NewMap(vrw, NewMap(vrw, String("a"), Number(0)), NewSet(vrw, Bool(true))), ) } func TestWriteCompoundBlob(t *testing.T) { r1 := hash.Parse("00000000000000000000000000000001") r2 := 
hash.Parse("00000000000000000000000000000002") r3 := hash.Parse("00000000000000000000000000000003") assertEncoding(t, []interface{}{ BlobKind, uint64(1), uint64(3), // len RefKind, r1, BlobKind, uint64(11), NumberKind, Number(20), uint64(20), RefKind, r2, BlobKind, uint64(22), NumberKind, Number(40), uint64(40), RefKind, r3, BlobKind, uint64(33), NumberKind, Number(60), uint64(60), }, newBlob(newBlobMetaSequence(1, []metaTuple{ newMetaTuple(constructRef(r1, BlobType, 11), orderedKeyFromInt(20), 20), newMetaTuple(constructRef(r2, BlobType, 22), orderedKeyFromInt(40), 40), newMetaTuple(constructRef(r3, BlobType, 33), orderedKeyFromInt(60), 60), }, newTestValueStore())), ) } func TestWriteEmptyStruct(t *testing.T) { assertEncoding(t, []interface{}{ StructKind, "S", uint64(0), /* len */ }, NewStruct("S", nil), ) } func TestWriteStruct(t *testing.T) { assertEncoding(t, []interface{}{ StructKind, "S", uint64(2), /* len */ "b", BoolKind, true, "x", NumberKind, Number(42), }, NewStruct("S", StructData{"x": Number(42), "b": Bool(true)}), ) } func TestWriteStructTooMuchData(t *testing.T) { s := NewStruct("S", StructData{"x": Number(42), "b": Bool(true)}) c := EncodeValue(s) data := c.Data() buff := make([]byte, len(data)+1) copy(buff, data) buff[len(data)] = 5 // Add a bogus extrabyte assert.Panics(t, func() { DecodeFromBytes(buff, nil) }) } func TestWriteStructWithList(t *testing.T) { vrw := newTestValueStore() // struct S {l: List}({l: ["a", "b"]}) assertEncoding(t, []interface{}{ StructKind, "S", uint64(1), /* len */ "l", ListKind, uint64(0), uint64(2) /* len */, StringKind, "a", StringKind, "b", }, NewStruct("S", StructData{"l": NewList(vrw, String("a"), String("b"))}), ) // struct S {l: List<>}({l: []}) assertEncoding(t, []interface{}{ StructKind, "S", uint64(1), /* len */ "l", ListKind, uint64(0), uint64(0), /* len */ }, NewStruct("S", StructData{"l": NewList(vrw)}), ) } func TestWriteStructWithStruct(t *testing.T) { // struct S2 { // x: Number // } // struct S { // s: 
S2 // } assertEncoding(t, []interface{}{ StructKind, "S", uint64(1), // len "s", StructKind, "S2", uint64(1), /* len */ "x", NumberKind, Number(42), }, // {s: {x: 42}} NewStruct("S", StructData{"s": NewStruct("S2", StructData{"x": Number(42)})}), ) } func TestWriteStructWithBlob(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ StructKind, "S", uint64(1), /* len */ "b", BlobKind, uint64(0), []byte{0x00, 0x01}, }, NewStruct("S", StructData{"b": NewBlob(vrw, bytes.NewBuffer([]byte{0x00, 0x01}))}), ) } func TestWriteCompoundList(t *testing.T) { vrw := newTestValueStore() list1 := newList(newListLeafSequence(vrw, Number(0))) list2 := newList(newListLeafSequence(vrw, Number(1), Number(2), Number(3))) assertEncoding(t, []interface{}{ ListKind, uint64(1), uint64(2), // len, RefKind, list1.Hash(), ListKind, NumberKind, uint64(1), NumberKind, Number(1), uint64(1), RefKind, list2.Hash(), ListKind, NumberKind, uint64(1), NumberKind, Number(3), uint64(3), }, newList(newListMetaSequence(1, []metaTuple{ newMetaTuple(NewRef(list1), orderedKeyFromInt(1), 1), newMetaTuple(NewRef(list2), orderedKeyFromInt(3), 3), }, nil)), ) } func TestWriteCompoundSet(t *testing.T) { vrw := newTestValueStore() set1 := newSet(newSetLeafSequence(vrw, Number(0), Number(1))) set2 := newSet(newSetLeafSequence(vrw, Number(2), Number(3), Number(4))) assertEncoding(t, []interface{}{ SetKind, uint64(1), uint64(2), // len, RefKind, set1.Hash(), SetKind, NumberKind, uint64(1), NumberKind, Number(1), uint64(2), RefKind, set2.Hash(), SetKind, NumberKind, uint64(1), NumberKind, Number(4), uint64(3), }, newSet(newSetMetaSequence(1, []metaTuple{ newMetaTuple(NewRef(set1), orderedKeyFromInt(1), 2), newMetaTuple(NewRef(set2), orderedKeyFromInt(4), 3), }, vrw)), ) } func TestWriteCompoundSetOfBlobs(t *testing.T) { vrw := newTestValueStore() // Blobs are interesting because unlike the numbers used in TestWriteCompondSet, refs are sorted by their hashes, not their value. 
newBlobOfInt := func(i int) Blob { return NewBlob(vrw, strings.NewReader(strconv.Itoa(i))) } blob0 := newBlobOfInt(0) blob1 := newBlobOfInt(1) blob2 := newBlobOfInt(2) blob3 := newBlobOfInt(3) blob4 := newBlobOfInt(4) set1 := newSet(newSetLeafSequence(vrw, blob0, blob1)) set2 := newSet(newSetLeafSequence(vrw, blob2, blob3, blob4)) assertEncoding(t, []interface{}{ SetKind, uint64(1), uint64(2), // len, RefKind, set1.Hash(), SetKind, BlobKind, uint64(1), hashKind, blob1.Hash(), uint64(2), RefKind, set2.Hash(), SetKind, BlobKind, uint64(1), hashKind, blob4.Hash(), uint64(3), }, newSet(newSetMetaSequence(1, []metaTuple{ newMetaTuple(NewRef(set1), newOrderedKey(blob1), 2), newMetaTuple(NewRef(set2), newOrderedKey(blob4), 3), }, vrw)), ) } func TestWriteListOfUnion(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, // Note that the order of members in a union is determined based on a hash computation; the particular ordering of Number, Bool, String was determined empirically. This must not change unless deliberately and explicitly revving the persistent format. 
[]interface{}{ ListKind, uint64(0), uint64(4) /* len */, StringKind, "0", NumberKind, Number(1), StringKind, "2", BoolKind, true, }, NewList(vrw, String("0"), Number(1), String("2"), Bool(true), ), ) } func TestWriteListOfStruct(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ ListKind, uint64(0), uint64(1), /* len */ StructKind, "S", uint64(1) /* len */, "x", NumberKind, Number(42), }, NewList(vrw, NewStruct("S", StructData{"x": Number(42)})), ) } func TestWriteListOfUnionWithType(t *testing.T) { vrw := newTestValueStore() structType := MakeStructType("S", StructField{"x", NumberType, false}) assertEncoding(t, []interface{}{ ListKind, uint64(0), uint64(4), /* len */ BoolKind, true, TypeKind, NumberKind, TypeKind, TypeKind, TypeKind, StructKind, "S", uint64(1) /* len */, "x", NumberKind, false, }, NewList(vrw, Bool(true), NumberType, TypeType, structType, ), ) } func TestWriteRef(t *testing.T) { r := hash.Parse("0123456789abcdefghijklmnopqrstuv") assertEncoding(t, []interface{}{ RefKind, r, NumberKind, uint64(4), }, constructRef(r, NumberType, 4), ) } func TestWriteListOfTypes(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ ListKind, uint64(0), uint64(2), /* len */ TypeKind, BoolKind, TypeKind, StringKind, }, NewList(vrw, BoolType, StringType), ) } func nomsTestWriteRecursiveStruct(t *testing.T) { vrw := newTestValueStore() // struct A6 { // cs: List // v: Number // } assertEncoding(t, []interface{}{ StructKind, "A6", uint64(2) /* len */, "cs", ListKind, CycleKind, uint64(0), "v", NumberKind, ListKind, UnionKind, uint64(0) /* len */, false, uint64(0), /* len */ NumberKind, Number(42), }, // {v: 42, cs: [{v: 555, cs: []}]} NewStruct("A6", StructData{"cs": NewList(vrw), "v": Number(42)}), ) } func TestWriteUnionList(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ ListKind, uint64(0), uint64(3), /* len */ NumberKind, Number(23), StringKind, "hi", NumberKind, Number(42), }, NewList(vrw, 
Number(23), String("hi"), Number(42)), ) } func TestWriteEmptyUnionList(t *testing.T) { vrw := newTestValueStore() assertEncoding(t, []interface{}{ ListKind, uint64(0), uint64(0), /* len */ }, NewList(vrw), ) } type bogusType int func (bg bogusType) Value() Value { return bg } func (bg bogusType) Equals(other Value) bool { return false } func (bg bogusType) Less(other Value) bool { return false } func (bg bogusType) Hash() hash.Hash { return hash.Hash{} } func (bg bogusType) WalkValues(cb ValueCallback) {} func (bg bogusType) WalkRefs(cb RefCallback) {} func (bg bogusType) Kind() NomsKind { return CycleKind } func (bg bogusType) typeOf() *Type { return MakeCycleType("ABC") } func (bg bogusType) writeTo(nomsWriter) { panic("abc") } func TestBogusValueWithUnresolvedCycle(t *testing.T) { g := bogusType(1) assert.Panics(t, func() { EncodeValue(g) }) } ================================================ FILE: go/types/equals_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "testing" "github.com/stretchr/testify/assert" ) func TestValueEquals(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() values := []func() Value{ func() Value { return Bool(false) }, func() Value { return Bool(true) }, func() Value { return Number(0) }, func() Value { return Number(-1) }, func() Value { return Number(1) }, func() Value { return String("") }, func() Value { return String("hi") }, func() Value { return String("bye") }, func() Value { return NewBlob(vrw, &bytes.Buffer{}) }, func() Value { return NewBlob(vrw, bytes.NewBufferString("hi")) }, func() Value { return NewBlob(vrw, bytes.NewBufferString("bye")) }, func() Value { b1 := NewBlob(vrw, bytes.NewBufferString("hi")) b2 := NewBlob(vrw, bytes.NewBufferString("bye")) return newBlob(newBlobMetaSequence(1, []metaTuple{ newMetaTuple(NewRef(b1), orderedKeyFromInt(2), 2), newMetaTuple(NewRef(b2), orderedKeyFromInt(5), 5), }, nil)) }, func() Value { return NewList(vrw) }, func() Value { return NewList(vrw, String("foo")) }, func() Value { return NewList(vrw, String("bar")) }, func() Value { return NewMap(vrw) }, func() Value { return NewMap(vrw, String("a"), String("a")) }, func() Value { return NewSet(vrw) }, func() Value { return NewSet(vrw, String("hi")) }, func() Value { return BoolType }, func() Value { return StringType }, func() Value { return MakeStructType("a") }, func() Value { return MakeStructType("b") }, func() Value { return MakeListType(BoolType) }, func() Value { return MakeListType(NumberType) }, func() Value { return MakeSetType(BoolType) }, func() Value { return MakeSetType(NumberType) }, func() Value { return MakeRefType(BoolType) }, func() Value { return MakeRefType(NumberType) }, func() Value { return MakeMapType(BoolType, ValueType) }, func() Value { return MakeMapType(NumberType, ValueType) }, } for i, f1 := range values { for j, f2 := range 
values { if i == j { assert.True(f1().Equals(f2())) } else { assert.False(f1().Equals(f2())) } } v := f1() if v != nil { r := NewRef(v) assert.False(r.Equals(v)) assert.False(v.Equals(r)) } } } ================================================ FILE: go/types/get_hash.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import "github.com/attic-labs/noms/go/hash" var getHashOverride func(v Value) hash.Hash func getHash(v Value) hash.Hash { if getHashOverride != nil { return getHashOverride(v) } return getHashNoOverride(v) } func getHashNoOverride(v Value) hash.Hash { return EncodeValue(v).Hash() } func EnsureHash(h *hash.Hash, v Value) hash.Hash { if h.IsEmpty() { *h = getHash(v) } return *h } ================================================ FILE: go/types/graph_builder.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // GraphBuilder allows non-RAM-bound construction of a graph of nested Maps whose // leaf collections can be Lists, Sets, or Maps that contain any type of Noms // Values. // // Graphs are built by calling one of the GraphBuilder functions: // MapSet(graphKeys, key, value) // SetInsert(graphKeys, value) // ListAppend(graphKeys, value) // // GraphBuilder uses an opCache to store graph operations in leveldb and to be // able to read them out later in a way which ensures a total ordering of all // the nodes at each level of the graph. (See opcache.go for more info on how // that is done) // // GraphBuilder.Build() does the work of assembling the graph. Build() gets an // iterator for this graph from the opCache and uses it to iterate over all the // operations that have been stored for this graph. 
opCache ensures that the // operations are returned in optimal sorted order so that sequenceChunker can // most efficiently assemble the graph. Build() will ensure that there is a Map // object for each key in |graphKeys|. Any node that falls in the middle of the // graph must be a Map, although, intermediate nodes may have any element as keys // as long as the path formed by the graphKeys doesn't conflict. // // MapSet(), SetInsert(), and ListAppend() are threadsafe meaning they can safely // be called from different go routines. However, the semantics of ListAppend() // are such that the order of the list will be determined by which thread() calls // ListAppend first (this function call may be modified later to allow specification // of index or order). // // Build() should only be called once, after all the operations for the graph // have been stored. It is the caller's responsibility to make sure that all // calls to the mutation operations have completed before Build() is invoked. // package types import ( "bytes" "fmt" "sync" "github.com/attic-labs/noms/go/d" ) type GraphBuilder struct { opcStore opCacheStore oc opCache vrw ValueReadWriter stack graphStack mutex sync.Mutex } // NewGraphBuilder returns an new GraphBuilder object. func NewGraphBuilder(vrw ValueReadWriter, rootKind NomsKind) *GraphBuilder { return newGraphBuilder(vrw, newLdbOpCacheStore(vrw), rootKind) } func newGraphBuilder(vrw ValueReadWriter, opcStore opCacheStore, rootKind NomsKind) *GraphBuilder { b := &GraphBuilder{oc: opcStore.opCache(), opcStore: opcStore, vrw: vrw} b.pushNewKeyOnStack(String("ROOT"), rootKind) return b } // MapSet will add the key/value pair |k, v| to the map found by traversing // the graph using the |keys| slice. Intermediate maps referenced by |keys| are // created as necessary. This is threadsafe, may call from multiple go routines. 
func (b *GraphBuilder) MapSet(keys []Value, k Value, v Value) { if b.oc == nil { d.Panic("Can't call MapSet() again after Build()") } b.oc.GraphMapSet(keys, k, v) } // SetInsert will insert the value |v| into the set at path |keys|. Intermediate // maps referenced by |keys| are created as necessary. This is threadsafe, may // call from multiple go routines. func (b *GraphBuilder) SetInsert(keys []Value, v Value) { if b.oc == nil { d.Panic("Can't call SetInsert() again after Build()") } b.oc.GraphSetInsert(keys, v) } // ListAppend will append |v| to the list at path |p|. Intermediate // maps referenced by |keys| are created as necessary. This is threadsafe, may // call from multiple go routines, however append semantics are such that the // elements will be appended in order that functions are called, so order has // to be managed by caller. func (b *GraphBuilder) ListAppend(keys []Value, v Value) { if b.oc == nil { d.Panic("Can't call ListAppend() again after Build()") } b.oc.GraphListAppend(keys, v) } type graphOpContainer struct { keys []Value kind NomsKind item sequenceItem } // Build builds and returns the graph. This method should only be called after all // calls to the mutation operations (i.e. MapSet, SetInsert, and ListAppend) // have completed. It is the caller's responsibility to ensure that this is // the case. Build() will panic if called more than once on any GraphBuilder // object. func (b *GraphBuilder) Build() Value { var opc opCache var opcStore opCacheStore defer func() { opcStore.destroy() }() // Use function here to take advantage fo the deferred call to mutex.Unlock() func() { b.mutex.Lock() defer b.mutex.Unlock() if b.oc == nil { d.Panic("Can only call Build() once") } opcStore, opc = b.opcStore, b.oc b.opcStore, b.oc = nil, nil }() iter := opc.NewIterator() defer iter.Release() // start up a go routine that will do the reading from graphBuilder's private // ldb opCache. 
graphOpChan := make(chan graphOpContainer, 512) go func() { for iter.Next() { keys, kind, item := iter.GraphOp() container := graphOpContainer{keys: keys, kind: kind, item: item} graphOpChan <- container } close(graphOpChan) }() // iterator returns keys, in sort order by array for goc := range graphOpChan { keys, kind, item := goc.keys, goc.kind, goc.item // Get index of first key that is different than what is on the stack idx := commonPrefixCount(b.stack, keys) if idx == -1 { // no keys have changed we're working on same coll as previous // iteration, just append to sequenceChunker at top of stack b.appendItemToCurrentTopOfStack(kind, item) continue } // Some keys that were in the last graphOp are no longer present // which indicates that we are finished adding to those cols. Pop // those keys from the stack. This will cause any popped cols to be // closed and added to their parents. for idx < b.stack.lastIdx() { b.popKeyFromStack() } // We may have popped some keys off of the stack and are left with // an item to append to the stack of a previously existing key. if b.stack.lastIdx() == len(keys) { b.appendItemToCurrentTopOfStack(kind, item) } // Or we may have some new keys to add to the stack. Add those keys // and then append the item to the top element. for b.stack.lastIdx() < len(keys) { if b.stack.lastIdx() < len(keys)-1 { b.pushNewKeyOnStack(keys[b.stack.lastIdx()], MapKind) } else { b.pushNewKeyOnStack(keys[b.stack.lastIdx()], kind) b.appendItemToCurrentTopOfStack(kind, item) } } } // We're done adding elements. Pop any intermediate keys off the stack and // fold their results into their parent map. for b.stack.len() > 1 { b.popKeyFromStack() } res := b.stack.pop().done() return res } // pushNewKeyOnStack() creates a new graphStackElem node and pushes it on the // stack. The new element contains the |key| and a new sequenceChunker that will // be appended to to build this node in the graph. 
func (b *GraphBuilder) pushNewKeyOnStack(key Value, kind NomsKind) {
	// Pick the empty chunker that matches the kind of collection being built;
	// any kind other than Map, Set or List is rejected with a panic.
	var ch *sequenceChunker
	switch kind {
	case MapKind:
		ch = newEmptyMapSequenceChunker(b.vrw)
	case SetKind:
		ch = newEmptySetSequenceChunker(b.vrw)
	case ListKind:
		ch = newEmptyListSequenceChunker(b.vrw)
	default:
		panic("bad 'kind' value in GraphBuilder, newElem()")
	}
	b.stack.push(&graphStackElem{key: key, kind: kind, ch: ch})
}

// popKeyFromStack() pops the last element off the stack, calls done() to
// finish any sequenceChunking that is in progress, and then appends the
// finished collection to its parent (the new top of the stack) as a map entry
// keyed by the popped element's key.
func (b *GraphBuilder) popKeyFromStack() {
	elem := b.stack.pop()
	col := elem.done()
	top := b.stack.top()
	top.ch.Append(mapEntry{elem.key, col})
}

// appendItemToCurrentTopOfStack() adds the current item to the sequenceChunker
// that's on the top of the stack. It panics if |kind| differs from the kind of
// the collection on top of the stack.
func (b *GraphBuilder) appendItemToCurrentTopOfStack(kind NomsKind, item sequenceItem) {
	top := b.stack.top()
	d.PanicIfTrue(top.kind != kind)
	top.ch.Append(item)
}

// graphStackElem is one collection under construction: the key it will be
// stored under in its parent, its kind, and the chunker accumulating its items.
type graphStackElem struct {
	key  Value
	kind NomsKind
	ch   *sequenceChunker
}

// graphStack is a slice-backed stack of graphStackElems; the last slice element
// is the top of the stack.
type graphStack struct {
	elems []*graphStackElem
}

// push adds |e| to the top of the stack.
func (s *graphStack) push(e *graphStackElem) {
	s.elems = append(s.elems, e)
}

// pop removes and returns the top element. Indexing panics if the stack is
// empty.
func (s *graphStack) pop() *graphStackElem {
	l := len(s.elems) - 1
	elem := s.elems[l]    // last element
	s.elems = s.elems[:l] // truncate last element
	return elem
}

// top returns the top element without removing it. Indexing panics if the
// stack is empty.
func (s *graphStack) top() *graphStackElem {
	l := len(s.elems) - 1
	return s.elems[l] // last element
}

// len returns the number of elements on the stack.
func (s *graphStack) len() int {
	return len(s.elems)
}

// lastIdx returns the index of the top element, or -1 for an empty stack.
func (s *graphStack) lastIdx() int {
	return len(s.elems) - 1
}

// String renders the stack one element per line, top of stack first.
func (s graphStack) String() string {
	buf := bytes.Buffer{}
	for i := len(s.elems) - 1; i >= 0; i-- {
		fmt.Fprintln(&buf, "#:", i, s.elems[i])
	}
	return buf.String()
}

// done() creates the appropriate collection for this element and returns it
func (e *graphStackElem) done() Collection {
	switch e.kind {
	case MapKind:
		return newMap(e.ch.Done().(orderedSequence))
	case SetKind:
		return newSet(e.ch.Done().(orderedSequence))
	case ListKind:
		return newList(e.ch.Done())
	}
	panic("unreachable")
}

// commonPrefixCount compares |keys| against the keys on |stack|, ignoring the
// stack's first ('ROOT') element. It returns:
//   - the index of the first position at which they differ, or
//   - -1 if no position differs and both have the same length, or
//   - the length of the shorter of the two otherwise (which equals len(keys)
//     when the stack holds more elements than |keys|).
func commonPrefixCount(stack graphStack, keys ValueSlice) int {
	minLen := len(keys)
	// don't consider the 'ROOT' stack element
	elems := stack.elems[1:]
	if len(elems) < minLen {
		minLen = len(elems)
	}

	for i := 0; i < minLen; i++ {
		if !elems[i].key.Equals(keys[i]) {
			return i
		}
	}

	if len(keys) == len(elems) {
		return -1
	}

	return minLen
}

// String formats the element's encoded key, its kind and the address of its
// chunker.
func (e *graphStackElem) String() string {
	return fmt.Sprintf("key: %s, kind: %s, seq: %p", EncodedValue(e.key), e.kind, e.ch)
}

================================================
FILE: go/types/graph_builder_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"fmt"
	"math/rand"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestGraphBuilderFindIndex pins the return values of commonPrefixCount for
// mismatching, identical, shorter and longer key paths.
func TestGraphBuilderFindIndex(t *testing.T) {
	assert := assert.New(t)

	elems := []*graphStackElem{
		{key: String("ROOT")},
		{key: String("one")},
		{key: String("two")},
		{key: String("three")},
		{key: String("four")},
	}
	s := graphStack{elems: elems}

	// Fixed five-element stack, varying key paths.
	assert.Equal(0, commonPrefixCount(s, []Value{String("zero")}))
	assert.Equal(1, commonPrefixCount(s, []Value{String("one"), String("zero")}))
	assert.Equal(3, commonPrefixCount(s, []Value{String("one"), String("two"), String("three")}))
	assert.Equal(-1, commonPrefixCount(s, []Value{String("one"), String("two"), String("three"), String("four")}))
	assert.Equal(4, commonPrefixCount(s, []Value{String("one"), String("two"), String("three"), String("four"), String("five")}))

	// Fixed key path, varying stack depth.
	values := []Value{String("one"), String("two"), String("three"), String("four")}
	assert.Equal(-1, commonPrefixCount(graphStack{elems: elems[:1]}, []Value{}))
	assert.Equal(0, commonPrefixCount(graphStack{elems: elems[:1]}, values))
	assert.Equal(1, commonPrefixCount(graphStack{elems: elems[:2]}, values))
	assert.Equal(3, commonPrefixCount(graphStack{elems: elems[:4]}, values))
	assert.Equal(-1, commonPrefixCount(graphStack{elems: elems}, values))
	assert.Equal(2, commonPrefixCount(graphStack{elems: elems[:5]}, values[:2]))
}

// testGraphOp records one graph operation (path, collection kind, item) so the
// test can replay it against a GraphBuilder.
type testGraphOp struct {
	keys ValueSlice
	kind NomsKind
	item sequenceItem
}

// SafeEquals reports whether |v1| and |v2| are equal, treating two nil values
// as equal and a nil value as unequal to any non-nil value.
func SafeEquals(v1, v2 Value) bool {
	if v1 == nil && v2 == nil {
		return true
	}
	if v1 == nil || v2 == nil {
		return false
	}
	return v1.Equals(v2)
}

// TestGraphBuilderEncodeDecodeAsKey encodes values with encodeGraphKey and
// decodes them again. Kinds for which isKindOrderedByValue is true are expected
// to round-trip; all other kinds are expected to decode to nil.
func TestGraphBuilderEncodeDecodeAsKey(t *testing.T) {
	assert := assert.New(t)
	vrw := newTestValueStore()
	defer vrw.Close()

	struct1 := NewStruct("teststruct", StructData{
		"f1": String("v1"),
		"f2": String("v2"),
	})
	keys := ValueSlice{Bool(true), Number(19), String("think!"), struct1}

	byteBuf := [initialBufferSize]byte{}
	bs := byteBuf[:0]

	numKeys := len(keys)
	expectedRes := ValueSlice{}
	for _, k := range keys {
		if isKindOrderedByValue(k.Kind()) {
			expectedRes = append(expectedRes, k)
		} else {
			expectedRes = append(expectedRes, nil)
		}
		bs = encodeGraphKey(bs, k)
	}

	res := ValueSlice{}
	for pos := 0; pos < numKeys; pos++ {
		var k Value
		bs, k = decodeValue(bs, false, vrw)
		res = append(res, k)
	}
	assert.Equal(len(keys), len(res))
	for i, origKey := range expectedRes {
		assert.True(SafeEquals(origKey, res[i]))
	}
}

// TestGraphBuilderEncodeDecodeAsValue encodes values with encodeGraphValue and
// expects every one of them to decode back to an equal value.
func TestGraphBuilderEncodeDecodeAsValue(t *testing.T) {
	assert := assert.New(t)
	vrw := newTestValueStore()
	defer vrw.Close()

	struct1 := NewStruct("teststruct", StructData{
		"f1": String("v1"),
		"f2": String("v2"),
	})
	keys := ValueSlice{Bool(true), Number(19), String("think!"), struct1}

	byteBuf := [initialBufferSize]byte{}
	bs := byteBuf[:0]

	numKeys := len(keys)
	for _, k := range keys {
		bs = encodeGraphValue(bs, k)
	}

	res := ValueSlice{}
	for pos := 0; pos < numKeys; pos++ {
		var k Value
		bs, k = decodeValue(bs, true, vrw)
		res = append(res, k)
	}
	assert.Equal(len(keys), len(res))
	for i, origKey := range keys {
		assert.True(SafeEquals(origKey, res[i]))
	}
}

// TestGraphBuilderMapSetGraphOp stores a single GraphMapSet operation in an
// opCache and checks that the iterator returns exactly that operation.
func TestGraphBuilderMapSetGraphOp(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	opcStore := newLdbOpCacheStore(vs)
	opc := opcStore.opCache()
	defer opcStore.destroy()

	struct1 := NewStruct("teststruct", StructData{
		"f1": String("v1"),
		"f2": String("v2"),
	})
	keys := ValueSlice{Bool(true), Number(19), String("think!"), struct1}
	opc.GraphMapSet(keys, String("yo"), Number(199))

	iter := opc.NewIterator()
	assert.True(iter.Next())
	keys1, kind, item := iter.GraphOp()
	assert.Equal(len(keys), len(keys1))
	assert.True(keys.Equals(keys1))
	assert.Equal(MapKind, kind)
	assert.IsType(mapEntry{}, item)
	me := item.(mapEntry)
	assert.True(String("yo").Equals(me.key))
	assert.True(Number(199).Equals(me.value))
	assert.False(iter.Next())
}

// createTestMap() constructs a random graph sized according to the |levels|
// and |avgSize| parameters. Maps are nested until |lvl| == |levels|, where each
// value is a random leaf Map, Set or List; above that level roughly one entry
// in ten holds a plain value instead of a nested map. Collection sizes are
// drawn from [avgSize/2, avgSize/2+avgSize) with a minimum of 2.
func createTestMap(vrw ValueReadWriter, levels, avgSize int, valGen func() Value) Map {
	sampleSize := func() int {
		size := (int(rand.Int31()) % avgSize) + (avgSize / 2)
		if size < 2 {
			return 2
		}
		return size
	}

	genLeaf := func() Value {
		numElems := sampleSize()
		elems := ValueSlice{}
		for i := 0; i < numElems; i++ {
			elems = append(elems, valGen())
		}
		switch rand.Int31() % 3 {
		case 0:
			// NewMap is given key/value pairs, so drop a trailing odd element.
			if numElems%2 != 0 {
				numElems--
			}
			return NewMap(vrw, elems[:numElems]...)
		case 1:
			return NewSet(vrw, elems...)
		case 2:
			return NewList(vrw, elems...)
		}
		panic("unreachable")
	}

	var genChildren func(lvl int) Map
	genChildren = func(lvl int) Map {
		numChildren := sampleSize()
		kvs := ValueSlice{}
		for i := 0; i < numChildren; i++ {
			if lvl == levels {
				kvs = append(kvs, valGen(), genLeaf())
			} else {
				// Once in a while, throw in a non-collection value into the
				// middle of the graph
				if rand.Int31()%10 == 0 {
					kvs = append(kvs, valGen(), valGen())
				} else {
					kvs = append(kvs, valGen(), genChildren(lvl+1))
				}
			}
		}
		return NewMap(vrw, kvs...)
	}

	return genChildren(0)
}

// valGen() creates a random String, Number, or Struct Value
func valGen() Value {
	num := rand.Int31() % 1000000
	switch rand.Int31() % 4 {
	case 0:
		return String(fmt.Sprintf("%d", num))
	case 1:
		return Number(num)
	case 2:
		return NewStruct("teststruct", map[string]Value{"f1": Number(num)})
	case 3:
		return NewStruct("teststruct", map[string]Value{"f1": String(fmt.Sprintf("%d", num))})
	}
	panic("unreachable")
}

// dupSlice() duplicates a slice along with its backing store.
func dupSlice(s ValueSlice) ValueSlice {
	vs := make(ValueSlice, len(s))
	copy(vs, s)
	return vs
}

// shuffle randomly swaps elements of |a| in place. A swap is skipped whenever
// either element is a ListKind op, so list ops never change position.
func shuffle(a []testGraphOp) {
	for i := range a {
		j := rand.Intn(i + 1)
		if a[i].kind != ListKind && a[j].kind != ListKind {
			a[i], a[j] = a[j], a[i]
		}
	}
}

// TestGraphBuilderNestedMapSet generates a random nested map, flattens it into
// graph ops, shuffles them, replays them through a GraphBuilder and expects the
// built value to equal the original map.
//
// See https://github.com/attic-labs/noms/issues/3840
func TestGraphBuilderNestedMapSet(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()
	defer vs.Close()

	expected := createTestMap(vs, 3, 4, valGen)
	b := NewGraphBuilder(vs, MapKind)

	ops := []testGraphOp{}

	isNomsCollectionKind := func(kind NomsKind) bool {
		return kind == MapKind || kind == SetKind || kind == ListKind
	}

	// generateOps walks |col| recursively and records one op per leaf item,
	// with |keys| being the path of map keys leading to |col|.
	var generateOps func(keys []Value, col Value)
	generateOps = func(keys []Value, col Value) {
		switch c := col.(type) {
		case Map:
			c.Iter(func(k, v Value) bool {
				if isNomsCollectionKind(v.Kind()) {
					newKeys := append(keys, k)
					generateOps(newKeys, v)
				} else {
					tgo := testGraphOp{keys: dupSlice(keys), kind: MapKind, item: mapEntry{k, v}}
					ops = append(ops, tgo)
				}
				return false
			})
		case List:
			c.Iter(func(v Value, idx uint64) bool {
				tgo := testGraphOp{keys: dupSlice(keys), kind: ListKind, item: v}
				ops = append(ops, tgo)
				return false
			})
		case Set:
			c.Iter(func(v Value) bool {
				tgo := testGraphOp{keys: dupSlice(keys), kind: SetKind, item: v}
				ops = append(ops, tgo)
				return false
			})
		}
	}

	generateOps(nil, expected)
	shuffle(ops)

	for _, op := range ops {
		switch op.kind {
		case MapKind:
			b.MapSet(op.keys, op.item.(mapEntry).key, op.item.(mapEntry).value)
		case SetKind:
			b.SetInsert(op.keys, op.item.(Value))
		case ListKind:
			b.ListAppend(op.keys, op.item.(Value))
		}
	}

	v := b.Build()
	assert.NotNil(v)
	assert.True(expected.Equals(v))
}

// ExampleGraphBuilder_Build shows a set, a map and a list being built under a
// shared "parent" key, plus a direct entry in the "parent" map itself.
func ExampleGraphBuilder_Build() {
	vs := newTestValueStore()
	defer vs.Close()

	gb := NewGraphBuilder(vs, MapKind)
	gb.SetInsert([]Value{String("parent"), String("children")}, String("John"))
	gb.SetInsert([]Value{String("parent"), String("children")}, String("Mary"))
	gb.SetInsert([]Value{String("parent"), String("children")}, String("Frieda"))
	gb.MapSet([]Value{String("parent"), String("ages")}, String("Father"), Number(42))
	gb.MapSet([]Value{String("parent"), String("ages")}, String("Mother"), Number(44))
	gb.ListAppend([]Value{String("parent"), String("chores")}, String("Make dinner"))
	gb.ListAppend([]Value{String("parent"), String("chores")}, String("Wash dishes"))
	gb.ListAppend([]Value{String("parent"), String("chores")}, String("Make breakfast"))
	gb.ListAppend([]Value{String("parent"), String("chores")}, String("Wash dishes"))
	gb.MapSet([]Value{String("parent")}, String("combinedAge"), Number(86))
	m := gb.Build()
	fmt.Println("map:", EncodedValue(m))
}

================================================
FILE: go/types/incremental_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"bytes"
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/stretchr/testify/assert"
)

// getTestVals returns one sample value of several kinds (Bool, Number, String,
// Blob, Set, List and Map), with the collections built against |vrw|.
func getTestVals(vrw ValueReadWriter) []Value {
	return []Value{
		Bool(true),
		Number(1),
		String("hi"),
		NewBlob(vrw, bytes.NewReader([]byte("hi"))), // compoundBlob
		NewSet(vrw, String("hi")),
		NewList(vrw, String("hi")),
		NewMap(vrw, String("hi"), String("hi")),
	}
}

// isEncodedOutOfLine returns 1 if |v| is a Ref and 0 otherwise. The tests in
// this file add the result to their running count of expected reads.
func isEncodedOutOfLine(v Value) int {
	switch v.(type) {
	case Ref:
		return 1
	}
	return 0
}

// TestIncrementalLoadList writes a list, reads it back, and checks the chunk
// store's read counter after every element access: the counter may only grow
// for elements that are Refs, and accessing the same element twice must not
// increase it again.
func TestIncrementalLoadList(t *testing.T) {
	assert := assert.New(t)
	ts := &chunks.TestStorage{}
	cs := ts.NewView()
	vs := NewValueStore(cs)

	expected := NewList(vs, getTestVals(vs)...)
	hash := vs.WriteValue(expected).TargetHash()
	vs.Commit(vs.Root(), vs.Root())

	actualVar := vs.ReadValue(hash)
	actual := actualVar.(List)

	expectedCount := cs.Reads
	assert.Equal(1, expectedCount)
	// There will be one read per chunk.
	// NOTE(review): chunkReads is never written to, so every entry stays 0 and
	// the additions below have no effect.
	chunkReads := make([]int, expected.Len())
	for i := uint64(0); i < expected.Len(); i++ {
		v := actual.Get(i)
		assert.True(expected.Get(i).Equals(v))

		expectedCount += isEncodedOutOfLine(v)
		assert.Equal(expectedCount+chunkReads[i], cs.Reads)

		// Do it again to make sure multiple derefs don't do multiple loads.
		_ = actual.Get(i)
		assert.Equal(expectedCount+chunkReads[i], cs.Reads)
	}
}

// SkipTestIncrementalLoadSet is the Set counterpart of TestIncrementalLoadList.
// Its name does not start with "Test", so `go test` does not run it.
func SkipTestIncrementalLoadSet(t *testing.T) {
	assert := assert.New(t)
	ts := &chunks.TestStorage{}
	cs := ts.NewView()
	vs := NewValueStore(cs)

	expected := NewSet(vs, getTestVals(vs)...)
	ref := vs.WriteValue(expected).TargetHash()

	actualVar := vs.ReadValue(ref)
	actual := actualVar.(Set)

	expectedCount := cs.Reads
	assert.Equal(1, expectedCount)
	actual.Iter(func(v Value) (stop bool) {
		expectedCount += isEncodedOutOfLine(v)
		assert.Equal(expectedCount, cs.Reads)
		return
	})
}

// SkipTestIncrementalLoadMap is the Map counterpart of TestIncrementalLoadList,
// counting both keys and values. Its name does not start with "Test", so
// `go test` does not run it.
func SkipTestIncrementalLoadMap(t *testing.T) {
	assert := assert.New(t)
	ts := &chunks.TestStorage{}
	cs := ts.NewView()
	vs := NewValueStore(cs)

	expected := NewMap(vs, getTestVals(vs)...)
	ref := vs.WriteValue(expected).TargetHash()

	actualVar := vs.ReadValue(ref)
	actual := actualVar.(Map)

	expectedCount := cs.Reads
	assert.Equal(1, expectedCount)
	actual.Iter(func(k, v Value) (stop bool) {
		expectedCount += isEncodedOutOfLine(k)
		expectedCount += isEncodedOutOfLine(v)
		assert.Equal(expectedCount, cs.Reads)
		return
	})
}

// SkipTestIncrementalAddRef writes a list holding a Ref to a Number and expects
// one read for the list, a second read when the element is first accessed, and
// no further read on a repeated access. Its name does not start with "Test", so
// `go test` does not run it.
func SkipTestIncrementalAddRef(t *testing.T) {
	assert := assert.New(t)
	ts := &chunks.TestStorage{}
	cs := ts.NewView()
	vs := NewValueStore(cs)

	expectedItem := Number(42)
	ref := vs.WriteValue(expectedItem)

	expected := NewList(vs, ref)
	ref = vs.WriteValue(expected)
	actualVar := vs.ReadValue(ref.TargetHash())

	assert.Equal(1, cs.Reads)
	assert.True(expected.Equals(actualVar))

	actual := actualVar.(List)
	actualItem := actual.Get(0)
	assert.Equal(2, cs.Reads)
	assert.True(expectedItem.Equals(actualItem))

	// do it again to make sure caching works.
	actualItem = actual.Get(0)
	assert.Equal(2, cs.Reads)
	assert.True(expectedItem.Equals(actualItem))
}

================================================
FILE: go/types/indexed_sequence_diff.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

// sendSpliceChange sends |splice| on |changes|, blocking until either the send
// succeeds or |closeChan| becomes readable. It returns true if the splice was
// sent and false if |closeChan| fired first.
func sendSpliceChange(changes chan<- Splice, closeChan <-chan struct{}, splice Splice) bool {
	select {
	case changes <- splice:
	case <-closeChan:
		return false
	}
	return true
}

// indexedSequenceDiff computes the splices that turn |last| into |current| and
// sends them on |changes|, expressed in leaf coordinates.
//
// |lastOffset| and |currentOffset| are added to the positions reported for
// |last| and |current| respectively; recursive calls use them to account for
// the leaves that precede the sub-sequence being compared.
// |maxSpliceMatrixSize| is passed through unchanged to calcSplices.
//
// It returns false as soon as a send is abandoned because |closeChan| fired,
// and true once every splice has been sent.
func indexedSequenceDiff(last sequence, lastOffset uint64, current sequence, currentOffset uint64, changes chan<- Splice, closeChan <-chan struct{}, maxSpliceMatrixSize uint64) bool {
	// If the two trees differ in height, descend into all children of the
	// taller one until both sides are at the same level.
	if last.treeLevel() > current.treeLevel() {
		lastChild := last.getCompositeChildSequence(0, uint64(last.seqLen()))
		return indexedSequenceDiff(lastChild, lastOffset, current, currentOffset, changes, closeChan, maxSpliceMatrixSize)
	}

	if current.treeLevel() > last.treeLevel() {
		currentChild := current.getCompositeChildSequence(0, uint64(current.seqLen()))
		return indexedSequenceDiff(last, lastOffset, currentChild, currentOffset, changes, closeChan, maxSpliceMatrixSize)
	}

	// Diff the items of the two sequences at this level.
	compareFn := last.getCompareFn(current)
	initialSplices := calcSplices(uint64(last.seqLen()), uint64(current.seqLen()), maxSpliceMatrixSize,
		func(i uint64, j uint64) bool { return compareFn(int(i), int(j)) })

	for _, splice := range initialSplices {
		if last.isLeaf() {
			// This is a leaf sequence, we can just report the splice, but its indices must be offset.
			splice.SpAt += lastOffset
			if splice.SpAdded > 0 {
				splice.SpFrom += currentOffset
			}

			if !sendSpliceChange(changes, closeChan, splice) {
				return false
			}
			continue
		}

		if splice.SpRemoved == 0 || splice.SpAdded == 0 {
			// One or more entire subtrees were only removed or only added at a meta level, so there is
			// nothing to compare further down. We must do some math to map the splice from the meta
			// level into the leaf coordinates.
			beginRemoveIndex := uint64(0)
			if splice.SpAt > 0 {
				beginRemoveIndex = last.cumulativeNumberOfLeaves(int(splice.SpAt) - 1)
			}
			endRemoveIndex := uint64(0)
			if splice.SpAt+splice.SpRemoved > 0 {
				endRemoveIndex = last.cumulativeNumberOfLeaves(int(splice.SpAt+splice.SpRemoved) - 1)
			}
			beginAddIndex := uint64(0)
			if splice.SpFrom > 0 {
				beginAddIndex = current.cumulativeNumberOfLeaves(int(splice.SpFrom) - 1)
			}
			endAddIndex := uint64(0)
			if splice.SpFrom+splice.SpAdded > 0 {
				endAddIndex = current.cumulativeNumberOfLeaves(int(splice.SpFrom+splice.SpAdded) - 1)
			}

			splice.SpAt = lastOffset + beginRemoveIndex
			splice.SpRemoved = endRemoveIndex - beginRemoveIndex

			splice.SpAdded = endAddIndex - beginAddIndex
			// SpFrom is only rewritten when something was actually added.
			if splice.SpAdded > 0 {
				splice.SpFrom = currentOffset + beginAddIndex
			}

			if !sendSpliceChange(changes, closeChan, splice) {
				return false
			}
			continue
		}

		// Meta sequence splice which includes removed & added sub-sequences. Must recurse down.
		lastChild := last.getCompositeChildSequence(splice.SpAt, splice.SpRemoved)
		currentChild := current.getCompositeChildSequence(splice.SpFrom, splice.SpAdded)
		// The child offsets are this level's offsets plus the leaves that
		// precede the spliced range on each side.
		lastChildOffset := lastOffset
		if splice.SpAt > 0 {
			lastChildOffset += last.cumulativeNumberOfLeaves(int(splice.SpAt) - 1)
		}
		currentChildOffset := currentOffset
		if splice.SpFrom > 0 {
			currentChildOffset += current.cumulativeNumberOfLeaves(int(splice.SpFrom) - 1)
		}
		if ok := indexedSequenceDiff(lastChild, lastChildOffset, currentChild, currentChildOffset, changes, closeChan, maxSpliceMatrixSize); !ok {
			return false
		}
	}

	return true
}

================================================
FILE: go/types/indexed_sequences.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" ) func newListMetaSequence(level uint64, tuples []metaTuple, vrw ValueReadWriter) metaSequence { return newMetaSequenceFromTuples(ListKind, level, tuples, vrw) } func newBlobMetaSequence(level uint64, tuples []metaTuple, vrw ValueReadWriter) metaSequence { return newMetaSequenceFromTuples(BlobKind, level, tuples, vrw) } // advanceCursorToOffset advances the cursor as close as possible to idx // // If the cursor references a leaf sequence, // advance to idx, // and return the number of values preceding the idx // If it references a meta-sequence, // advance to the tuple containing idx, // and return the number of leaf values preceding this tuple func advanceCursorToOffset(cur *sequenceCursor, idx uint64) uint64 { seq := cur.seq if ms, ok := seq.(metaSequence); ok { // For a meta sequence, advance the cursor to the smallest position where idx < seq.cumulativeNumLeaves() cur.idx = 0 cum := uint64(0) seqLen := ms.seqLen() // Advance the cursor to the meta-sequence tuple containing idx for cur.idx < seqLen-1 { numLeaves := ms.getNumLeavesAt(cur.idx) if uint64(idx) >= cum+numLeaves { cum += numLeaves cur.idx++ } else { break } } return cum // number of leaves sequences BEFORE cur.idx in meta sequence } seqLen := seq.seqLen() cur.idx = int(idx) if cur.idx > seqLen { cur.idx = seqLen } return uint64(cur.idx) } func newIndexedMetaSequenceChunkFn(kind NomsKind, vrw ValueReadWriter) makeChunkFn { return func(level uint64, items []sequenceItem) (Collection, orderedKey, uint64) { tuples := make([]metaTuple, len(items)) numLeaves := uint64(0) for i, v := range items { mt := v.(metaTuple) tuples[i] = mt numLeaves += mt.numLeaves() } var col Collection if kind == ListKind { col = newList(newListMetaSequence(level, tuples, vrw)) } else { d.PanicIfFalse(BlobKind == kind) col = 
newBlob(newBlobMetaSequence(level, tuples, vrw)) } return col, orderedKeyFromSum(tuples), numLeaves } } func orderedKeyFromSum(msd []metaTuple) orderedKey { sum := uint64(0) for _, mt := range msd { sum += mt.numLeaves() } return orderedKeyFromUint64(sum) } // LoadLeafNodes loads the set of leaf nodes which contain the items // [startIdx -> endIdx). Returns the set of nodes and the offset within // the first sequence which corresponds to |startIdx|. func LoadLeafNodes(cols []Collection, startIdx, endIdx uint64) ([]Collection, uint64) { vrw := cols[0].asSequence().valueReadWriter() d.PanicIfTrue(vrw == nil) if cols[0].asSequence().isLeaf() { for _, c := range cols { d.PanicIfFalse(c.asSequence().isLeaf()) } return cols, startIdx } level := cols[0].asSequence().treeLevel() childTuples := []metaTuple{} cum := uint64(0) for _, c := range cols { s := c.asSequence() d.PanicIfFalse(s.treeLevel() == level) ms := s.(metaSequence) for _, mt := range ms.tuples() { numLeaves := mt.numLeaves() if cum == 0 && numLeaves <= startIdx { // skip tuples whose items are < startIdx startIdx -= numLeaves endIdx -= numLeaves continue } childTuples = append(childTuples, mt) cum += numLeaves if cum >= endIdx { break } } } hs := make(hash.HashSlice, len(childTuples)) for i, mt := range childTuples { hs[i] = mt.ref().TargetHash() } // Fetch committed child sequences in a single batch readValues := vrw.ReadManyValues(hs) childCols := make([]Collection, len(readValues)) for i, v := range readValues { childCols[i] = v.(Collection) } return LoadLeafNodes(childCols, startIdx, endIdx) } ================================================ FILE: go/types/leaf_sequence.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "math" "github.com/attic-labs/noms/go/d" ) type leafSequence struct { sequenceImpl } func newLeafSequence(vrw ValueReadWriter, buff []byte, offsets []uint32, len uint64) leafSequence { return leafSequence{newSequenceImpl(vrw, buff, offsets, len)} } func newLeafSequenceFromValues(kind NomsKind, vrw ValueReadWriter, vs ...Value) leafSequence { d.PanicIfTrue(vrw == nil) w := newBinaryNomsWriter() offsets := make([]uint32, len(vs)+sequencePartValues+1) offsets[sequencePartKind] = w.offset kind.writeTo(&w) offsets[sequencePartLevel] = w.offset w.writeCount(0) // level offsets[sequencePartCount] = w.offset count := uint64(len(vs)) w.writeCount(count) offsets[sequencePartValues] = w.offset for i, v := range vs { v.writeTo(&w) offsets[i+sequencePartValues+1] = w.offset } return newLeafSequence(vrw, w.data(), offsets, count) } // readLeafSequence reads the data provided by a decoder and moves the decoder forward. 
func readLeafSequence(dec *valueDecoder) leafSequence {
	// Skip over the encoded sequence to learn where it ends, then keep the
	// byte range [start, end) as the backing buffer of the new sequence.
	start := dec.pos()
	offsets, seqLen := skipLeafSequence(dec)
	end := dec.pos()
	return newLeafSequence(dec.vrw, dec.byteSlice(start, end), offsets, seqLen)
}

// skipLeafSequence advances dec past one encoded leaf sequence without
// materializing its values. It returns the decoder positions of the kind,
// level, count and first value, followed by the position after each value,
// together with the number of values.
func skipLeafSequence(dec *valueDecoder) ([]uint32, uint64) {
	kindPos := dec.pos()
	dec.skipKind()
	levelPos := dec.pos()
	dec.skipCount() // level
	countPos := dec.pos()
	count := dec.readCount()
	offsets := make([]uint32, count+sequencePartValues+1)
	offsets[sequencePartKind] = kindPos
	offsets[sequencePartLevel] = levelPos
	offsets[sequencePartCount] = countPos
	offsets[sequencePartValues] = dec.pos()
	for i := uint64(0); i < count; i++ {
		dec.skipValue()
		offsets[i+sequencePartValues+1] = dec.pos()
	}
	return offsets, count
}

// values decodes and returns every value of the sequence.
func (seq leafSequence) values() []Value {
	return seq.valuesSlice(0, math.MaxUint64)
}

// valuesSlice decodes the values of the items in [from, to). to is clamped to
// seq.Len(). Each item contributes getValuesPerIdx(seq.Kind()) values, so for
// MapKind the result holds two values per item.
// NOTE(review): from is not clamped; from > to would make (to-from) wrap
// around - presumably callers guarantee from <= to, confirm.
func (seq leafSequence) valuesSlice(from, to uint64) []Value {
	if len := seq.Len(); to > len {
		to = len
	}

	dec := seq.decoderSkipToIndex(int(from))
	vs := make([]Value, (to-from)*uint64(getValuesPerIdx(seq.Kind())))
	for i := range vs {
		vs[i] = dec.readValue()
	}
	return vs
}

// getCompareFnHelper returns a function reporting whether item idx of seq
// Equals item otherIdx of other. The returned closure repositions the same
// two decoders on every call.
// NOTE(review): because the decoders are shared, the closure presumably must
// not be called concurrently - confirm.
func (seq leafSequence) getCompareFnHelper(other leafSequence) compareFn {
	dec := seq.decoder()
	otherDec := other.decoder()

	return func(idx, otherIdx int) bool {
		dec.offset = uint32(seq.getItemOffset(idx))
		otherDec.offset = uint32(other.getItemOffset(otherIdx))
		return dec.readValue().Equals(otherDec.readValue())
	}
}

// getCompareFn always panics on a bare leafSequence; types that embed
// leafSequence are expected to supply their own implementation.
func (seq leafSequence) getCompareFn(other sequence) compareFn {
	panic("unreachable")
}

// typeOf computes the type of the sequence as the compound type of its kind
// over the union of the types of its values.
func (seq leafSequence) typeOf() *Type {
	dec := seq.decoder()
	kind := dec.readKind()
	dec.skipCount() // level
	count := dec.readCount()
	ts := make(typeSlice, 0, count)
	var lastType *Type
	for i := uint64(0); i < count; i++ {
		if lastType != nil {
			// Fast path: a value known to have the same type as the previous
			// one adds nothing to the union. When the check fails the decoder
			// is rewound so that the value's type can be read in full.
			// NOTE(review): assumes isValueSameTypeForSure leaves the decoder
			// just past the value when it returns true - confirm.
			offset := dec.offset
			if dec.isValueSameTypeForSure(lastType) {
				continue
			}
			dec.offset = offset
		}

		lastType = dec.readTypeOfValue()
		ts = append(ts, lastType)
	}

	return makeCompoundType(kind, makeUnionType(ts...))
}

// numLeaves returns the length recorded for this sequence.
func (seq leafSequence) numLeaves() uint64 {
	return seq.len
}

// getChildSequence returns nil: a leaf sequence has no child sequences.
func (seq leafSequence) getChildSequence(idx int) sequence {
	return nil
}

// treeLevel returns 0, the level of leaf sequences.
func (seq leafSequence) treeLevel() uint64 {
	return 0
}

// isLeaf returns true.
func (seq leafSequence) isLeaf() bool {
	return true
}

// cumulativeNumberOfLeaves returns the number of leaf items up to and
// including index idx, which for a leaf sequence is idx+1.
func (seq leafSequence) cumulativeNumberOfLeaves(idx int) uint64 {
	return uint64(idx) + 1
}

// getCompositeChildSequence always panics: a leaf sequence has no children to
// combine.
func (seq leafSequence) getCompositeChildSequence(start uint64, length uint64) sequence {
	panic("getCompositeChildSequence called on a leaf sequence")
}

// getItem decodes and returns the value at index idx.
func (seq leafSequence) getItem(idx int) sequenceItem {
	dec := seq.decoderSkipToIndex(idx)
	return dec.readValue()
}

// getValuesPerIdx returns how many encoded values make up one item of a
// collection of the given kind: 2 for MapKind, 1 for every other kind.
func getValuesPerIdx(kind NomsKind) int {
	if kind == MapKind {
		return 2
	}
	return 1
}

================================================
FILE: go/types/less.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"github.com/attic-labs/noms/go/hash"
)

// kindAndHash is the subset of a value's methods that valueLess needs.
type kindAndHash interface {
	Kind() NomsKind
	Hash() hash.Hash
}

// valueLess reports whether v1 orders before v2. It returns false whenever v2
// is a Bool, Number or String; otherwise the two hashes are compared.
// NOTE(review): only v2's kind is inspected, so this presumably relies on
// callers handling a Bool/Number/String v1 before calling - confirm.
func valueLess(v1, v2 kindAndHash) bool {
	switch v2.Kind() {
	case BoolKind, NumberKind, StringKind:
		return false
	default:
		return v1.Hash().Less(v2.Hash())
	}
}

================================================
FILE: go/types/list.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"sync/atomic"

	"github.com/attic-labs/noms/go/d"
)

// List represents a list or an array of Noms values. A list can contain zero or more values of zero
// or more types. The type of the list will reflect the type of the elements in the list. For
// example:
//
//	l := NewList(Number(1), Bool(true))
//	fmt.Println(l.Type().Describe())
//	// outputs List<Bool | Number>
//
// Lists, like all Noms values are immutable so the "mutation" methods return a new list.
type List struct { sequence } func newList(seq sequence) List { return List{seq} } // NewList creates a new List where the type is computed from the elements in the list, populated // with values, chunking if and when needed. func NewList(vrw ValueReadWriter, values ...Value) List { ch := newEmptyListSequenceChunker(vrw) for _, v := range values { ch.Append(v) } return newList(ch.Done()) } // NewStreamingList creates a new List, populated with values, chunking if and when needed. As // chunks are created, they're written to vrw -- including the root chunk of the list. Once the // caller has closed values, the caller can read the completed List from the returned channel. func NewStreamingList(vrw ValueReadWriter, values <-chan Value) <-chan List { out := make(chan List, 1) go func() { defer close(out) ch := newEmptyListSequenceChunker(vrw) for v := range values { ch.Append(v) } out <- newList(ch.Done()) }() return out } func (l List) Edit() *ListEditor { return NewListEditor(l) } // Collection interface func (l List) asSequence() sequence { return l.sequence } // Value interface func (l List) Value() Value { return l } func (l List) WalkValues(cb ValueCallback) { iterAll(l, func(v Value, idx uint64) { cb(v) }) } // Get returns the value at the given index. If this list has been chunked then this will have to // descend into the prolly-tree which leads to Get being O(depth). func (l List) Get(idx uint64) Value { d.PanicIfFalse(idx < l.Len()) cur := newCursorAtIndex(l.sequence, idx) return cur.current().(Value) } // Concat returns a new List comprised of this joined with other. It only needs // to visit the rightmost prolly tree chunks of this List, and the leftmost // prolly tree chunks of other, so it's efficient. 
func (l List) Concat(other List) List { seq := concat(l.sequence, other.sequence, func(cur *sequenceCursor, vrw ValueReadWriter) *sequenceChunker { return l.newChunker(cur, vrw) }) return newList(seq) } // Iter iterates over the list and calls f for every element in the list. If f returns true then the // iteration stops. func (l List) Iter(f func(v Value, index uint64) (stop bool)) { idx := uint64(0) cur := newCursorAtIndex(l.sequence, idx) cur.iter(func(v interface{}) bool { if f(v.(Value), uint64(idx)) { return true } idx++ return false }) } func (l List) IterRange(startIdx, endIdx uint64, f func(v Value, idx uint64)) { idx := uint64(startIdx) cb := func(v Value) { f(v, idx) idx++ } iterRange(l, startIdx, endIdx, cb) } // IterAll iterates over the list and calls f for every element in the list. Unlike Iter there is no // way to stop the iteration and all elements are visited. func (l List) IterAll(f func(v Value, index uint64)) { iterAll(l, f) } func iterAll(col Collection, f func(v Value, index uint64)) { concurrency := 6 vcChan := make(chan chan Value, concurrency) // Target reading data in |targetBatchBytes| per thread. We don't know how // many bytes each value is, so update |estimatedNumValues| as data is read. targetBatchBytes := 1 << 23 // 8MB estimatedNumValues := uint64(1000) go func() { for idx, l := uint64(0), col.Len(); idx < l; { numValues := atomic.LoadUint64(&estimatedNumValues) start := idx blockLength := l - start if blockLength > numValues { blockLength = numValues } idx += blockLength vc := make(chan Value) vcChan <- vc go func() { numBytes := iterRange(col, start, start+blockLength, func(v Value) { vc <- v }) close(vc) // Adjust the estimated number of values to try to read // |targetBatchBytes| next time. 
if numValues == blockLength { scale := float64(targetBatchBytes) / float64(numBytes) atomic.StoreUint64(&estimatedNumValues, uint64(float64(numValues)*scale)) } }() } close(vcChan) }() // Ensure read-ahead goroutines can exit, because the `range` below might not // finish if an |f| callback panics. defer func() { for vc := range vcChan { close(vc) } }() i := uint64(0) for vc := range vcChan { for v := range vc { f(v, i) i++ } } } func iterRange(col Collection, startIdx, endIdx uint64, cb func(v Value)) (numBytes uint64) { l := col.Len() d.PanicIfTrue(startIdx > endIdx || endIdx > l) if startIdx == endIdx { return } leaves, localStart := LoadLeafNodes([]Collection{col}, startIdx, endIdx) endIdx = localStart + endIdx - startIdx startIdx = localStart numValues := 0 valuesPerIdx := uint64(getValuesPerIdx(col.Kind())) for _, leaf := range leaves { seq := leaf.asSequence() values := seq.valuesSlice(startIdx, endIdx) numValues += len(values) for _, v := range values { cb(v) } endIdx = endIdx - uint64(len(values))/valuesPerIdx - startIdx startIdx = 0 numBytes += uint64(len(seq.valueBytes())) // note: should really only include |values| } return } // Iterator returns a ListIterator which can be used to iterate efficiently over a list. func (l List) Iterator() ListIterator { return l.IteratorAt(0) } // IteratorAt returns a ListIterator starting at index. If index is out of bound the iterator will // have reached its end on creation. func (l List) IteratorAt(index uint64) ListIterator { return ListIterator{ newCursorAtIndex(l.sequence, index), } } // Diff streams the diff from last to the current list to the changes channel. Caller can close // closeChan to cancel the diff operation. func (l List) Diff(last List, changes chan<- Splice, closeChan <-chan struct{}) { l.DiffWithLimit(last, changes, closeChan, DEFAULT_MAX_SPLICE_MATRIX_SIZE) } // DiffWithLimit streams the diff from last to the current list to the changes channel. 
Caller can // close closeChan to cancel the diff operation. // The maxSpliceMatrixSize determines the how big of an edit distance matrix we are willing to // compute versus just saying the thing changed. func (l List) DiffWithLimit(last List, changes chan<- Splice, closeChan <-chan struct{}, maxSpliceMatrixSize uint64) { if l.Equals(last) { return } lLen, lastLen := l.Len(), last.Len() if lLen == 0 { changes <- Splice{0, lastLen, 0, 0} // everything removed return } if lastLen == 0 { changes <- Splice{0, 0, lLen, 0} // everything added return } indexedSequenceDiff(last.sequence, 0, l.sequence, 0, changes, closeChan, maxSpliceMatrixSize) } func (l List) newChunker(cur *sequenceCursor, vrw ValueReadWriter) *sequenceChunker { return newSequenceChunker(cur, 0, vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes) } func makeListLeafChunkFn(vrw ValueReadWriter) makeChunkFn { return func(level uint64, items []sequenceItem) (Collection, orderedKey, uint64) { d.PanicIfFalse(level == 0) values := make([]Value, len(items)) for i, v := range items { values[i] = v.(Value) } list := newList(newListLeafSequence(vrw, values...)) return list, orderedKeyFromInt(len(values)), uint64(len(values)) } } func newEmptyListSequenceChunker(vrw ValueReadWriter) *sequenceChunker { return newEmptySequenceChunker(vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes) } ================================================ FILE: go/types/list_editor.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"sync"

	"github.com/attic-labs/noms/go/d"
)

// ListEditor accumulates splices against a List and produces the resulting
// List on demand. The pending splices are kept as a singly linked list of
// listEdit values in |edits|.
type ListEditor struct {
	l     List
	edits *listEdit
}

// NewListEditor returns a ListEditor for l with no pending edits.
func NewListEditor(l List) *ListEditor {
	return &ListEditor{l, nil}
}

// Kind returns ListKind.
func (le *ListEditor) Kind() NomsKind {
	return ListKind
}

// Value returns the result of List().
func (le *ListEditor) Value() Value {
	return le.List()
}

// List applies all pending edits to the underlying list and returns the new
// List. Without pending edits the original list is returned unchanged.
//
// A producer goroutine walks the edits in order. For each edit it starts a
// goroutine creating a cursor at the edit's index and, when the edit inserts
// anything that is not already a Value, further goroutines that replace those
// entries with the result of their Value() method. The calling goroutine
// consumes cursors and edits in the same order and feeds them into a single
// sequence chunker.
func (le *ListEditor) List() List {
	if le.edits == nil {
		return le.l // no edits
	}

	seq := le.l.sequence
	vrw := seq.valueReadWriter()

	cursChan := make(chan chan *sequenceCursor)
	spliceChan := make(chan chan listEdit)

	go func() {
		for edit := le.edits; edit != nil; edit = edit.next {
			// Per-iteration copy of the loop variable for the closures below.
			edit := edit

			// TODO: Use ReadMany
			cc := make(chan *sequenceCursor, 1)
			cursChan <- cc

			go func() {
				cc <- newCursorAtIndex(seq, edit.idx)
			}()

			sc := make(chan listEdit, 1)
			spliceChan <- sc

			wg := sync.WaitGroup{}
			subEditors := false
			for i, v := range edit.inserted {
				if _, ok := v.(Value); ok {
					continue
				}

				subEditors = true

				// Resolve the non-Value entry in the background and store the
				// result back into the edit.
				idx, val := i, v
				wg.Add(1)
				go func() {
					edit.inserted[idx] = val.Value()
					wg.Done()
				}()
			}

			if !subEditors {
				sc <- *edit
				continue
			}

			// Hand the edit over only after every entry has been resolved.
			go func() {
				wg.Wait()
				sc <- *edit
			}()
		}

		close(cursChan)
		close(spliceChan)
	}()

	var ch *sequenceChunker
	for cc := range cursChan {
		cur := <-cc
		sp := <-<-spliceChan

		if ch == nil {
			ch = newSequenceChunker(cur, 0, vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes)
		} else {
			ch.advanceTo(cur)
		}

		// Drop the removed items, then add the inserted ones.
		dc := sp.removed
		for dc > 0 {
			ch.Skip()
			dc--
		}

		for _, v := range sp.inserted {
			ch.Append(v)
		}
	}

	return newList(ch.Done())
}

// collapseListEdit tries to merge the existing |edit| into |newEdit|. It
// returns false, leaving both untouched, when the two neither overlap nor
// touch. Otherwise |newEdit| is overwritten with a single edit that has the
// combined effect, and true is returned; the caller is then responsible for
// unlinking |edit|.
// NOTE(review): the index arithmetic compares newEdit.idx with positions
// inside edit.inserted, so both idx values presumably have to be expressed in
// the same coordinates when this is called - confirm against Splice.
func collapseListEdit(newEdit, edit *listEdit) bool {
	if newEdit.idx+newEdit.removed < edit.idx ||
		edit.idx+uint64(len(edit.inserted)) < newEdit.idx {
		return false
	}

	collapsed := &listEdit{}

	if newEdit.idx <= edit.idx {
		collapsed.idx = newEdit.idx

		overlap := newEdit.removed - (edit.idx - newEdit.idx) // number of leading N values removed from edit.inserted
		if overlap < uint64(len(edit.inserted)) {
			// newEdit doesn't remove all of edit.inserted
			collapsed.inserted = append(newEdit.inserted, edit.inserted[overlap:]...)
			collapsed.removed = newEdit.removed + edit.removed - overlap
		} else {
			// newEdit removes all of edit.inserted
			collapsed.inserted = newEdit.inserted
			collapsed.removed = newEdit.removed + edit.removed - uint64(len(edit.inserted))
		}
	} else {
		// edit.idx < newEdit.idx

		collapsed.idx = edit.idx

		editInsertedLen := uint64(len(edit.inserted))
		beginEditRemovePoint := newEdit.idx - edit.idx

		if beginEditRemovePoint == editInsertedLen {
			// newEdit took place at the position immediately after the last element of edit.inserted
			collapsed.inserted = append(edit.inserted, newEdit.inserted...)
			collapsed.removed = edit.removed + newEdit.removed
		} else {
			// newEdit takes place within edit.inserted
			collapsed.inserted = append(collapsed.inserted, edit.inserted[:beginEditRemovePoint]...)
			collapsed.inserted = append(collapsed.inserted, newEdit.inserted...)

			endEditRemovePoint := beginEditRemovePoint + newEdit.removed
			if endEditRemovePoint < editInsertedLen {
				// elements of edit.inserted remain beyond newEdit.removed
				collapsed.removed = edit.removed
				collapsed.inserted = append(collapsed.inserted, edit.inserted[endEditRemovePoint:]...)
			} else {
				collapsed.removed = edit.removed + endEditRemovePoint - editInsertedLen
			}
		}
	}

	*newEdit = *collapsed
	return true
}

// Len returns the length the list has once all pending edits are applied: the
// length of the underlying list, minus everything removed, plus everything
// inserted.
func (le *ListEditor) Len() uint64 {
	delta := int64(0)
	for edit := le.edits; edit != nil; edit = edit.next {
		delta += -int64(edit.removed) + int64(len(edit.inserted))
	}

	return uint64(int64(le.l.Len()) + delta)
}

// Splice records an edit that removes deleteCount items at idx and inserts vs
// in their place, and returns the editor so that calls can be chained. It
// panics if any element of vs is nil, or with "Index Out Of Bounds" when the
// resulting edit does not fit the underlying list.
//
// While walking the pending edits, the new edit is merged with every edit it
// overlaps or touches (see collapseListEdit), and its index is adjusted by
// every earlier edit that stays separate. If the merged edit ends up removing
// and inserting nothing, nothing is stored.
func (le *ListEditor) Splice(idx uint64, deleteCount uint64, vs ...Valuable) *ListEditor {
	for _, sv := range vs {
		d.PanicIfTrue(sv == nil)
	}

	ne := &listEdit{idx, deleteCount, vs, nil}

	var last *listEdit
	edit := le.edits

	for edit != nil {
		if collapseListEdit(ne, edit) {
			// |edit| is now part of |ne|: unlink it.
			if last == nil {
				le.edits = edit.next
			} else {
				last.next = edit.next
			}

			edit = edit.next
			continue
		}

		if edit.idx > ne.idx {
			break
		}

		ne.idx = adjustIdx(ne.idx, edit)
		last = edit
		edit = edit.next
	}

	if ne.removed == 0 && len(ne.inserted) == 0 {
		return le // effectively removed 1 or more existing slices
	}

	if ne.idx > le.l.Len() {
		d.Panic("Index Out Of Bounds")
	}
	if ne.idx == le.l.Len() && ne.removed > 0 {
		d.Panic("Index Out Of Bounds")
	}

	if last == nil {
		// Insert |ne| in first position
		ne.next = le.edits
		le.edits = ne
	} else {
		ne.next = last.next
		last.next = ne
	}

	return le
}

// Set replaces the item at idx with v.
func (le *ListEditor) Set(idx uint64, v Valuable) *ListEditor {
	return le.Splice(idx, 1, v)
}

// Append adds vs at the end of the list.
func (le *ListEditor) Append(vs ...Valuable) *ListEditor {
	return le.Splice(le.Len(), 0, vs...)
}

// Insert adds vs in front of the item at idx.
func (le *ListEditor) Insert(idx uint64, vs ...Valuable) *ListEditor {
	return le.Splice(idx, 0, vs...)
}

// Remove deletes the items in [start, end). It panics if start > end.
func (le *ListEditor) Remove(start uint64, end uint64) *ListEditor {
	d.PanicIfFalse(start <= end)
	return le.Splice(start, end-start)
}

// RemoveAt deletes the single item at idx.
func (le *ListEditor) RemoveAt(idx uint64) *ListEditor {
	return le.Splice(idx, 1)
}

// adjustIdx shifts idx by the net effect of e: forward by the number of items
// e removed and back by the number of items e inserted.
func adjustIdx(idx uint64, e *listEdit) uint64 {
	return idx + e.removed - uint64(len(e.inserted))
}

// Get returns the item at idx as seen with all pending edits applied. It is
// answered either from the inserted values of a pending edit or, after
// adjusting idx for the edits in front of it, from the underlying list.
func (le *ListEditor) Get(idx uint64) Valuable {
	edit := le.edits
	for edit != nil {
		if edit.idx > idx {
			// idx is before next splice
			return le.l.Get(idx)
		}

		if edit.idx <= idx && idx < (edit.idx+uint64(len(edit.inserted))) {
			// idx is within the insert values of edit
			return edit.inserted[idx-edit.idx]
		}

		idx = adjustIdx(idx, edit)
		edit = edit.next
	}

	return le.l.Get(idx)
}

// listEdit is one pending splice: at position idx, |removed| items are dropped
// and |inserted| is put in their place. |next| links to the following edit.
type listEdit struct {
	idx      uint64
	removed  uint64
	inserted []Valuable
	next     *listEdit
}

================================================
FILE: go/types/list_editor_test.go
================================================
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"math/rand"
	"testing"

	"github.com/stretchr/testify/assert"
)

// listOfInts builds a List holding vals as Numbers.
func listOfInts(vrw ValueReadWriter, vals ...int) List {
	vs := ValueSlice{}
	for _, v := range vals {
		vs = append(vs, Number(v))
	}
	return NewList(vrw, vs...)
}

// testEditor returns an editor for a list holding vals as Numbers.
func testEditor(vrw ValueReadWriter, vals ...int) *ListEditor {
	return NewListEditor(listOfInts(vrw, vals...))
}

// edit applies one splice to le: remove |remove| items at idx and insert the
// given ints as Numbers.
func edit(le *ListEditor, idx, remove int, insert ...int) {
	vals := []Valuable{}
	for _, v := range insert {
		vals = append(vals, Number(v))
	}
	le.Splice(uint64(idx), uint64(remove), vals...)
}

// assertState checks Len(), Get() for every index, the number of pending edits
// and the materialized List() of le against the expectations.
func assertState(t *testing.T, vrw ValueReadWriter, le *ListEditor, expectItems []int, expectEditCount int) {
	assert.Equal(t, uint64(len(expectItems)), le.Len())

	for i, v := range expectItems {
		assert.Equal(t, Number(v), le.Get(uint64(i)))
	}

	actualEditCount := 0
	for edit := le.edits; edit != nil; edit = edit.next {
		actualEditCount++
	}

	assert.Equal(t, expectEditCount, actualEditCount)

	assert.True(t, listOfInts(vrw, expectItems...).Equals(le.List()))
}

func TestListEditorBasic(t *testing.T) {
	vrw := newTestValueStore()

	t.Run("remove a few", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5)
		edit(le, 2, 2)
		assertState(t, vrw, le, []int{0, 1, 4, 5}, 1)
	})

	t.Run("insert a few", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5)
		edit(le, 2, 0, 9, 8, 7)
		assertState(t, vrw, le, []int{0, 1, 9, 8, 7, 2, 3, 4, 5}, 1)
	})

	t.Run("remove 2, insert 3", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5)
		edit(le, 2, 2, 9, 8, 7)
		assertState(t, vrw, le, []int{0, 1, 9, 8, 7, 4, 5}, 1)
	})

	t.Run("insert 2 twice", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5)
		edit(le, 2, 0, 9, 10)
		assertState(t, vrw, le, []int{0, 1, 9, 10, 2, 3, 4, 5}, 1)
		edit(le, 7, 0, 8, 9)
		assertState(t, vrw, le, []int{0, 1, 9, 10, 2, 3, 4, 8, 9, 5}, 2)
	})

	t.Run("remove 2 twice", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5, 6, 7)
		edit(le, 5, 2)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 4, 7}, 1)
		edit(le, 1, 2)
		assertState(t, vrw, le, []int{0, 3, 4, 7}, 2)
	})
}

// TestCollapseSplices covers splices that overlap or touch an earlier splice
// and are therefore expected to be merged with it.
func TestCollapseSplices(t *testing.T) {
	vrw := newTestValueStore()

	t.Run("left adjacent", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5, 6, 7)
		edit(le, 4, 3)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 7}, 1)
		edit(le, 1, 3)
		assertState(t, vrw, le, []int{0, 7}, 1)
	})

	t.Run("left adjacent 2", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5, 6, 7)
		edit(le, 4, 3, 0, 0)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 0, 0, 7}, 1)
		edit(le, 1, 3, 5, 5)
		assertState(t, vrw, le, []int{0, 5, 5, 0, 0, 7}, 1)
	})

	t.Run("left consume", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5, 6, 7)
		edit(le, 2, 4)
		assertState(t, vrw, le, []int{0, 1, 6, 7}, 1)
		edit(le, 1, 2)
		assertState(t, vrw, le, []int{0, 7}, 1)
	})

	t.Run("left overlap ", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5)
		edit(le, 3, 2, 7, 8, 9)
		assertState(t, vrw, le, []int{0, 1, 2, 7, 8, 9, 5}, 1)
		edit(le, 0, 4)
		assertState(t, vrw, le, []int{8, 9, 5}, 1)
	})

	t.Run("undo 1", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5)
		edit(le, 2, 3)
		assertState(t, vrw, le, []int{0, 1, 5}, 1)
		edit(le, 2, 0, 2, 3, 4)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 4, 5}, 1)
	})

	t.Run("undo 2", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5)
		edit(le, 2, 0, 9, 8, 7)
		assertState(t, vrw, le, []int{0, 1, 9, 8, 7, 2, 3, 4, 5}, 1)
		edit(le, 2, 3)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 4, 5}, 0)
	})

	t.Run("splice middile of splice", func(t *testing.T) {
		le := testEditor(vrw, 0, 1)
		edit(le, 1, 0, 9, 8, 7, 6)
		assertState(t, vrw, le, []int{0, 9, 8, 7, 6, 1}, 1)
		edit(le, 2, 2)
		assertState(t, vrw, le, []int{0, 9, 6, 1}, 1)
	})
}

// TestFuzzFails replays fixed splice sequences; judging by the name they were
// presumably found by the fuzzer below.
func TestFuzzFails(t *testing.T) {
	vrw := newTestValueStore()

	t.Run("Case 1", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)
		edit(le, 23, 0, 0, 3, 2)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 0, 3, 2, 23, 24}, 1)
		edit(le, 5, 15, 1, 2, 9, 8)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 4, 1, 2, 9, 8, 20, 21, 22, 0, 3, 2, 23, 24}, 2)
		edit(le, 4, 7, 7)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 7, 22, 0, 3, 2, 23, 24}, 2)
	})

	t.Run("Case 2", func(t *testing.T) {
		le := testEditor(vrw, 0, 1, 2, 3, 4, 5)
		edit(le, 5, 0, 1, 7, 5, 3, 13, 17)
		assertState(t, vrw, le, []int{0, 1, 2, 3, 4, 1, 7, 5, 3, 13, 17, 5}, 1)
		edit(le, 2, 2, 16, 5, 12, 5, 15, 0, 15, 15, 7)
		assertState(t, vrw, le, []int{0, 1, 16, 5, 12, 5, 15, 0, 15, 15, 7, 4, 1, 7, 5, 3, 13, 17, 5}, 2)
		edit(le, 8, 5, 4, 13)
		assertState(t, vrw, le, []int{0, 1, 16, 5, 12, 5, 15, 0, 4, 13, 7, 5, 3, 13, 17, 5}, 1)
		edit(le, 6, 2, 8, 2, 6, 3, 14, 6)
		assertState(t, vrw, le, []int{0, 1, 16, 5, 12, 5, 8, 2, 6, 3, 14, 6, 4, 13, 7, 5, 3, 13, 17, 5}, 1)
	})
}

// AsValuables converts a slice of Values into a slice of Valuables.
func AsValuables(vs []Value) []Valuable {
	res := make([]Valuable, len(vs))
	for i, v := range vs {
		res[i] = v
	}
	return res
}

// TestListSpliceFuzzer applies the same pseudo-random splices (fixed seed 0)
// to a testList and to a ListEditor and checks that both end up with equal
// lists.
func TestListSpliceFuzzer(t *testing.T) {
	startCount := 1000
	rounds := 1000
	splices := 100
	maxInsertCount := uint64(50)
	maxInt := uint64(100)

	vrw := newTestValueStore()

	r := rand.New(rand.NewSource(0))

	nextRandInt := func(from, to uint64) uint64 {
		return from + uint64(float64(to-from)*r.Float64())
	}

	nextRandomSplice := func(len int) (idx, remove uint64, insert []Value) {
		idx = nextRandInt(0, uint64(len))
		remove = nextRandInt(0, uint64(len)-idx)
		insCount := nextRandInt(0, maxInsertCount)
		for i := uint64(0); i < insCount; i++ {
			insert = append(insert, Number(nextRandInt(0, maxInt)))
		}
		return
	}

	for i := 0; i < rounds; i++ {
		tl := newTestList(startCount)
		le := tl.toList(vrw).Edit()

		for j := 0; j < splices; j++ {
			idx, removed, insert := nextRandomSplice(len(tl))
			tl = tl.Splice(int(idx), int(removed), insert...)
			le.Splice(idx, removed, AsValuables(insert)...)
		}
		expect := tl.toList(vrw)
		actual := le.List()
		assert.True(t, expect.Equals(actual))
	}
}

================================================
FILE: go/types/list_iterator.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"github.com/attic-labs/noms/go/d"
)

// ListIterator can be used to efficiently iterate through a Noms List.
type ListIterator struct {
	cursor *sequenceCursor
}

// Next returns subsequent Values from a List, starting with the index at which the iterator was
// created. If there are no more Values, Next() returns nil.
func (li ListIterator) Next() (out Value) { if li.cursor == nil { d.Panic("Cannot use a nil ListIterator") } if li.cursor.valid() { out = li.cursor.current().(Value) li.cursor.advance() } return } ================================================ FILE: go/types/list_iterator_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "testing" "github.com/stretchr/testify/assert" ) func TestListIterator(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() numbers := append(generateNumbersAsValues(10), Number(20), Number(25)) l := NewList(vrw, numbers...) i := l.Iterator() vs := iterToSlice(i) assert.True(vs.Equals(numbers), "Expected: %v != actual: %v", numbers, vs) i = l.IteratorAt(3) vs = iterToSlice(i) assert.True(vs.Equals(numbers[3:]), "Expected: %v != actual: %v", numbers, vs) i = l.IteratorAt(l.Len()) assert.Nil(i.Next()) } ================================================ FILE: go/types/list_leaf_sequence.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types type listLeafSequence struct { leafSequence } func newListLeafSequence(vrw ValueReadWriter, vs ...Value) sequence { return listLeafSequence{newLeafSequenceFromValues(ListKind, vrw, vs...)} } // sequence interface func (ll listLeafSequence) getCompareFn(other sequence) compareFn { return ll.getCompareFnHelper(other.(listLeafSequence).leafSequence) } ================================================ FILE: go/types/list_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "math/rand" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) const testListSize = 5000 type testList ValueSlice func (tl testList) AsValuables() []Valuable { vs := make([]Valuable, len(tl)) for i, v := range tl { vs[i] = v } return vs } func (tl testList) Set(idx int, v Value) (res testList) { return tl.Splice(idx, 1, v) } func (tl testList) Insert(idx int, vs ...Value) testList { return tl.Splice(idx, 0, vs...) } func (tl testList) Remove(start, end int) testList { return tl.Splice(start, end-start) } func (tl testList) RemoveAt(idx int) testList { return tl.Splice(idx, 1) } func (tl testList) Splice(idx int, remove int, insert ...Value) (res testList) { res = append(res, tl[:idx]...) res = append(res, insert...) res = append(res, tl[idx+remove:]...) return } func (tl testList) Diff(last testList) []Splice { // Note: this could be use tl.toList/last.toList and then tlList.Diff(lastList) // but the purpose of this method is to be redundant. return calcSplices(uint64(len(last)), uint64(len(tl)), DEFAULT_MAX_SPLICE_MATRIX_SIZE, func(i uint64, j uint64) bool { return last[i] == tl[j] }) } func (tl testList) toList(vrw ValueReadWriter) List { return NewList(vrw, tl...) 
} func newTestList(length int) testList { return generateNumbersAsValues(length) } func newTestListFromList(list List) testList { tl := testList{} list.IterAll(func(v Value, idx uint64) { tl = append(tl, v) }) return tl } func validateList(t *testing.T, vrw ValueReadWriter, l List, values ValueSlice) { assert.True(t, l.Equals(NewList(vrw, values...))) out := ValueSlice{} l.IterAll(func(v Value, idx uint64) { out = append(out, v) }) assert.True(t, out.Equals(values)) } type listTestSuite struct { collectionTestSuite elems testList } func newListTestSuite(size uint, expectChunkCount int, expectPrependChunkDiff int, expectAppendChunkDiff int) *listTestSuite { vrw := newTestValueStore() length := 1 << size elems := newTestList(length) tr := MakeListType(NumberType) list := NewList(vrw, elems...) return &listTestSuite{ collectionTestSuite: collectionTestSuite{ col: list, expectType: tr, expectLen: uint64(length), expectChunkCount: expectChunkCount, expectPrependChunkDiff: expectPrependChunkDiff, expectAppendChunkDiff: expectAppendChunkDiff, validate: func(v2 Collection) bool { l2 := v2.(List) out := ValueSlice{} l2.IterAll(func(v Value, index uint64) { out = append(out, v) }) return ValueSlice(elems).Equals(out) }, prependOne: func() Collection { dup := make([]Value, length+1) dup[0] = Number(0) copy(dup[1:], elems) return NewList(vrw, dup...) }, appendOne: func() Collection { dup := make([]Value, length+1) copy(dup, elems) dup[len(dup)-1] = Number(0) return NewList(vrw, dup...) 
}, }, elems: elems, } } func (suite *listTestSuite) TestGet() { list := suite.col.(List) for i := 0; i < len(suite.elems); i++ { suite.True(suite.elems[i].Equals(list.Get(uint64(i)))) } suite.Equal(suite.expectLen, list.Len()) } func (suite *listTestSuite) TestIter() { list := suite.col.(List) expectIdx := uint64(0) endAt := suite.expectLen / 2 list.Iter(func(v Value, idx uint64) bool { suite.Equal(expectIdx, idx) expectIdx++ suite.Equal(suite.elems[idx], v) return expectIdx == endAt }) suite.Equal(endAt, expectIdx) } func (suite *listTestSuite) TestIterRange() { list := suite.col.(List) for s := uint64(0); s < 6; s++ { batchSize := list.Len() / (2 << s) expectIdx := uint64(0) for i := uint64(0); i < list.Len(); i += batchSize { list.IterRange(i, i+batchSize, func(v Value, idx uint64) { suite.Equal(expectIdx, idx) expectIdx++ suite.Equal(suite.elems[idx], v) }) } } } func TestListSuite4K(t *testing.T) { suite.Run(t, newListTestSuite(12, 8, 2, 2)) } func TestListSuite8K(t *testing.T) { suite.Run(t, newListTestSuite(14, 22, 2, 2)) } func TestListInsert(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) tl := newTestList(1024) list := tl.toList(vrw) for i := 0; i < len(tl); i += 16 { tl = tl.Insert(i, Number(i)) list = list.Edit().Insert(uint64(i), Number(i)).List() } assert.True(tl.toList(vrw).Equals(list)) } func TestListRemove(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) tl := newTestList(1024) list := tl.toList(vrw) for i := len(tl) - 16; i >= 0; i -= 16 { tl = tl.Remove(i, i+4) list = list.Edit().Remove(uint64(i), uint64(i+4)).List() } assert.True(tl.toList(vrw).Equals(list)) } func TestListRemoveAt(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() l0 := NewList(vrw) l0 = l0.Edit().Append(Bool(false), Bool(true)).List() l1 := l0.Edit().RemoveAt(1).List() assert.True(NewList(vrw, Bool(false)).Equals(l1)) l1 = 
l1.Edit().RemoveAt(0).List() assert.True(NewList(vrw).Equals(l1)) assert.Panics(func() { l1.Edit().RemoveAt(0).List() }) } func getTestListLen() uint64 { return uint64(64) * 50 } func getTestList() testList { return getTestListWithLen(int(getTestListLen())) } func getTestListWithLen(length int) testList { s := rand.NewSource(42) values := make([]Value, length) for i := 0; i < length; i++ { values[i] = Number(s.Int63() & 0xff) } return values } func getTestListUnique() testList { length := int(getTestListLen()) s := rand.NewSource(42) uniques := map[int64]bool{} for len(uniques) < length { uniques[s.Int63()] = true } values := make([]Value, 0, length) for k := range uniques { values = append(values, Number(k)) } return values } func testListFromNomsList(list List) testList { simple := make(testList, list.Len()) list.IterAll(func(v Value, offset uint64) { simple[offset] = v }) return simple } func TestStreamingListCreation(t *testing.T) { smallTestChunks() defer normalProductionChunks() if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) vs := newTestValueStore() simpleList := getTestList() cl := NewList(vs, simpleList...) valueChan := make(chan Value) listChan := NewStreamingList(vs, valueChan) for _, v := range simpleList { valueChan <- v } close(valueChan) sl := <-listChan assert.True(cl.Equals(sl)) cl.Iter(func(v Value, idx uint64) (done bool) { done = !assert.True(v.Equals(sl.Get(idx))) return }) } func TestListAppend(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) newList := func(items testList) List { return NewList(vrw, items...) 
} listToSimple := func(cl List) (simple testList) { cl.IterAll(func(v Value, offset uint64) { simple = append(simple, v) }) return } cl := newList(getTestList()) cl2 := cl.Edit().Append(Number(42)).List() cl3 := cl2.Edit().Append(Number(43)).List() cl4 := cl3.Edit().Append(getTestList().AsValuables()...).List() cl5 := cl4.Edit().Append(Number(44), Number(45)).List() cl6 := cl5.Edit().Append(getTestList().AsValuables()...).List() expected := getTestList() assert.Equal(expected, listToSimple(cl)) assert.Equal(getTestListLen(), cl.Len()) assert.True(newList(expected).Equals(cl)) expected = append(expected, Number(42)) assert.Equal(expected, listToSimple(cl2)) assert.Equal(getTestListLen()+1, cl2.Len()) assert.True(newList(expected).Equals(cl2)) expected = append(expected, Number(43)) assert.Equal(expected, listToSimple(cl3)) assert.Equal(getTestListLen()+2, cl3.Len()) assert.True(newList(expected).Equals(cl3)) expected = append(expected, getTestList()...) assert.Equal(expected, listToSimple(cl4)) assert.Equal(2*getTestListLen()+2, cl4.Len()) assert.True(newList(expected).Equals(cl4)) expected = append(expected, Number(44), Number(45)) assert.Equal(expected, listToSimple(cl5)) assert.Equal(2*getTestListLen()+4, cl5.Len()) assert.True(newList(expected).Equals(cl5)) expected = append(expected, getTestList()...) 
assert.Equal(expected, listToSimple(cl6)) assert.Equal(3*getTestListLen()+4, cl6.Len()) assert.True(newList(expected).Equals(cl6)) } func TestListValidateInsertAscending(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() values := generateNumbersAsValues(1000) s := NewList(vrw) for i, v := range values { s = s.Edit().Insert(uint64(i), v).List() validateList(t, vrw, s, values[0:i+1]) } } func TestListValidateInsertAtZero(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() values := generateNumbersAsValues(1000) s := NewList(vrw) count := len(values) for count > 0 { count-- v := values[count] s = s.Edit().Insert(uint64(0), v).List() validateList(t, vrw, s, values[count:]) } } func TestListInsertNothing(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() cl := getTestList().toList(vrw) assert.True(cl.Equals(cl.Edit().Insert(0).List())) for i := uint64(1); i < getTestListLen(); i *= 2 { assert.True(cl.Equals(cl.Edit().Insert(i).List())) } assert.True(cl.Equals(cl.Edit().Insert(cl.Len() - 1).List())) assert.True(cl.Equals(cl.Edit().Insert(cl.Len()).List())) } func TestListInsertStart(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() cl := getTestList().toList(vrw) cl2 := cl.Edit().Insert(0, Number(42)).List() cl3 := cl2.Edit().Insert(0, Number(43)).List() cl4 := cl3.Edit().Insert(0, getTestList().AsValuables()...).List() cl5 := cl4.Edit().Insert(0, Number(44), Number(45)).List() cl6 := cl5.Edit().Insert(0, getTestList().AsValuables()...).List() expected := getTestList() assert.Equal(expected, testListFromNomsList(cl)) assert.Equal(getTestListLen(), cl.Len()) 
assert.True(expected.toList(vrw).Equals(cl)) expected = expected.Insert(0, Number(42)) assert.Equal(expected, testListFromNomsList(cl2)) assert.Equal(getTestListLen()+1, cl2.Len()) assert.True(expected.toList(vrw).Equals(cl2)) expected = expected.Insert(0, Number(43)) assert.Equal(expected, testListFromNomsList(cl3)) assert.Equal(getTestListLen()+2, cl3.Len()) assert.True(expected.toList(vrw).Equals(cl3)) expected = expected.Insert(0, getTestList()...) assert.Equal(expected, testListFromNomsList(cl4)) assert.Equal(2*getTestListLen()+2, cl4.Len()) assert.True(expected.toList(vrw).Equals(cl4)) expected = expected.Insert(0, Number(44), Number(45)) assert.Equal(expected, testListFromNomsList(cl5)) assert.Equal(2*getTestListLen()+4, cl5.Len()) assert.True(expected.toList(vrw).Equals(cl5)) expected = expected.Insert(0, getTestList()...) assert.Equal(expected, testListFromNomsList(cl6)) assert.Equal(3*getTestListLen()+4, cl6.Len()) assert.True(expected.toList(vrw).Equals(cl6)) } func TestListInsertMiddle(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() cl := getTestList().toList(vrw) cl2 := cl.Edit().Insert(100, Number(42)).List() cl3 := cl2.Edit().Insert(200, Number(43)).List() cl4 := cl3.Edit().Insert(300, getTestList().AsValuables()...).List() cl5 := cl4.Edit().Insert(400, Number(44), Number(45)).List() cl6 := cl5.Edit().Insert(500, getTestList().AsValuables()...).List() cl7 := cl6.Edit().Insert(600, Number(100)).List() expected := getTestList() assert.Equal(expected, testListFromNomsList(cl)) assert.Equal(getTestListLen(), cl.Len()) assert.True(expected.toList(vrw).Equals(cl)) expected = expected.Insert(100, Number(42)) assert.Equal(expected, testListFromNomsList(cl2)) assert.Equal(getTestListLen()+1, cl2.Len()) assert.True(expected.toList(vrw).Equals(cl2)) expected = expected.Insert(200, Number(43)) assert.Equal(expected, 
testListFromNomsList(cl3)) assert.Equal(getTestListLen()+2, cl3.Len()) assert.True(expected.toList(vrw).Equals(cl3)) expected = expected.Insert(300, getTestList()...) assert.Equal(expected, testListFromNomsList(cl4)) assert.Equal(2*getTestListLen()+2, cl4.Len()) assert.True(expected.toList(vrw).Equals(cl4)) expected = expected.Insert(400, Number(44), Number(45)) assert.Equal(expected, testListFromNomsList(cl5)) assert.Equal(2*getTestListLen()+4, cl5.Len()) assert.True(expected.toList(vrw).Equals(cl5)) expected = expected.Insert(500, getTestList()...) assert.Equal(expected, testListFromNomsList(cl6)) assert.Equal(3*getTestListLen()+4, cl6.Len()) assert.True(expected.toList(vrw).Equals(cl6)) expected = expected.Insert(600, Number(100)) assert.Equal(expected, testListFromNomsList(cl7)) assert.Equal(3*getTestListLen()+5, cl7.Len()) assert.True(expected.toList(vrw).Equals(cl7)) } func TestListInsertRanges(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() testList := getTestList() whole := testList.toList(vrw) // Compare list equality. Increment by 256 (16^2) because each iteration requires building a new list, which is slow. for incr, i := 256, 0; i < len(testList)-incr; i += incr { for window := 1; window <= incr; window *= 16 { testListPart := testList.Remove(i, i+window) actual := testListPart.toList(vrw).Edit().Insert(uint64(i), testList[i:i+window].AsValuables()...).List() assert.Equal(whole.Len(), actual.Len()) assert.True(whole.Equals(actual)) } } // Compare list length, which doesn't require building a new list every iteration, so the increment can be smaller. 
for incr, i := 10, 0; i < len(testList); i += incr { assert.Equal(len(testList)+incr, int(whole.Edit().Insert(uint64(i), testList[0:incr].AsValuables()...).List().Len())) } } func TestListRemoveNothing(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() cl := getTestList().toList(vrw) assert.True(cl.Equals(cl.Edit().Remove(0, 0).List())) for i := uint64(1); i < getTestListLen(); i *= 2 { assert.True(cl.Equals(cl.Edit().Remove(i, i).List())) } assert.True(cl.Equals(cl.Edit().Remove(cl.Len()-1, cl.Len()-1).List())) assert.True(cl.Equals(cl.Edit().Remove(cl.Len(), cl.Len()).List())) } func TestListRemoveEverything(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() cl := getTestList().toList(vrw).Edit().Remove(0, getTestListLen()).List() assert.True(NewList(vrw).Equals(cl)) assert.Equal(0, int(cl.Len())) } func TestListRemoveAtMiddle(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() cl := getTestList().toList(vrw) cl2 := cl.Edit().RemoveAt(100).List() cl3 := cl2.Edit().RemoveAt(200).List() expected := getTestList() assert.Equal(expected, testListFromNomsList(cl)) assert.Equal(getTestListLen(), cl.Len()) assert.True(expected.toList(vrw).Equals(cl)) expected = expected.RemoveAt(100) assert.Equal(expected, testListFromNomsList(cl2)) assert.Equal(getTestListLen()-1, cl2.Len()) assert.True(expected.toList(vrw).Equals(cl2)) expected = expected.RemoveAt(200) assert.Equal(expected, testListFromNomsList(cl3)) assert.Equal(getTestListLen()-2, cl3.Len()) assert.True(expected.toList(vrw).Equals(cl3)) } func TestListRemoveRanges(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() testList := getTestList() 
whole := testList.toList(vrw) // Compare list equality. Increment by 256 (16^2) because each iteration requires building a new list, which is slow. for incr, i := 256, 0; i < len(testList)-incr; i += incr { for window := 1; window <= incr; window *= 16 { testListPart := testList.Remove(i, i+window) expected := testListPart.toList(vrw) actual := whole.Edit().Remove(uint64(i), uint64(i+window)).List() assert.Equal(expected.Len(), actual.Len()) assert.True(expected.Equals(actual)) } } // Compare list length, which doesn't require building a new list every iteration, so the increment can be smaller. for incr, i := 10, 0; i < len(testList)-incr; i += incr { assert.Equal(len(testList)-incr, int(whole.Edit().Remove(uint64(i), uint64(i+incr)).List().Len())) } } func TestListRemoveAtEnd(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() tl := getTestListWithLen(testListSize / 10) cl := tl.toList(vrw) for i := len(tl) - 1; i >= 0; i-- { cl = cl.Edit().Remove(uint64(i), uint64(i+1)).List() expect := tl[0:i].toList(vrw) assert.True(expect.Equals(cl)) } } func TestListSet(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() testList := getTestList() cl := testList.toList(vrw) testIdx := func(idx int, testEquality bool) { newVal := Number(-1) // Test values are never < 0 cl2 := cl.Edit().Set(uint64(idx), newVal).List() assert.False(cl.Equals(cl2)) if testEquality { assert.True(testList.Set(idx, newVal).toList(vrw).Equals(cl2)) } } // Compare list equality. Increment by 100 because each iteration requires building a new list, which is slow, but always test the last index. 
for incr, i := 100, 0; i < len(testList); i += incr { testIdx(i, true) } testIdx(len(testList)-1, true) // Compare list unequality, which doesn't require building a new list every iteration, so the increment can be smaller. for incr, i := 10, 0; i < len(testList); i += incr { testIdx(i, false) } } func TestListFirstNNumbers(t *testing.T) { vrw := newTestValueStore() nums := generateNumbersAsValues(testListSize) NewList(vrw, nums...) } func TestListRefOfStructFirstNNumbers(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } vrw := newTestValueStore() nums := generateNumbersAsRefOfStructs(vrw, testListSize) NewList(vrw, nums...) } func TestListModifyAfterRead(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() list := getTestList().toList(vs) // Drop chunk values. list = vs.ReadValue(vs.WriteValue(list).TargetHash()).(List) // Modify/query. Once upon a time this would crash. llen := list.Len() z := list.Get(0) list = list.Edit().RemoveAt(0).List() assert.Equal(llen-1, list.Len()) list = list.Edit().Append(z).List() assert.Equal(llen, list.Len()) } func accumulateDiffSplices(l1, l2 List) (diff []Splice) { diffChan := make(chan Splice) go func() { l1.Diff(l2, diffChan, nil) close(diffChan) }() for splice := range diffChan { diff = append(diff, splice) } return } func accumulateDiffSplicesWithLimit(l1, l2 List, maxSpliceMatrixSize uint64) (diff []Splice) { diffChan := make(chan Splice) go func() { l1.DiffWithLimit(l2, diffChan, nil, maxSpliceMatrixSize) close(diffChan) }() for splice := range diffChan { diff = append(diff, splice) } return diff } func TestListDiffIdentical(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums := generateNumbersAsValues(5) l1 := NewList(vrw, nums...) l2 := NewList(vrw, nums...) 
diff1 := accumulateDiffSplices(l1, l2) diff2 := accumulateDiffSplices(l2, l1) assert.Equal(0, len(diff1)) assert.Equal(0, len(diff2)) } func TestListDiffVersusEmpty(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := generateNumbersAsValues(5) l1 := NewList(vrw, nums1...) l2 := NewList(vrw) diff1 := accumulateDiffSplices(l1, l2) diff2 := accumulateDiffSplices(l2, l1) assert.Equal(len(diff2), len(diff1)) diffExpected := []Splice{ {0, 0, 5, 0}, } assert.Equal(diffExpected, diff1, "expected diff is wrong") } func TestListDiffReverse(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := generateNumbersAsValues(5000) nums2 := reverseValues(nums1) l1 := NewList(vrw, nums1...) l2 := NewList(vrw, nums2...) diff1 := accumulateDiffSplices(l1, l2) diff2 := accumulateDiffSplices(l2, l1) diffExpected := []Splice{ {0, 5000, 5000, 0}, } assert.Equal(diffExpected, diff1, "expected diff is wrong") assert.Equal(diffExpected, diff2, "expected diff is wrong") } func TestListDiffReverseWithLargerLimit(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := generateNumbersAsValues(5000) nums2 := reverseValues(nums1) l1 := NewList(vrw, nums1...) l2 := NewList(vrw, nums2...) 
diff1 := accumulateDiffSplicesWithLimit(l1, l2, 27e6) diff2 := accumulateDiffSplicesWithLimit(l2, l1, 27e6) assert.Equal(len(diff2), len(diff1)) diffExpected := []Splice{ {0, 2499, 2500, 0}, {2500, 2500, 2499, 2501}, } assert.Equal(diffExpected, diff1, "expected diff is wrong") assert.Equal(diffExpected, diff2, "expected diff is wrong") } func TestListDiffRemove5x100(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := generateNumbersAsValues(5000) nums2 := generateNumbersAsValues(5000) for count := 5; count > 0; count-- { nums2 = spliceValues(nums2, (count-1)*1000, 100) } l1 := NewList(vrw, nums1...) l2 := NewList(vrw, nums2...) diff1 := accumulateDiffSplices(l1, l2) diff2 := accumulateDiffSplices(l2, l1) assert.Equal(len(diff1), len(diff2)) diff2Expected := []Splice{ {0, 100, 0, 0}, {1000, 100, 0, 0}, {2000, 100, 0, 0}, {3000, 100, 0, 0}, {4000, 100, 0, 0}, } assert.Equal(diff2Expected, diff2, "expected diff is wrong") } func TestListDiffAdd5x5(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := generateNumbersAsValues(5000) nums2 := generateNumbersAsValues(5000) for count := 5; count > 0; count-- { nums2 = spliceValues(nums2, (count-1)*1000, 0, Number(0), Number(1), Number(2), Number(3), Number(4)) } l1 := NewList(vrw, nums1...) l2 := NewList(vrw, nums2...) 
diff1 := accumulateDiffSplices(l1, l2) diff2 := accumulateDiffSplices(l2, l1) assert.Equal(len(diff1), len(diff2)) diff2Expected := []Splice{ {5, 0, 5, 5}, {1000, 0, 5, 1005}, {2000, 0, 5, 2010}, {3000, 0, 5, 3015}, {4000, 0, 5, 4020}, } assert.Equal(diff2Expected, diff2, "expected diff is wrong") } func TestListDiffReplaceReverse5x100(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := generateNumbersAsValues(5000) nums2 := generateNumbersAsValues(5000) for count := 5; count > 0; count-- { out := reverseValues(nums2[(count-1)*1000 : (count-1)*1000+100]) nums2 = spliceValues(nums2, (count-1)*1000, 100, out...) } l1 := NewList(vrw, nums1...) l2 := NewList(vrw, nums2...) diff := accumulateDiffSplices(l2, l1) diffExpected := []Splice{ {0, 49, 50, 0}, {50, 50, 49, 51}, {1000, 49, 50, 1000}, {1050, 50, 49, 1051}, {2000, 49, 50, 2000}, {2050, 50, 49, 2051}, {3000, 49, 50, 3000}, {3050, 50, 49, 3051}, {4000, 49, 50, 4000}, {4050, 50, 49, 4051}, } assert.Equal(diffExpected, diff, "expected diff is wrong") } func TestListDiffString1(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := []Value{String("one"), String("two"), String("three")} nums2 := []Value{String("one"), String("two"), String("three")} l1 := NewList(vrw, nums1...) l2 := NewList(vrw, nums2...) diff := accumulateDiffSplices(l2, l1) assert.Equal(0, len(diff)) } func TestListDiffString2(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := []Value{String("one"), String("two"), String("three")} nums2 := []Value{String("one"), String("two"), String("three"), String("four")} l1 := NewList(vrw, nums1...) l2 := NewList(vrw, nums2...) 
diff := accumulateDiffSplices(l2, l1) diffExpected := []Splice{ {3, 0, 1, 3}, } assert.Equal(diffExpected, diff, "expected diff is wrong") } func TestListDiffString3(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() assert := assert.New(t) nums1 := []Value{String("one"), String("two"), String("three")} nums2 := []Value{String("one"), String("two"), String("four")} l1 := NewList(vrw, nums1...) l2 := NewList(vrw, nums2...) diff := accumulateDiffSplices(l2, l1) diffExpected := []Splice{ {2, 1, 1, 2}, } assert.Equal(diffExpected, diff, "expected diff is wrong") } func TestListDiffLargeWithSameMiddle(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) storage := &chunks.TestStorage{} cs1 := storage.NewView() vs1 := NewValueStore(cs1) nums1 := generateNumbersAsValues(4000) l1 := NewList(vs1, nums1...) hash1 := vs1.WriteValue(l1).TargetHash() vs1.Commit(vs1.Root(), vs1.Root()) refList1 := vs1.ReadValue(hash1).(List) cs2 := storage.NewView() vs2 := NewValueStore(cs2) nums2 := generateNumbersAsValuesFromToBy(5, 3550, 1) l2 := NewList(vs2, nums2...) hash2 := vs2.WriteValue(l2).TargetHash() vs2.Commit(vs1.Root(), vs1.Root()) refList2 := vs2.ReadValue(hash2).(List) // diff lists without value store diff1 := accumulateDiffSplices(l2, l1) assert.Equal(2, len(diff1)) // diff lists from value stores diff2 := accumulateDiffSplices(refList2, refList1) assert.Equal(2, len(diff2)) // diff without and with value store should be same assert.Equal(diff1, diff2) // should only read/write a "small & reasonably sized portion of the total" assert.Equal(9, cs1.Writes) assert.Equal(3, cs1.Reads) assert.Equal(9, cs2.Writes) assert.Equal(3, cs2.Reads) } func TestListDiffAllValuesInSequenceRemoved(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() newSequenceMetaTuple := func(vs ...Value) metaTuple { seq := newListLeafSequence(vrw, vs...) 
list := newList(seq) return newMetaTuple(vrw.WriteValue(list), orderedKeyFromInt(len(vs)), uint64(len(vs))) } m1 := newSequenceMetaTuple(Number(1), Number(2), Number(3)) m2 := newSequenceMetaTuple(Number(4), Number(5), Number(6), Number(7), Number(8)) m3 := newSequenceMetaTuple(Number(9), Number(10), Number(11), Number(12), Number(13), Number(14), Number(15)) l1 := newList(newListMetaSequence(1, []metaTuple{m1, m3}, vrw)) // [1, 2, 3][9, 10, 11, 12, 13, 14, 15] l2 := newList(newListMetaSequence(1, []metaTuple{m1, m2, m3}, vrw)) // [1, 2, 3][4, 5, 6, 7, 8][9, 10, 11, 12, 13, 14, 15] diff := accumulateDiffSplices(l2, l1) expected := []Splice{ {3, 0, 5, 3}, } assert.Equal(expected, diff) } func TestListTypeAfterMutations(t *testing.T) { smallTestChunks() defer normalProductionChunks() assert := assert.New(t) test := func(n int, c interface{}) { vrw := newTestValueStore() values := generateNumbersAsValues(n) l := NewList(vrw, values...) assert.Equal(l.Len(), uint64(n)) assert.IsType(c, l.asSequence()) assert.True(TypeOf(l).Equals(MakeListType(NumberType))) l = l.Edit().Append(String("a")).List() assert.Equal(l.Len(), uint64(n+1)) assert.IsType(c, l.asSequence()) assert.True(TypeOf(l).Equals(MakeListType(MakeUnionType(NumberType, StringType)))) l = l.Edit().Splice(l.Len()-1, 1).List() assert.Equal(l.Len(), uint64(n)) assert.IsType(c, l.asSequence()) assert.True(TypeOf(l).Equals(MakeListType(NumberType))) } test(15, listLeafSequence{}) test(1500, metaSequence{}) } func TestListRemoveLastWhenNotLoaded(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() reload := func(l List) List { return vs.ReadValue(vs.WriteValue(l).TargetHash()).(List) } tl := newTestList(1024) nl := tl.toList(vs) for len(tl) > 0 { tl = tl[:len(tl)-1] nl = reload(nl.Edit().RemoveAt(uint64(len(tl))).List()) assert.True(tl.toList(vs).Equals(nl)) } } func TestListConcat(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short 
mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() reload := func(vs *ValueStore, l List) List { return vs.ReadValue(vs.WriteValue(l).TargetHash()).(List) } run := func(seed int64, size, from, to, by int) { r := rand.New(rand.NewSource(seed)) listSlice := make(testList, size) for i := range listSlice { listSlice[i] = Number(r.Intn(size)) } list := listSlice.toList(vs) for i := from; i < to; i += by { fst := reload(vs, listSlice[:i].toList(vs)) snd := reload(vs, listSlice[i:].toList(vs)) actual := fst.Concat(snd) assert.True(list.Equals(actual), "fail at %d/%d (with expected length %d, actual %d)", i, size, list.Len(), actual.Len()) } } run(0, 10, 0, 10, 1) run(1, 100, 0, 100, 1) run(2, 1000, 0, 1000, 10) run(3, 1000, 0, 100, 1) run(4, 1000, 900, 1000, 1) run(5, 1e4, 0, 1e4, 100) run(6, 1e4, 0, 1000, 10) run(7, 1e4, 1e4-1000, 1e4, 10) } func TestListConcatDifferentTypes(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() fst := generateNumbersAsValuesFromToBy(0, testListSize/2, 1) snd := generateNumbersAsStructsFromToBy(testListSize/2, testListSize, 1) var whole ValueSlice whole = append(whole, fst...) whole = append(whole, snd...) 
concat := NewList(vrw, fst...).Concat(NewList(vrw, snd...)) assert.True(NewList(vrw, whole...).Equals(concat)) } func TestListWithStructShouldHaveOptionalFields(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() list := NewList(vrw, NewStruct("Foo", StructData{ "a": Number(1), }), NewStruct("Foo", StructData{ "a": Number(2), "b": String("bar"), }), ) assert.True( MakeListType(MakeStructType("Foo", StructField{"a", NumberType, false}, StructField{"b", StringType, true}, ), ).Equals(TypeOf(list))) } func TestListWithNil(t *testing.T) { vrw := newTestValueStore() assert.Panics(t, func() { NewList(vrw, nil) }) assert.Panics(t, func() { NewList(vrw, Number(42), nil) }) } func TestListOfListsDoesNotWriteRoots(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() l1 := NewList(vrw, String("a"), String("b")) l2 := NewList(vrw, String("c"), String("d")) l3 := NewList(vrw, l1, l2) assert.Nil(vrw.ReadValue(l1.Hash())) assert.Nil(vrw.ReadValue(l2.Hash())) assert.Nil(vrw.ReadValue(l3.Hash())) vrw.WriteValue(l3) assert.Nil(vrw.ReadValue(l1.Hash())) assert.Nil(vrw.ReadValue(l2.Hash())) } ================================================ FILE: go/types/make_type.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "sort" "github.com/attic-labs/noms/go/d" ) func MakePrimitiveType(k NomsKind) *Type { switch k { case BoolKind: return BoolType case NumberKind: return NumberType case StringKind: return StringType case BlobKind: return BlobType case ValueKind: return ValueType case TypeKind: return TypeType } d.Chk.Fail("invalid NomsKind: %d", k) return nil } // MakeUnionType creates a new union type unless the elemTypes can be folded into a single non union type. 
func MakeUnionType(elemTypes ...*Type) *Type { return simplifyType(makeUnionType(elemTypes...), false) } func MakeListType(elemType *Type) *Type { return simplifyType(makeCompoundType(ListKind, elemType), false) } func MakeSetType(elemType *Type) *Type { return simplifyType(makeCompoundType(SetKind, elemType), false) } func MakeRefType(elemType *Type) *Type { return simplifyType(makeCompoundType(RefKind, elemType), false) } func MakeMapType(keyType, valType *Type) *Type { return simplifyType(makeCompoundType(MapKind, keyType, valType), false) } func MakeStructType(name string, fields ...StructField) *Type { fs := structTypeFields(fields) sort.Sort(fs) return simplifyType(makeStructType(name, fs), false) } // MakeUnionTypeIntersectStructs is a bit of strange function. It creates a // simplified union type except for structs, where it creates interesection // types. // This function will go away so do not use it! func MakeUnionTypeIntersectStructs(elemTypes ...*Type) *Type { return simplifyType(makeUnionType(elemTypes...), true) } func MakeCycleType(name string) *Type { d.PanicIfTrue(name == "") return newType(CycleDesc(name)) } func makePrimitiveType(k NomsKind) *Type { return newType(PrimitiveDesc(k)) } var BoolType = makePrimitiveType(BoolKind) var NumberType = makePrimitiveType(NumberKind) var StringType = makePrimitiveType(StringKind) var BlobType = makePrimitiveType(BlobKind) var TypeType = makePrimitiveType(TypeKind) var ValueType = makePrimitiveType(ValueKind) func makeCompoundType(kind NomsKind, elemTypes ...*Type) *Type { return newType(CompoundDesc{kind, elemTypes}) } func makeUnionType(elemTypes ...*Type) *Type { if len(elemTypes) == 1 { return elemTypes[0] } return makeCompoundType(UnionKind, elemTypes...) 
} func makeStructTypeQuickly(name string, fields structTypeFields) *Type { return newType(StructDesc{name, fields}) } func makeStructType(name string, fields structTypeFields) *Type { verifyStructName(name) verifyFields(fields) return makeStructTypeQuickly(name, fields) } type FieldMap map[string]*Type func MakeStructTypeFromFields(name string, fields FieldMap) *Type { fs := make(structTypeFields, len(fields)) i := 0 for k, v := range fields { fs[i] = StructField{k, v, false} i++ } sort.Sort(fs) return simplifyType(makeStructType(name, fs), false) } // StructField describes a field in a struct type. type StructField struct { Name string Type *Type Optional bool } type structTypeFields []StructField func (s structTypeFields) Len() int { return len(s) } func (s structTypeFields) Swap(i, j int) { s[i], s[j] = s[j], s[i] } func (s structTypeFields) Less(i, j int) bool { return s[i].Name < s[j].Name } ================================================ FILE: go/types/map.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "fmt" "sort" "github.com/attic-labs/noms/go/d" ) type Map struct { orderedSequence } func newMap(seq orderedSequence) Map { return Map{seq} } func mapHashValueBytes(item sequenceItem, rv *rollingValueHasher) { entry := item.(mapEntry) hashValueBytes(entry.key, rv) hashValueBytes(entry.value, rv) } func NewMap(vrw ValueReadWriter, kv ...Value) Map { entries := buildMapData(kv) ch := newEmptyMapSequenceChunker(vrw) for _, entry := range entries { ch.Append(entry) } return newMap(ch.Done().(orderedSequence)) } // NewStreamingMap takes an input channel of values and returns a output // channel that will produce a finished Map. Values sent to the input channel // must be alternating keys and values. (e.g. k1, v1, k2, v2...). 
Moreover keys // need to be added to the channel in Noms sortorder, adding key values to the // input channel out of order will result in a panic. Once the input channel is // closed by the caller, a finished Map will be sent to the output channel. See // graph_builder.go for building collections with values that are not in order. func NewStreamingMap(vrw ValueReadWriter, kvs <-chan Value) <-chan Map { d.PanicIfTrue(vrw == nil) return newStreamingMap(vrw, kvs, func(vrw ValueReadWriter, kvs <-chan Value, outChan chan<- Map) { go readMapInput(vrw, kvs, outChan) }) } type streamingMapReadFunc func(vrw ValueReadWriter, kvs <-chan Value, outChan chan<- Map) func newStreamingMap(vrw ValueReadWriter, kvs <-chan Value, readFunc streamingMapReadFunc) <-chan Map { outChan := make(chan Map, 1) readFunc(vrw, kvs, outChan) return outChan } func readMapInput(vrw ValueReadWriter, kvs <-chan Value, outChan chan<- Map) { defer close(outChan) ch := newEmptyMapSequenceChunker(vrw) var lastK Value nextIsKey := true var k Value for v := range kvs { d.PanicIfTrue(v == nil) if nextIsKey { k = v d.PanicIfFalse(lastK == nil || lastK.Less(k)) lastK = k nextIsKey = false continue } ch.Append(mapEntry{key: k, value: v}) nextIsKey = true } outChan <- newMap(ch.Done().(orderedSequence)) } // Diff computes the diff from |last| to |m| using the top-down algorithm, // which completes as fast as possible while taking longer to return early // results than left-to-right. func (m Map) Diff(last Map, changes chan<- ValueChanged, closeChan <-chan struct{}) { if m.Equals(last) { return } orderedSequenceDiffTopDown(last.orderedSequence, m.orderedSequence, changes, closeChan) } // DiffHybrid computes the diff from |last| to |m| using a hybrid algorithm // which balances returning results early vs completing quickly, if possible. 
func (m Map) DiffHybrid(last Map, changes chan<- ValueChanged, closeChan <-chan struct{}) { if m.Equals(last) { return } orderedSequenceDiffBest(last.orderedSequence, m.orderedSequence, changes, closeChan) } // DiffLeftRight computes the diff from |last| to |m| using a left-to-right // streaming approach, optimised for returning results early, but not // completing quickly. func (m Map) DiffLeftRight(last Map, changes chan<- ValueChanged, closeChan <-chan struct{}) { if m.Equals(last) { return } orderedSequenceDiffLeftRight(last.orderedSequence, m.orderedSequence, changes, closeChan) } // Collection interface func (m Map) asSequence() sequence { return m.orderedSequence } // Value interface func (m Map) Value() Value { return m } func (m Map) WalkValues(cb ValueCallback) { iterAll(m, func(v Value, idx uint64) { cb(v) }) return } func (m Map) firstOrLast(last bool) (Value, Value) { cur := newCursorAt(m.orderedSequence, emptyKey, false, last) if !cur.valid() { return nil, nil } entry := cur.current().(mapEntry) return entry.key, entry.value } func (m Map) First() (Value, Value) { return m.firstOrLast(false) } func (m Map) Last() (Value, Value) { return m.firstOrLast(true) } func (m Map) At(idx uint64) (key, value Value) { if idx >= m.Len() { panic(fmt.Errorf("Out of bounds: %d >= %d", idx, m.Len())) } cur := newCursorAtIndex(m.orderedSequence, idx) entry := cur.current().(mapEntry) return entry.key, entry.value } func (m Map) MaybeGet(key Value) (v Value, ok bool) { cur := newCursorAtValue(m.orderedSequence, key, false, false) if !cur.valid() { return nil, false } entry := cur.current().(mapEntry) if !entry.key.Equals(key) { return nil, false } return entry.value, true } func (m Map) Has(key Value) bool { cur := newCursorAtValue(m.orderedSequence, key, false, false) if !cur.valid() { return false } entry := cur.current().(mapEntry) return entry.key.Equals(key) } func (m Map) Get(key Value) Value { v, _ := m.MaybeGet(key) return v } type mapIterCallback func(key, 
value Value) (stop bool) func (m Map) Iter(cb mapIterCallback) { cur := newCursorAt(m.orderedSequence, emptyKey, false, false) cur.iter(func(v interface{}) bool { entry := v.(mapEntry) return cb(entry.key, entry.value) }) } // Any returns true if cb() return true for any of the items in the map. func (m Map) Any(cb func(k, v Value) bool) (yep bool) { m.Iter(func(k, v Value) bool { if cb(k, v) { yep = true return true } return false }) return } func (m Map) Iterator() *MapIterator { return m.IteratorAt(0) } func (m Map) IteratorAt(pos uint64) *MapIterator { return &MapIterator{ cursor: newCursorAtIndex(m.orderedSequence, pos), } } func (m Map) IteratorFrom(key Value) *MapIterator { return &MapIterator{ cursor: newCursorAtValue(m.orderedSequence, key, false, false), } } type mapIterAllCallback func(key, value Value) func (m Map) IterAll(cb mapIterAllCallback) { var k Value iterAll(m, func(v Value, idx uint64) { if k != nil { cb(k, v) k = nil } else { k = v } }) d.PanicIfFalse(k == nil) } func (m Map) IterFrom(start Value, cb mapIterCallback) { cur := newCursorAtValue(m.orderedSequence, start, false, false) cur.iter(func(v interface{}) bool { entry := v.(mapEntry) return cb(entry.key, entry.value) }) } func (m Map) Edit() *MapEditor { return NewMapEditor(m) } func buildMapData(values []Value) mapEntrySlice { if len(values) == 0 { return mapEntrySlice{} } if len(values)%2 != 0 { d.Panic("Must specify even number of key/value pairs") } kvs := make(mapEntrySlice, len(values)/2) for i := 0; i < len(values); i += 2 { d.PanicIfTrue(values[i] == nil) d.PanicIfTrue(values[i+1] == nil) entry := mapEntry{values[i], values[i+1]} kvs[i/2] = entry } uniqueSorted := make(mapEntrySlice, 0, len(kvs)) sort.Stable(kvs) last := kvs[0] for i := 1; i < len(kvs); i++ { kv := kvs[i] if !kv.key.Equals(last.key) { uniqueSorted = append(uniqueSorted, last) } last = kv } return append(uniqueSorted, last) } func makeMapLeafChunkFn(vrw ValueReadWriter) makeChunkFn { return func(level uint64, 
items []sequenceItem) (Collection, orderedKey, uint64) { d.PanicIfFalse(level == 0) mapData := make([]mapEntry, len(items), len(items)) var lastKey Value for i, v := range items { entry := v.(mapEntry) d.PanicIfFalse(lastKey == nil || lastKey.Less(entry.key)) lastKey = entry.key mapData[i] = entry } m := newMap(newMapLeafSequence(vrw, mapData...)) var key orderedKey if len(mapData) > 0 { key = newOrderedKey(mapData[len(mapData)-1].key) } return m, key, uint64(len(items)) } } func newEmptyMapSequenceChunker(vrw ValueReadWriter) *sequenceChunker { return newEmptySequenceChunker(vrw, makeMapLeafChunkFn(vrw), newOrderedMetaSequenceChunkFn(MapKind, vrw), mapHashValueBytes) } ================================================ FILE: go/types/map_editor.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "sort" "github.com/attic-labs/noms/go/d" ) // MapEditor allows for efficient editing of Map-typed prolly trees. Edits // are buffered to memory and can be applied via Build(), which returns a new // Map. Prior to Build(), Get() & Has() will return the value that the resulting // Map would return if it were built immediately prior to the respective call. // Note: The implementation biases performance towards a usage which applies // edits in key-order. 
type MapEditor struct {
	// m is the map the edits are applied to.
	m          Map
	edits      mapEditSlice // edits may contain duplicate key values, in which case, the last edit of a given key is used
	// normalized is true while edits is known to be sorted by key.
	normalized bool
}

// NewMapEditor returns an editor over m with an empty edit list, which is
// marked as normalized.
func NewMapEditor(m Map) *MapEditor {
	return &MapEditor{m, mapEditSlice{}, true}
}

// Kind returns MapKind.
func (me *MapEditor) Kind() NomsKind {
	return MapKind
}

// Value returns the Map produced by applying the buffered edits; see Map.
func (me *MapEditor) Value() Value {
	return me.Map()
}

// Map applies the buffered edits to the underlying map and returns the
// result. The original map is returned when there are no edits, or when no
// edit changes anything (removing an absent key, or setting a key to a value
// equal to the one it already has).
func (me *MapEditor) Map() Map {
	if len(me.edits) == 0 {
		return me.m // no edits
	}

	seq := me.m.orderedSequence
	vrw := seq.valueReadWriter()

	me.normalize()

	// Each effective edit contributes one single-slot channel to cursChan
	// (its cursor) and one to kvsChan (its entry), in edit order. Both outer
	// channels are unbuffered, so the producer below and the consumer loop
	// further down advance in lock step.
	cursChan := make(chan chan *sequenceCursor)
	kvsChan := make(chan chan mapEntry)

	go func() {
		for i, edit := range me.edits {
			if i+1 < len(me.edits) && me.edits[i+1].key.Equals(edit.key) {
				continue // next edit supersedes this one
			}

			// Per-iteration copy for the goroutines started below.
			edit := edit

			// TODO: Use ReadMany
			cc := make(chan *sequenceCursor, 1)
			cursChan <- cc

			// The cursor lookup runs on its own goroutine.
			go func() {
				cc <- newCursorAtValue(seq, edit.key, true, false)
			}()

			kvc := make(chan mapEntry, 1)
			kvsChan <- kvc

			// A nil value marks a removal.
			if edit.value == nil {
				kvc <- mapEntry{edit.key, nil}
				continue
			}

			// The edit already holds a Value; no need to call Value() on it.
			if v, ok := edit.value.(Value); ok {
				kvc <- mapEntry{edit.key, v}
				continue
			}

			// Otherwise resolve the Valuable on its own goroutine.
			go func() {
				sv := edit.value.Value()
				kvc <- mapEntry{edit.key, sv}
			}()
		}

		close(cursChan)
		close(kvsChan)
	}()

	// ch is created lazily, at the cursor of the first edit that actually
	// changes the map.
	var ch *sequenceChunker
	for cc := range cursChan {
		cur := <-cc
		kv := <-<-kvsChan

		// existingValue stays nil unless the cursor points at an entry whose
		// key equals the edited key.
		var existingValue Value
		if cur.idx < cur.seq.seqLen() {
			ckv := cur.current().(mapEntry)
			if ckv.key.Equals(kv.key) {
				existingValue = ckv.value
			}
		}

		if existingValue == nil && kv.value == nil {
			continue // already non-present
		}

		if existingValue != nil && kv.value != nil && existingValue.Equals(kv.value) {
			continue // same value
		}

		if ch == nil {
			ch = newSequenceChunker(cur, 0, vrw, makeMapLeafChunkFn(vrw), newOrderedMetaSequenceChunkFn(MapKind, vrw), mapHashValueBytes)
		} else {
			ch.advanceTo(cur)
		}

		// NOTE(review): Skip presumably drops the existing entry under the
		// cursor so that it is replaced or removed — confirm in the chunker.
		if existingValue != nil {
			ch.Skip()
		}
		if kv.value != nil {
			ch.Append(kv)
		}
	}

	if ch == nil {
		return me.m // no edits required application
	}

	return newMap(ch.Done().(orderedSequence))
}

// Set records an edit that stores v under k and returns the editor. A nil v
// is rejected through d.PanicIfTrue; use Remove to delete a key.
func (me *MapEditor) Set(k Value, v Valuable) *MapEditor {
	d.PanicIfTrue(v == nil)
	me.set(k, v)
	return me
}

// SetM records one Set per pair of kv, which holds alternating keys and
// values, and returns the editor. The length of kv must be even and every key
// must be a Value, otherwise the length check or the type assertion panics.
func (me *MapEditor) SetM(kv ...Valuable) *MapEditor {
	d.PanicIfFalse(len(kv)%2 == 0)

	for i := 0; i < len(kv); i += 2 {
		me.Set(kv[i].(Value), kv[i+1])
	}
	return me
}

// Remove records an edit that deletes k (an edit with a nil value) and
// returns the editor.
func (me *MapEditor) Remove(k Value) *MapEditor {
	me.set(k, nil)
	return me
}

// Get returns the value of the latest buffered edit for k, which is nil for
// a pending removal. Without an edit for k it returns the underlying map's
// value.
func (me *MapEditor) Get(k Value) Valuable {
	if idx, found := me.findEdit(k); found {
		return me.edits[idx].value
	}

	return me.m.Get(k)
}

// Has reports whether k would be present after the buffered edits: the
// latest edit for k decides, and without one the underlying map is asked.
func (me *MapEditor) Has(k Value) bool {
	if idx, found := me.findEdit(k); found {
		return me.edits[idx].value != nil
	}

	return me.m.Has(k)
}

// set buffers the edit (k, v). An edit for the same key as the most recent
// one overwrites it in place. Otherwise the edit is appended, and the list
// stops being normalized unless it was normalized and k sorts after the
// previous last key.
func (me *MapEditor) set(k Value, v Valuable) {
	if len(me.edits) == 0 {
		me.edits = append(me.edits, mapEdit{k, v})
		return
	}

	final := me.edits[len(me.edits)-1]
	if final.key.Equals(k) {
		me.edits[len(me.edits)-1] = mapEdit{k, v}
		return // update the last edit
	}

	me.edits = append(me.edits, mapEdit{k, v})

	if me.normalized && final.key.Less(k) {
		// fast-path: edits take place in key-order
		return
	}

	// de-normalize
	me.normalized = false
}

// Find the edit position of the last edit for a given key. The edit list is
// normalized (sorted) first so that a binary search can be used; found is
// false when there is no edit for k.
func (me *MapEditor) findEdit(k Value) (idx int, found bool) {
	me.normalize()

	// First position whose key is not less than k.
	idx = sort.Search(len(me.edits), func(i int) bool {
		return !me.edits[i].key.Less(k)
	})

	if idx == len(me.edits) {
		return
	}

	if !me.edits[idx].key.Equals(k) {
		return
	}

	// advance to final edit position where kv.key == k
	for idx < len(me.edits) && me.edits[idx].key.Equals(k) {
		idx++
	}
	idx--

	found = true
	return
}

// normalize sorts the edits by key if they are not already marked as sorted.
// The sort is stable, so edits for the same key keep the order in which they
// were made and the latest one stays last.
func (me *MapEditor) normalize() {
	if me.normalized {
		return
	}

	sort.Stable(me.edits)
	// TODO: GC duplicate keys over some threshold of collectable memory?
	me.normalized = true
}

// mapEdit is one buffered edit; a nil value means the key is to be removed.
type mapEdit struct {
	key   Value
	value Valuable
}

// mapEditSlice implements sort.Interface, ordering edits by key.
type mapEditSlice []mapEdit

func (mes mapEditSlice) Len() int           { return len(mes) }
func (mes mapEditSlice) Swap(i, j int)      { mes[i], mes[j] = mes[j], mes[i] }
func (mes mapEditSlice) Less(i, j int) bool { return mes[i].key.Less(mes[j].key) }

================================================
FILE: go/types/map_iterator.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

// MapIterator can efficiently iterate through a Noms Map.
type MapIterator struct {
	cursor       *sequenceCursor
	// NOTE(review): currentKey and currentValue are not referenced by any
	// method in this file — confirm whether they are still needed.
	currentKey   Value
	currentValue Value
}

// Valid reports whether the underlying cursor is valid, i.e. whether the
// iterator currently points at an entry.
func (mi *MapIterator) Valid() bool {
	return mi.cursor.valid()
}

// Entry returns the current key and value; both are nil when the iterator is
// not valid.
func (mi *MapIterator) Entry() (k Value, v Value) {
	return mi.Key(), mi.Value()
}

// Key returns the key of the current entry, or nil when the iterator is not
// valid.
func (mi *MapIterator) Key() Value {
	if !mi.cursor.valid() {
		return nil
	}
	return mi.cursor.current().(mapEntry).key
}

// Value returns the value of the current entry, or nil when the iterator is
// not valid.
func (mi *MapIterator) Value() Value {
	if !mi.cursor.valid() {
		return nil
	}
	return mi.cursor.current().(mapEntry).value
}

// Position returns the cursor's idx as a uint64, or 0 when the iterator is
// not valid.
// NOTE(review): idx looks like the cursor's index within its current
// sequence — confirm that this is the position within the whole map.
func (mi *MapIterator) Position() uint64 {
	if !mi.cursor.valid() {
		return 0
	}
	return uint64(mi.cursor.idx)
}

// Prev moves the iterator to the previous entry of the Map and returns the
// result of the cursor's retreat. It returns false without moving when the
// iterator is already invalid.
func (mi *MapIterator) Prev() bool {
	if !mi.cursor.valid() {
		return false
	}
	return mi.cursor.retreat()
}

// Next moves the iterator to the subsequent entry of the Map and returns the
// result of the cursor's advance. It returns false without moving when the
// iterator is already invalid.
func (mi *MapIterator) Next() bool {
	if !mi.cursor.valid() {
		return false
	}
	return mi.cursor.advance()
}

================================================
FILE: go/types/map_iterator_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "fmt" "testing" "github.com/stretchr/testify/assert" ) func TestMapIterator(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() me := NewMap(vrw).Edit() for i := 0; i < 5; i++ { me.Set(String(string(byte(65+i))), Number(i)) } m := me.Map() tc := []struct { reverse bool iter bool iterAt uint64 iterFrom string expected []string }{ {false, true, 0, "", []string{"A", "B", "C", "D", "E"}}, {false, false, 0, "", []string{"A", "B", "C", "D", "E"}}, {false, false, 2, "", []string{"C", "D", "E"}}, {false, false, 4, "", []string{"E"}}, {false, false, 5, "", []string{}}, {false, false, 0, "A", []string{"A", "B", "C", "D", "E"}}, {false, false, 0, "C", []string{"C", "D", "E"}}, {false, false, 4, "E", []string{"E"}}, {false, false, 0, "AA", []string{"B", "C", "D", "E"}}, {false, false, 0, "F", []string{}}, {true, false, 0, "", []string{}}, {true, true, 0, "", []string{}}, {true, false, 2, "", []string{"C", "B", "A"}}, {true, false, 4, "", []string{"E", "D", "C", "B", "A"}}, {true, false, 5, "", []string{}}, {true, false, 0, "A", []string{"A"}}, {true, false, 0, "C", []string{"C", "B", "A"}}, {true, false, 0, "E", []string{"E", "D", "C", "B", "A"}}, {true, false, 0, "AA", []string{"B", "A"}}, {true, false, 0, "F", []string{}}, } for i, t := range tc { lbl := fmt.Sprintf("test case %d", i) var it *MapIterator if t.iter { it = m.Iterator() } else if t.iterFrom != "" { it = m.IteratorFrom(String(t.iterFrom)) } else { it = m.IteratorAt(t.iterAt) } for i, e := range t.expected { lbl := fmt.Sprintf("%s: iteration %d", lbl, i) assert.True(it.Valid(), lbl) assert.Equal(e, string(it.Key().(String)), lbl) assert.True(m.Get(it.Key()).Equals(it.Value()), lbl) k, v := it.Entry() assert.Equal(e, string(k.(String)), lbl) assert.True(m.Get(it.Key()).Equals(v), lbl) assert.True(m.Get(it.Key()).Equals(Number(it.Position())), lbl) var last bool if t.reverse { last 
= it.Prev() } else { last = it.Next() } assert.Equal(i < len(t.expected)-1, last, lbl) assert.Equal(i < len(t.expected)-1, it.Valid(), lbl) } } } ================================================ FILE: go/types/map_leaf_sequence.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "sort" "github.com/attic-labs/noms/go/d" ) type mapLeafSequence struct { leafSequence } type mapEntry struct { key Value value Value } func (entry mapEntry) writeTo(w nomsWriter) { entry.key.writeTo(w) entry.value.writeTo(w) } func readMapEntry(r *valueDecoder) mapEntry { return mapEntry{r.readValue(), r.readValue()} } func (entry mapEntry) equals(other mapEntry) bool { return entry.key.Equals(other.key) && entry.value.Equals(other.value) } type mapEntrySlice []mapEntry func (mes mapEntrySlice) Len() int { return len(mes) } func (mes mapEntrySlice) Swap(i, j int) { mes[i], mes[j] = mes[j], mes[i] } func (mes mapEntrySlice) Less(i, j int) bool { return mes[i].key.Less(mes[j].key) } func (mes mapEntrySlice) Equals(other mapEntrySlice) bool { if mes.Len() != other.Len() { return false } for i, v := range mes { if !v.equals(other[i]) { return false } } return true } func newMapLeafSequence(vrw ValueReadWriter, data ...mapEntry) orderedSequence { d.PanicIfTrue(vrw == nil) offsets := make([]uint32, len(data)+sequencePartValues+1) w := newBinaryNomsWriter() offsets[sequencePartKind] = w.offset MapKind.writeTo(&w) offsets[sequencePartLevel] = w.offset w.writeCount(0) // level offsets[sequencePartCount] = w.offset count := uint64(len(data)) w.writeCount(count) offsets[sequencePartValues] = w.offset for i, me := range data { me.writeTo(&w) offsets[i+sequencePartValues+1] = w.offset } return mapLeafSequence{newLeafSequence(vrw, w.data(), offsets, count)} } func (ml mapLeafSequence) writeTo(w nomsWriter) { w.writeRaw(ml.buff) } 
// sequence interface

// getItem decodes and returns the map entry stored at index idx.
func (ml mapLeafSequence) getItem(idx int) sequenceItem {
	dec := ml.decoderSkipToIndex(idx)
	return readMapEntry(&dec)
}

// WalkRefs passes the serialized value bytes of the sequence to walkRefs
// together with cb.
func (ml mapLeafSequence) WalkRefs(cb RefCallback) {
	walkRefs(ml.valueBytes(), cb)
}

// entries decodes every key/value pair of the sequence, in stored order, into
// a newly allocated mapEntrySlice.
func (ml mapLeafSequence) entries() mapEntrySlice {
	dec, count := ml.decoderSkipToValues()
	entries := make(mapEntrySlice, count)
	for i := uint64(0); i < count; i++ {
		entries[i] = mapEntry{dec.readValue(), dec.readValue()}
	}
	return entries
}

// getCompareFn returns a function that reports whether the entry at idx in ml
// equals the entry at otherIdx in other. Keys are compared first; values are
// only decoded when the keys are equal. other must be a mapLeafSequence: the
// unchecked type assertion panics otherwise.
func (ml mapLeafSequence) getCompareFn(other sequence) compareFn {
	dec1 := ml.decoder()
	ml2 := other.(mapLeafSequence)
	dec2 := ml2.decoder()
	// The two decoders are captured and repositioned on every call rather
	// than being re-created per comparison.
	return func(idx, otherIdx int) bool {
		dec1.offset = uint32(ml.getItemOffset(idx))
		dec2.offset = uint32(ml2.getItemOffset(otherIdx))
		k1 := dec1.readValue()
		k2 := dec2.readValue()
		if !k1.Equals(k2) {
			return false
		}
		v1 := dec1.readValue()
		v2 := dec2.readValue()
		return v1.Equals(v2)
	}
}

// typeOf computes the Map type of the sequence: a compound MapKind type whose
// key and value types are the unions of the types of all keys and all values.
func (ml mapLeafSequence) typeOf() *Type {
	dec, count := ml.decoderSkipToValues()
	kts := make(typeSlice, 0, count)
	vts := make(typeSlice, 0, count)
	var lastKeyType, lastValueType *Type
	for i := uint64(0); i < count; i++ {
		if lastKeyType != nil && lastValueType != nil {
			// Fast path: when both the key and the value are known to have
			// the same types as the previous entry, nothing new needs to be
			// added to the unions. Presumably isValueSameTypeForSure moves
			// the decoder past the value when it returns true - confirm
			// against valueDecoder.
			offset := dec.offset
			if dec.isValueSameTypeForSure(lastKeyType) && dec.isValueSameTypeForSure(lastValueType) {
				continue
			}
			// Not certain: rewind to the start of the entry and read the
			// types the regular way.
			dec.offset = offset
		}

		lastKeyType = dec.readTypeOfValue()
		kts = append(kts, lastKeyType)
		lastValueType = dec.readTypeOfValue()
		vts = append(vts, lastValueType)
	}

	return makeCompoundType(MapKind, makeUnionType(kts...), makeUnionType(vts...))
}

// orderedSequence interface

// decoderSkipToIndex returns a decoder positioned at the offset of the item
// at index idx, i.e. at the start of that entry's key.
func (ml mapLeafSequence) decoderSkipToIndex(idx int) valueDecoder {
	offset := ml.getItemOffset(idx)
	return ml.decoderAtOffset(offset)
}

// getKey decodes the key of the entry at index idx and wraps it in an
// orderedKey.
func (ml mapLeafSequence) getKey(idx int) orderedKey {
	dec := ml.decoderSkipToIndex(idx)
	return newOrderedKey(dec.readValue())
}

// search returns the smallest index whose key is not less than key, or
// ml.Len() when every key is less than key. It uses sort.Search, so it
// relies on the entries being stored in ascending key order.
func (ml mapLeafSequence) search(key orderedKey) int {
	return sort.Search(int(ml.Len()), func(i int) bool {
		return !ml.getKey(i).Less(key)
	})
}

func (ml mapLeafSequence)
getValue(idx int) Value { dec := ml.decoderSkipToIndex(idx) dec.skipValue() return dec.readValue() } ================================================ FILE: go/types/map_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "fmt" "math/rand" "sort" "sync" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) const testMapSize = 8000 type genValueFn func(i int) Value type testMap struct { entries mapEntrySlice knownBadKey Value } func (tm testMap) SetValue(i int, v Value) testMap { entries := make([]mapEntry, 0, len(tm.entries)) entries = append(entries, tm.entries...) entries[i].value = v return testMap{entries, tm.knownBadKey} } func (tm testMap) Remove(from, to int) testMap { entries := make([]mapEntry, 0, len(tm.entries)-(to-from)) entries = append(entries, tm.entries[:from]...) entries = append(entries, tm.entries[to:]...) return testMap{entries, tm.knownBadKey} } func (tm testMap) MaybeGet(key Value) (v Value, ok bool) { for _, entry := range tm.entries { if entry.key.Equals(key) { return entry.value, true } } return nil, false } func (tm testMap) Diff(last testMap) (added []Value, removed []Value, modified []Value) { // Note: this could be use tm.toMap/last.toMap and then tmMap.Diff(lastMap) but the // purpose of this method is to be redundant. 
if len(tm.entries) == 0 && len(last.entries) == 0 { return // nothing changed } if len(tm.entries) == 0 { // everything removed for _, entry := range last.entries { removed = append(removed, entry.key) } return } if len(last.entries) == 0 { // everything added for _, entry := range tm.entries { added = append(added, entry.key) } return } for _, entry := range tm.entries { otherValue, exists := last.MaybeGet(entry.key) if !exists { added = append(added, entry.key) } else if !entry.value.Equals(otherValue) { modified = append(modified, entry.key) } } for _, entry := range last.entries { _, exists := tm.MaybeGet(entry.key) if !exists { removed = append(removed, entry.key) } } return } func (tm testMap) toMap(vrw ValueReadWriter) Map { keyvals := []Value{} for _, entry := range tm.entries { keyvals = append(keyvals, entry.key, entry.value) } return NewMap(vrw, keyvals...) } func toValuable(vs ValueSlice) []Valuable { vb := make([]Valuable, len(vs)) for i, v := range vs { vb[i] = v } return vb } func (tm testMap) Flatten(from, to int) []Value { flat := make([]Value, 0, len(tm.entries)*2) for _, entry := range tm.entries[from:to] { flat = append(flat, entry.key) flat = append(flat, entry.value) } return flat } func (tm testMap) FlattenAll() []Value { return tm.Flatten(0, len(tm.entries)) } func newSortedTestMap(length int, gen genValueFn) testMap { keys := make(ValueSlice, 0, length) for i := 0; i < length; i++ { keys = append(keys, gen(i)) } sort.Sort(keys) entries := make([]mapEntry, 0, len(keys)) for i, k := range keys { entries = append(entries, mapEntry{k, Number(i * 2)}) } return testMap{entries, Number(length + 2)} } func newTestMapFromMap(m Map) testMap { entries := make([]mapEntry, 0, m.Len()) m.IterAll(func(key, value Value) { entries = append(entries, mapEntry{key, value}) }) return testMap{entries, Number(-0)} } func newRandomTestMap(length int, gen genValueFn) testMap { s := rand.NewSource(4242) used := map[int]bool{} mask := int(0xffffff) entries := 
make([]mapEntry, 0, length) for len(entries) < length { v := int(s.Int63()) & mask if _, ok := used[v]; !ok { entry := mapEntry{gen(v), gen(v * 2)} entries = append(entries, entry) used[v] = true } } return testMap{entries, gen(mask + 1)} } func validateMap(t *testing.T, vrw ValueReadWriter, m Map, entries mapEntrySlice) { tm := testMap{entries: entries} assert.True(t, m.Equals(tm.toMap(vrw))) out := mapEntrySlice{} m.IterAll(func(k Value, v Value) { out = append(out, mapEntry{k, v}) }) assert.True(t, out.Equals(entries)) } type mapTestSuite struct { collectionTestSuite elems testMap } func newMapTestSuite(size uint, expectChunkCount int, expectPrependChunkDiff int, expectAppendChunkDiff int, gen genValueFn) *mapTestSuite { vrw := newTestValueStore() length := 1 << size keyType := TypeOf(gen(0)) elems := newSortedTestMap(length, gen) tr := MakeMapType(keyType, NumberType) tmap := NewMap(vrw, elems.FlattenAll()...) return &mapTestSuite{ collectionTestSuite: collectionTestSuite{ col: tmap, expectType: tr, expectLen: uint64(length), expectChunkCount: expectChunkCount, expectPrependChunkDiff: expectPrependChunkDiff, expectAppendChunkDiff: expectAppendChunkDiff, validate: func(v2 Collection) bool { if v2.Len() != uint64(elems.entries.Len()) { fmt.Println("lengths not equal:", v2.Len(), elems.entries.Len()) return false } l2 := v2.(Map) idx := uint64(0) l2.Iter(func(key, value Value) (stop bool) { entry := elems.entries[idx] if !key.Equals(entry.key) { fmt.Printf("%d: %s (%s)\n!=\n%s (%s)\n", idx, EncodedValue(key), key.Hash(), EncodedValue(entry.key), entry.key.Hash()) stop = true } if !value.Equals(entry.value) { fmt.Printf("%s (%s) !=\n%s (%s)\n", EncodedValue(value), value.Hash(), EncodedValue(entry.value), entry.value.Hash()) stop = true } idx++ return }) return idx == v2.Len() }, prependOne: func() Collection { dup := make([]mapEntry, length+1) dup[0] = mapEntry{Number(-1), Number(-2)} copy(dup[1:], elems.entries) flat := []Value{} for _, entry := range dup { flat 
= append(flat, entry.key, entry.value) } return NewMap(vrw, flat...) }, appendOne: func() Collection { dup := make([]mapEntry, length+1) copy(dup, elems.entries) dup[len(dup)-1] = mapEntry{Number(length*2 + 1), Number((length*2 + 1) * 2)} flat := []Value{} for _, entry := range dup { flat = append(flat, entry.key, entry.value) } return NewMap(vrw, flat...) }, }, elems: elems, } } func (suite *mapTestSuite) createStreamingMap(vs *ValueStore) Map { kvChan := make(chan Value) mapChan := NewStreamingMap(vs, kvChan) for _, entry := range suite.elems.entries { kvChan <- entry.key kvChan <- entry.value } close(kvChan) return <-mapChan } func (suite *mapTestSuite) TestStreamingMap() { vs := newTestValueStore() defer vs.Close() m := suite.createStreamingMap(vs) suite.True(suite.validate(m), "map not valid") } func (suite *mapTestSuite) TestStreamingMapOrder() { vs := newTestValueStore() defer vs.Close() entries := make(mapEntrySlice, len(suite.elems.entries)) copy(entries, suite.elems.entries) entries[0], entries[1] = entries[1], entries[0] kvChan := make(chan Value, len(entries)*2) for _, e := range entries { kvChan <- e.key kvChan <- e.value } close(kvChan) readInput := func(vrw ValueReadWriter, kvs <-chan Value, outChan chan<- Map) { readMapInput(vrw, kvs, outChan) } testFunc := func() { outChan := newStreamingMap(vs, kvChan, readInput) <-outChan } suite.Panics(testFunc) } func (suite *mapTestSuite) TestStreamingMap2() { wg := sync.WaitGroup{} vs := newTestValueStore() defer vs.Close() wg.Add(2) var m1, m2 Map go func() { m1 = suite.createStreamingMap(vs) wg.Done() }() go func() { m2 = suite.createStreamingMap(vs) wg.Done() }() wg.Wait() suite.True(suite.validate(m1), "map 'm1' not valid") suite.True(suite.validate(m2), "map 'm2' not valid") } func TestMapSuite4K(t *testing.T) { suite.Run(t, newMapTestSuite(12, 4, 2, 2, newNumber)) } func TestMapSuite4KStructs(t *testing.T) { suite.Run(t, newMapTestSuite(12, 11, 2, 2, newNumberStruct)) } func newNumber(i int) Value { 
return Number(i) } func newNumberStruct(i int) Value { return NewStruct("", StructData{"n": Number(i)}) } func getTestNativeOrderMap(scale int, vrw ValueReadWriter) testMap { return newRandomTestMap(64*scale, newNumber) } func getTestRefValueOrderMap(scale int, vrw ValueReadWriter) testMap { return newRandomTestMap(64*scale, newNumber) } func getTestRefToNativeOrderMap(scale int, vrw ValueReadWriter) testMap { return newRandomTestMap(64*scale, func(i int) Value { return vrw.WriteValue(Number(i)) }) } func getTestRefToValueOrderMap(scale int, vrw ValueReadWriter) testMap { return newRandomTestMap(64*scale, func(i int) Value { return vrw.WriteValue(NewSet(vrw, Number(i))) }) } func accumulateMapDiffChanges(m1, m2 Map) (added []Value, removed []Value, modified []Value) { changes := make(chan ValueChanged) go func() { m1.Diff(m2, changes, nil) close(changes) }() for change := range changes { if change.ChangeType == DiffChangeAdded { added = append(added, change.Key) } else if change.ChangeType == DiffChangeRemoved { removed = append(removed, change.Key) } else { modified = append(modified, change.Key) } } return } func diffMapTest(assert *assert.Assertions, m1 Map, m2 Map, numAddsExpected int, numRemovesExpected int, numModifiedExpected int) (added []Value, removed []Value, modified []Value) { added, removed, modified = accumulateMapDiffChanges(m1, m2) assert.Equal(numAddsExpected, len(added), "num added is not as expected") assert.Equal(numRemovesExpected, len(removed), "num removed is not as expected") assert.Equal(numModifiedExpected, len(modified), "num modified is not as expected") tm1 := newTestMapFromMap(m1) tm2 := newTestMapFromMap(m2) tmAdded, tmRemoved, tmModified := tm1.Diff(tm2) assert.Equal(numAddsExpected, len(tmAdded), "num added is not as expected") assert.Equal(numRemovesExpected, len(tmRemoved), "num removed is not as expected") assert.Equal(numModifiedExpected, len(tmModified), "num modified is not as expected") assert.Equal(added, tmAdded, "map 
added != tmMap added") assert.Equal(removed, tmRemoved, "map removed != tmMap removed") assert.Equal(modified, tmModified, "map modified != tmMap modified") return } func TestMapDiff(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() testMap1 := newRandomTestMap(64*2, newNumber) testMap2 := newRandomTestMap(64*2, newNumber) testMapAdded, testMapRemoved, testMapModified := testMap1.Diff(testMap2) map1 := testMap1.toMap(vrw) map2 := testMap2.toMap(vrw) mapDiffAdded, mapDiffRemoved, mapDiffModified := accumulateMapDiffChanges(map1, map2) assert.Equal(t, testMapAdded, mapDiffAdded, "testMap.diff != map.diff") assert.Equal(t, testMapRemoved, mapDiffRemoved, "testMap.diff != map.diff") assert.Equal(t, testMapModified, mapDiffModified, "testMap.diff != map.diff") } func TestMapMutationReadWriteCount(t *testing.T) { // This test is a sanity check that we are reading a "reasonable" number of // sequences while mutating maps. // TODO: We are currently un-reasonable. 
temp := MakeStructTemplate("Foo", []string{"Bool", "Number", "String1", "String2"}) newLargeStruct := func(i int) Value { return temp.NewStruct([]Value{ Bool(i%2 == 0), Number(i), String(fmt.Sprintf("I AM A REALLY REALY REALL SUPER CALIFRAGILISTICLY CRAZY-ASSED LONGTASTIC String %d", i)), String(fmt.Sprintf("I am a bit shorter and also more chill: %d", i)), }) } ts := &chunks.TestStorage{} cs := ts.NewView() vs := newValueStoreWithCacheAndPending(cs, 0, 0) me := NewMap(vs).Edit() for i := 0; i < 10000; i++ { me.Set(Number(i), newLargeStruct(i)) } m := me.Map() r := vs.WriteValue(m) vs.Commit(vs.Root(), vs.Root()) m = r.TargetValue(vs).(Map) every := 100 me = m.Edit() for i := 0; i < 10000; i++ { if i%every == 0 { k := Number(i) s := me.Get(Number(i)).(Struct) s = s.Set("Number", Number(float64(s.Get("Number").(Number))+1)) me.Set(k, s) } i++ } cs.Writes = 0 cs.Reads = 0 m = me.Map() vs.Commit(vs.Root(), vs.Root()) assert.Equal(t, uint64(3), NewRef(m).Height()) assert.Equal(t, 105, cs.Reads) assert.Equal(t, 62, cs.Writes) } func TestMapInfiniteChunkBug(t *testing.T) { smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() keyLen := chunkWindow + 1 buff := &bytes.Buffer{} for i := uint32(0); i < keyLen; i++ { buff.WriteString("s") } prefix := buff.String() me := NewMap(vrw).Edit() for i := 0; i < 10000; i++ { me.Set(String(prefix+fmt.Sprintf("%d", i)), Number(i)) } me.Map() } func TestNewMap(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() m := NewMap(vrw) assert.Equal(uint64(0), m.Len()) m = NewMap(vrw, String("foo1"), String("bar1"), String("foo2"), String("bar2")) assert.Equal(uint64(2), m.Len()) assert.True(String("bar1").Equals(m.Get(String("foo1")))) assert.True(String("bar2").Equals(m.Get(String("foo2")))) } func TestMapUniqueKeysString(t *testing.T) { vrw := newTestValueStore() assert := assert.New(t) l := []Value{ String("hello"), String("world"), String("foo"), String("bar"), String("bar"), String("foo"), 
String("hello"), String("foo"), } m := NewMap(vrw, l...) assert.Equal(uint64(3), m.Len()) assert.True(String("foo").Equals(m.Get(String("hello")))) } func TestMapUniqueKeysNumber(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() l := []Value{ Number(4), Number(1), Number(0), Number(2), Number(1), Number(2), Number(3), Number(4), Number(1), Number(5), } m := NewMap(vrw, l...) assert.Equal(uint64(4), m.Len()) assert.True(Number(5).Equals(m.Get(Number(1)))) } type toTestMapFunc func(scale int, vrw ValueReadWriter) testMap func TestMapHas(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() assert := assert.New(t) doTest := func(toTestMap toTestMapFunc, scale int) { vrw := newTestValueStore() tm := toTestMap(scale, vrw) m := tm.toMap(vrw) m2 := vrw.ReadValue(vrw.WriteValue(m).TargetHash()).(Map) for _, entry := range tm.entries { k, v := entry.key, entry.value assert.True(m.Has(k)) assert.True(m.Get(k).Equals(v)) assert.True(m2.Has(k)) assert.True(m2.Get(k).Equals(v)) } diffMapTest(assert, m, m2, 0, 0, 0) } doTest(getTestNativeOrderMap, 16) doTest(getTestRefValueOrderMap, 2) doTest(getTestRefToNativeOrderMap, 2) doTest(getTestRefToValueOrderMap, 2) } func TestMapRemoveMasksUnderlyingMap(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() k := String("foo") me := NewMap(vrw, k, String("bar")).Edit() me.Remove(k) assert.False(me.Has(k)) assert.Nil(me.Get(k)) two := Number(2) me.Set(two, two) me.Remove(two) assert.False(me.Has(two)) assert.Nil(me.Get(two)) me2 := NewMap(vrw).Edit() three := Number(3) me.Set(three, me2) me.Remove(three) assert.False(me.Has(three)) assert.Nil(me.Get(three)) } func TestMapHasRemove(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() me := NewMap(vrw).Edit() bothHave := func(k Value) bool { meHas := me.Has(k) mHas := me.Map().Has(k) assert.Equal(meHas, mHas) return meHas } assert.False(bothHave(String("a"))) 
me.Set(String("a"), String("a")) assert.True(bothHave(String("a"))) me.Remove(String("a")) assert.False(bothHave(String("a"))) me.Set(String("a"), String("a")) assert.True(bothHave(String("a"))) me.Set(String("a"), String("a")) assert.True(bothHave(String("a"))) // In-order insertions me.Set(String("b"), String("b")) me.Set(String("c"), String("c")) assert.True(bothHave(String("a"))) assert.True(bothHave(String("b"))) assert.True(bothHave(String("c"))) // Out-of-order insertions me.Set(String("z"), String("z")) me.Set(String("y"), String("y")) assert.True(bothHave(String("z"))) assert.True(bothHave(String("y"))) assert.True(bothHave(String("a"))) assert.True(bothHave(String("b"))) assert.True(bothHave(String("c"))) // Removals me.Remove(String("z")).Remove(String("y")).Remove(String("a")).Remove(String("b")).Remove(String("c")).Remove(String("never-inserted")) assert.False(bothHave(String("z"))) assert.False(bothHave(String("y"))) assert.False(bothHave(String("a"))) assert.False(bothHave(String("b"))) assert.False(bothHave(String("c"))) m := me.Map() assert.True(m.Len() == 0) } func TestMapRemove(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() assert := assert.New(t) doTest := func(incr int, toTestMap toTestMapFunc, scale int) { vs := newTestValueStore() tm := toTestMap(scale, vs) whole := tm.toMap(vs) run := func(i int) { expected := tm.Remove(i, i+1).toMap(vs) actual := whole.Edit().Remove(tm.entries[i].key).Map() assert.Equal(expected.Len(), actual.Len()) assert.True(expected.Equals(actual)) diffMapTest(assert, expected, actual, 0, 0, 0) } for i := 0; i < len(tm.entries); i += incr { run(i) } run(len(tm.entries) - 1) } doTest(128, getTestNativeOrderMap, 32) doTest(64, getTestRefValueOrderMap, 4) doTest(64, getTestRefToNativeOrderMap, 4) doTest(64, getTestRefToValueOrderMap, 4) } func TestMapRemoveNonexistentKey(t *testing.T) { assert := assert.New(t) smallTestChunks() defer 
normalProductionChunks() vrw := newTestValueStore() tm := getTestNativeOrderMap(2, vrw) original := tm.toMap(vrw) actual := original.Edit().Remove(Number(-1)).Map() // rand.Int63 returns non-negative numbers. assert.Equal(original.Len(), actual.Len()) assert.True(original.Equals(actual)) } func TestMapFirst(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() m1 := NewMap(vrw) k, v := m1.First() assert.Nil(k) assert.Nil(v) m1 = m1.Edit().Set(String("foo"), String("bar")).Set(String("hot"), String("dog")).Map() ak, av := m1.First() var ek, ev Value m1.Iter(func(k, v Value) (stop bool) { ek, ev = k, v return true }) assert.True(ek.Equals(ak)) assert.True(ev.Equals(av)) } func TestMapFirst2(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() doTest := func(toTestMap toTestMapFunc, scale int) { vrw := newTestValueStore() tm := toTestMap(scale, vrw) m := tm.toMap(vrw) sort.Stable(tm.entries) actualKey, actualValue := m.First() assert.True(tm.entries[0].key.Equals(actualKey)) assert.True(tm.entries[0].value.Equals(actualValue)) } doTest(getTestNativeOrderMap, 16) doTest(getTestRefValueOrderMap, 2) doTest(getTestRefToNativeOrderMap, 2) doTest(getTestRefToValueOrderMap, 2) } func TestMapLast(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() m1 := NewMap(vrw) k, v := m1.First() assert.Nil(k) assert.Nil(v) m1 = m1.Edit().Set(String("foo"), String("bar")).Set(String("hot"), String("dog")).Map() ak, av := m1.Last() var ek, ev Value m1.Iter(func(k, v Value) (stop bool) { ek, ev = k, v return false }) assert.True(ek.Equals(ak)) assert.True(ev.Equals(av)) } func TestMapLast2(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) smallTestChunks() defer normalProductionChunks() doTest := func(toTestMap 
toTestMapFunc, scale int) { vrw := newTestValueStore() tm := toTestMap(scale, vrw) m := tm.toMap(vrw) sort.Stable(tm.entries) actualKey, actualValue := m.Last() assert.True(tm.entries[len(tm.entries)-1].key.Equals(actualKey)) assert.True(tm.entries[len(tm.entries)-1].value.Equals(actualValue)) } doTest(getTestNativeOrderMap, 16) doTest(getTestRefValueOrderMap, 2) doTest(getTestRefToNativeOrderMap, 2) doTest(getTestRefToValueOrderMap, 2) } func TestMapSetGet(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() me := NewMap(vrw).Edit() bothAre := func(k Value) Value { meV := me.Get(k) mV := me.Map().Get(k) assert.True((meV == nil && mV == nil) || meV.(Value).Equals(mV)) return mV } assert.Nil(bothAre(String("a"))) me.Set(String("a"), Number(42)) assert.True(Number(42).Equals(bothAre(String("a")))) me.Set(String("a"), Number(43)) assert.True(Number(43).Equals(bothAre(String("a")))) me.Remove(String("a")) assert.Nil(bothAre(String("a"))) // in-order insertions me.Set(String("b"), Number(43)) me.Set(String("c"), Number(44)) assert.True(Number(43).Equals(bothAre(String("b")))) assert.True(Number(44).Equals(bothAre(String("c")))) // out-of-order insertions me.Set(String("z"), Number(0)) me.Set(String("y"), Number(1)) assert.True(Number(0).Equals(bothAre(String("z")))) assert.True(Number(1).Equals(bothAre(String("y")))) // removals me.Remove(String("z")) me.Remove(String("a")) me.Remove(String("y")) me.Remove(String("b")) me.Remove(String("c")) assert.Nil(bothAre(String("a"))) assert.Nil(bothAre(String("b"))) assert.Nil(bothAre(String("c"))) assert.Nil(bothAre(String("y"))) assert.Nil(bothAre(String("z"))) assert.Nil(bothAre(String("never-inserted"))) m := me.Map() assert.True(m.Len() == 0) } func validateMapInsertion(t *testing.T, tm testMap) { vrw := newTestValueStore() allMe := NewMap(vrw).Edit() incrMe := NewMap(vrw).Edit() for i, entry := range tm.entries { allMe.Set(entry.key, entry.value) 
incrMe.Set(entry.key, entry.value) m1 := allMe.Map() m2 := incrMe.Map() validateMap(t, vrw, m1, tm.entries[0:i+1]) validateMap(t, vrw, m2, tm.entries[0:i+1]) incrMe = m2.Edit() } } func TestMapValidateInsertAscending(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() validateMapInsertion(t, newSortedTestMap(300, newNumber)) } func TestMapSet(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() assert := assert.New(t) doTest := func(incr, offset int, toTestMap toTestMapFunc, scale int) { vrw := newTestValueStore() tm := toTestMap(scale, vrw) expected := tm.toMap(vrw) run := func(from, to int) { actual := tm.Remove(from, to).toMap(vrw).Edit().SetM(toValuable(tm.Flatten(from, to))...).Map() assert.Equal(expected.Len(), actual.Len()) assert.True(expected.Equals(actual)) diffMapTest(assert, expected, actual, 0, 0, 0) } for i := 0; i < len(tm.entries)-offset; i += incr { run(i, i+offset) } run(len(tm.entries)-offset, len(tm.entries)) } doTest(18, 3, getTestNativeOrderMap, 9) doTest(128, 1, getTestNativeOrderMap, 32) doTest(64, 1, getTestRefValueOrderMap, 4) doTest(64, 1, getTestRefToNativeOrderMap, 4) doTest(64, 1, getTestRefToValueOrderMap, 4) } func TestMapSetM(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() m1 := NewMap(vrw) m2 := m1.Edit().SetM().Map() assert.True(m1.Equals(m2)) m3 := m2.Edit().SetM(String("foo"), String("bar"), String("hot"), String("dog")).Map() assert.Equal(uint64(2), m3.Len()) assert.True(String("bar").Equals(m3.Get(String("foo")))) assert.True(String("dog").Equals(m3.Get(String("hot")))) m4 := m3.Edit().SetM(String("mon"), String("key")).Map() assert.Equal(uint64(2), m3.Len()) assert.Equal(uint64(3), m4.Len()) } func TestMapSetExistingKeyToNewValue(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vrw := 
newTestValueStore() assert := assert.New(t) tm := getTestNativeOrderMap(2, vrw) original := tm.toMap(vrw) expectedWorking := tm actual := original for i, entry := range tm.entries { newValue := Number(int64(entry.value.(Number)) + 1) expectedWorking = expectedWorking.SetValue(i, newValue) actual = actual.Edit().Set(entry.key, newValue).Map() } expected := expectedWorking.toMap(vrw) assert.Equal(expected.Len(), actual.Len()) assert.True(expected.Equals(actual)) assert.False(original.Equals(actual)) diffMapTest(assert, expected, actual, 0, 0, 0) } // BUG 98 func TestMapDuplicateSet(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() m1 := NewMap(vrw, Bool(true), Bool(true), Number(42), Number(42), Number(42), Number(42)) assert.Equal(uint64(2), m1.Len()) } func TestMapMaybeGet(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() assert := assert.New(t) doTest := func(toTestMap toTestMapFunc, scale int) { vrw := newTestValueStore() tm := toTestMap(scale, vrw) m := tm.toMap(vrw) for _, entry := range tm.entries { v, ok := m.MaybeGet(entry.key) if assert.True(ok, "%v should have been in the map!", entry.key) { assert.True(v.Equals(entry.value), "%v != %v", v, entry.value) } } _, ok := m.MaybeGet(tm.knownBadKey) assert.False(ok, "m should not contain %v", tm.knownBadKey) } doTest(getTestNativeOrderMap, 2) doTest(getTestRefValueOrderMap, 2) doTest(getTestRefToNativeOrderMap, 2) doTest(getTestRefToValueOrderMap, 2) } func TestMapIter(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() m := NewMap(vrw) type entry struct { key Value value Value } type resultList []entry results := resultList{} got := func(key, val Value) bool { for _, r := range results { if key.Equals(r.key) && val.Equals(r.value) { return true } } return false } stop := false cb := func(k, v Value) bool { results = append(results, entry{k, v}) return stop } m.Iter(cb) assert.Equal(0, len(results)) m = 
m.Edit().Set(String("a"), Number(0)).Set(String("b"), Number(1)).Map() m.Iter(cb) assert.Equal(2, len(results)) assert.True(got(String("a"), Number(0))) assert.True(got(String("b"), Number(1))) results = resultList{} stop = true m.Iter(cb) assert.Equal(1, len(results)) // Iteration order not guaranteed, but it has to be one of these. assert.True(got(String("a"), Number(0)) || got(String("b"), Number(1))) } func TestMapIter2(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() doTest := func(toTestMap toTestMapFunc, scale int) { vrw := newTestValueStore() tm := toTestMap(scale, vrw) m := tm.toMap(vrw) sort.Sort(tm.entries) idx := uint64(0) endAt := uint64(64) m.Iter(func(k, v Value) (done bool) { assert.True(tm.entries[idx].key.Equals(k)) assert.True(tm.entries[idx].value.Equals(v)) if idx == endAt { done = true } idx++ return }) assert.Equal(endAt, idx-1) } doTest(getTestNativeOrderMap, 16) doTest(getTestRefValueOrderMap, 2) doTest(getTestRefToNativeOrderMap, 2) doTest(getTestRefToValueOrderMap, 2) } func TestMapAny(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() p := func(k, v Value) bool { return k.Equals(String("foo")) && v.Equals(String("bar")) } assert.False(NewMap(vrw).Any(p)) assert.False(NewMap(vrw, String("foo"), String("baz")).Any(p)) assert.True(NewMap(vrw, String("foo"), String("bar")).Any(p)) } func TestMapIterAll(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() assert := assert.New(t) doTest := func(toTestMap toTestMapFunc, scale int) { vrw := newTestValueStore() tm := toTestMap(scale, vrw) m := tm.toMap(vrw) sort.Sort(tm.entries) idx := uint64(0) m.IterAll(func(k, v Value) { assert.True(tm.entries[idx].key.Equals(k)) assert.True(tm.entries[idx].value.Equals(v)) idx++ }) } doTest(getTestNativeOrderMap, 16) doTest(getTestRefValueOrderMap, 2) doTest(getTestRefToNativeOrderMap, 2) doTest(getTestRefToValueOrderMap, 2) } func 
TestMapEquals(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() m1 := NewMap(vrw) m2 := m1 m3 := NewMap(vrw) assert.True(m1.Equals(m2)) assert.True(m2.Equals(m1)) assert.True(m3.Equals(m2)) assert.True(m2.Equals(m3)) diffMapTest(assert, m1, m2, 0, 0, 0) diffMapTest(assert, m1, m3, 0, 0, 0) diffMapTest(assert, m2, m1, 0, 0, 0) diffMapTest(assert, m2, m3, 0, 0, 0) diffMapTest(assert, m3, m1, 0, 0, 0) diffMapTest(assert, m3, m2, 0, 0, 0) m1 = NewMap(vrw, String("foo"), Number(0.0), String("bar"), NewList(vrw)) m2 = m2.Edit().Set(String("foo"), Number(0.0)).Set(String("bar"), NewList(vrw)).Map() assert.True(m1.Equals(m2)) assert.True(m2.Equals(m1)) assert.False(m2.Equals(m3)) assert.False(m3.Equals(m2)) diffMapTest(assert, m1, m2, 0, 0, 0) diffMapTest(assert, m1, m3, 2, 0, 0) diffMapTest(assert, m2, m1, 0, 0, 0) diffMapTest(assert, m2, m3, 2, 0, 0) diffMapTest(assert, m3, m1, 0, 2, 0) diffMapTest(assert, m3, m2, 0, 2, 0) } func TestMapNotStringKeys(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() b1 := NewBlob(vrw, bytes.NewBufferString("blob1")) b2 := NewBlob(vrw, bytes.NewBufferString("blob2")) l := []Value{ Bool(true), String("true"), Bool(false), String("false"), Number(1), String("Number: 1"), Number(0), String("Number: 0"), b1, String("blob1"), b2, String("blob2"), NewList(vrw), String("empty list"), NewList(vrw, NewList(vrw)), String("list of list"), NewMap(vrw), String("empty map"), NewMap(vrw, NewMap(vrw), NewMap(vrw)), String("map of map/map"), NewSet(vrw), String("empty set"), NewSet(vrw, NewSet(vrw)), String("map of set/set"), } m1 := NewMap(vrw, l...) assert.Equal(uint64(12), m1.Len()) for i := 0; i < len(l); i += 2 { assert.True(m1.Get(l[i]).Equals(l[i+1])) } assert.Nil(m1.Get(Number(42))) } func testMapOrder(assert *assert.Assertions, vrw ValueReadWriter, keyType, valueType *Type, tuples []Value, expectOrdering []Value) { m := NewMap(vrw, tuples...) 
i := 0 m.IterAll(func(key, value Value) { assert.Equal(expectOrdering[i].Hash().String(), key.Hash().String()) i++ }) } func TestMapOrdering(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vrw := newTestValueStore() testMapOrder(assert, vrw, StringType, StringType, []Value{ String("a"), String("unused"), String("z"), String("unused"), String("b"), String("unused"), String("y"), String("unused"), String("c"), String("unused"), String("x"), String("unused"), }, []Value{ String("a"), String("b"), String("c"), String("x"), String("y"), String("z"), }, ) testMapOrder(assert, vrw, NumberType, StringType, []Value{ Number(0), String("unused"), Number(1000), String("unused"), Number(1), String("unused"), Number(100), String("unused"), Number(2), String("unused"), Number(10), String("unused"), }, []Value{ Number(0), Number(1), Number(2), Number(10), Number(100), Number(1000), }, ) testMapOrder(assert, vrw, NumberType, StringType, []Value{ Number(0), String("unused"), Number(-30), String("unused"), Number(25), String("unused"), Number(1002), String("unused"), Number(-5050), String("unused"), Number(23), String("unused"), }, []Value{ Number(-5050), Number(-30), Number(0), Number(23), Number(25), Number(1002), }, ) testMapOrder(assert, vrw, NumberType, StringType, []Value{ Number(0.0001), String("unused"), Number(0.000001), String("unused"), Number(1), String("unused"), Number(25.01e3), String("unused"), Number(-32.231123e5), String("unused"), Number(23), String("unused"), }, []Value{ Number(-32.231123e5), Number(0.000001), Number(0.0001), Number(1), Number(23), Number(25.01e3), }, ) testMapOrder(assert, vrw, ValueType, StringType, []Value{ String("a"), String("unused"), String("z"), String("unused"), String("b"), String("unused"), String("y"), String("unused"), String("c"), String("unused"), String("x"), String("unused"), }, []Value{ String("a"), String("b"), String("c"), String("x"), String("y"), String("z"), }, ) testMapOrder(assert, 
vrw, BoolType, StringType, []Value{ Bool(true), String("unused"), Bool(false), String("unused"), }, []Value{ Bool(false), Bool(true), }, ) } func TestMapEmpty(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() me := NewMap(vrw).Edit() empty := func() bool { return me.Map().Empty() } assert.True(empty()) me.Set(Bool(false), String("hi")) assert.False(empty()) me.Set(NewList(vrw), NewMap(vrw)) assert.False(empty()) } func TestMapType(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() emptyMapType := MakeMapType(MakeUnionType(), MakeUnionType()) m := NewMap(vrw) assert.True(TypeOf(m).Equals(emptyMapType)) m2 := m.Edit().Remove(String("B")).Map() assert.True(emptyMapType.Equals(TypeOf(m2))) tr := MakeMapType(StringType, NumberType) m2 = m.Edit().Set(String("A"), Number(1)).Map() assert.True(tr.Equals(TypeOf(m2))) m2 = m.Edit().Set(String("B"), Number(2)).Set(String("C"), Number(2)).Map() assert.True(tr.Equals(TypeOf(m2))) m3 := m2.Edit().Set(String("A"), Bool(true)).Map() assert.True(MakeMapType(StringType, MakeUnionType(BoolType, NumberType)).Equals(TypeOf(m3)), TypeOf(m3).Describe()) m4 := m3.Edit().Set(Bool(true), Number(1)).Map() assert.True(MakeMapType(MakeUnionType(BoolType, StringType), MakeUnionType(BoolType, NumberType)).Equals(TypeOf(m4))) } func TestMapChunks(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() l1 := NewMap(vrw, Number(0), Number(1)) c1 := getChunks(l1) assert.Len(c1, 0) l2 := NewMap(vrw, NewRef(Number(0)), Number(1)) c2 := getChunks(l2) assert.Len(c2, 1) l3 := NewMap(vrw, Number(0), NewRef(Number(1))) c3 := getChunks(l3) assert.Len(c3, 1) } func TestMapFirstNNumbers(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) vrw := newTestValueStore() kvs := []Value{} for i := 0; i < testMapSize; i++ { kvs = append(kvs, Number(i), Number(i+1)) } m := NewMap(vrw, kvs...) 
assert.Equal(deriveCollectionHeight(m), getRefHeightOfCollection(m)) } func TestMapRefOfStructFirstNNumbers(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) vs := newTestValueStore() kvs := []Value{} for i := 0; i < testMapSize; i++ { k := vs.WriteValue(NewStruct("num", StructData{"n": Number(i)})) v := vs.WriteValue(NewStruct("num", StructData{"n": Number(i + 1)})) assert.NotNil(k) assert.NotNil(v) kvs = append(kvs, k, v) } m := NewMap(vs, kvs...) // height + 1 because the leaves are Ref values (with height 1). assert.Equal(deriveCollectionHeight(m)+1, getRefHeightOfCollection(m)) } func TestMapModifyAfterRead(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() m := getTestNativeOrderMap(2, vs).toMap(vs) // Drop chunk values. m = vs.ReadValue(vs.WriteValue(m).TargetHash()).(Map) // Modify/query. Once upon a time this would crash. fst, fstval := m.First() m = m.Edit().Remove(fst).Map() assert.False(m.Has(fst)) fst2, _ := m.First() assert.True(m.Has(fst2)) m = m.Edit().Set(fst, fstval).Map() assert.True(m.Has(fst)) } func TestMapTypeAfterMutations(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() test := func(n int, c interface{}) { values := make([]Value, 2*n) for i := 0; i < n; i++ { values[2*i] = Number(i) values[2*i+1] = Number(i) } m := NewMap(vrw, values...) 
assert.Equal(m.Len(), uint64(n)) assert.IsType(c, m.asSequence()) assert.True(TypeOf(m).Equals(MakeMapType(NumberType, NumberType))) m = m.Edit().Set(String("a"), String("a")).Map() assert.Equal(m.Len(), uint64(n+1)) assert.IsType(c, m.asSequence()) assert.True(TypeOf(m).Equals(MakeMapType(MakeUnionType(NumberType, StringType), MakeUnionType(NumberType, StringType)))) m = m.Edit().Remove(String("a")).Map() assert.Equal(m.Len(), uint64(n)) assert.IsType(c, m.asSequence()) assert.True(TypeOf(m).Equals(MakeMapType(NumberType, NumberType))) } test(10, mapLeafSequence{}) test(8000, metaSequence{}) } func TestCompoundMapWithValuesOfEveryType(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() v := Number(42) kvs := []Value{ // Values Bool(true), v, Number(0), v, String("hello"), v, NewBlob(vrw, bytes.NewBufferString("buf")), v, NewSet(vrw, Bool(true)), v, NewList(vrw, Bool(true)), v, NewMap(vrw, Bool(true), Number(0)), v, NewStruct("", StructData{"field": Bool(true)}), v, // Refs of values NewRef(Bool(true)), v, NewRef(Number(0)), v, NewRef(String("hello")), v, NewRef(NewBlob(vrw, bytes.NewBufferString("buf"))), v, NewRef(NewSet(vrw, Bool(true))), v, NewRef(NewList(vrw, Bool(true))), v, NewRef(NewMap(vrw, Bool(true), Number(0))), v, NewRef(NewStruct("", StructData{"field": Bool(true)})), v, } m := NewMap(vrw, kvs...) 
for i := 1; m.asSequence().isLeaf(); i++ { k := Number(i) kvs = append(kvs, k, v) m = m.Edit().Set(k, v).Map() } assert.Equal(len(kvs)/2, int(m.Len())) fk, fv := m.First() assert.True(bool(fk.(Bool))) assert.True(v.Equals(fv)) for i, keyOrValue := range kvs { if i%2 == 0 { assert.True(m.Has(keyOrValue)) assert.True(v.Equals(m.Get(keyOrValue))) } else { assert.True(v.Equals(keyOrValue)) } } for len(kvs) > 0 { k := kvs[0] kvs = kvs[2:] m = m.Edit().Remove(k).Map() assert.False(m.Has(k)) assert.Equal(len(kvs)/2, int(m.Len())) } } func TestMapRemoveLastWhenNotLoaded(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() reload := func(m Map) Map { return vs.ReadValue(vs.WriteValue(m).TargetHash()).(Map) } tm := getTestNativeOrderMap(4, vs) nm := tm.toMap(vs) for len(tm.entries) > 0 { entr := tm.entries last := entr[len(entr)-1] entr = entr[:len(entr)-1] tm.entries = entr nm = reload(nm.Edit().Remove(last.key).Map()) assert.True(tm.toMap(vs).Equals(nm)) } } func TestMapIterFrom(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() test := func(m Map, start, end Value) ValueSlice { res := ValueSlice{} m.IterFrom(start, func(k, v Value) bool { if end.Less(k) { return true } res = append(res, k, v) return false }) return res } kvs := generateNumbersAsValuesFromToBy(-50, 50, 1) m1 := NewMap(vrw, kvs...) 
assert.True(kvs.Equals(test(m1, nil, Number(1000)))) assert.True(kvs.Equals(test(m1, Number(-1000), Number(1000)))) assert.True(kvs.Equals(test(m1, Number(-50), Number(1000)))) assert.True(kvs[2:].Equals(test(m1, Number(-49), Number(1000)))) assert.True(kvs[2:].Equals(test(m1, Number(-48), Number(1000)))) assert.True(kvs[4:].Equals(test(m1, Number(-47), Number(1000)))) assert.True(kvs[98:].Equals(test(m1, Number(48), Number(1000)))) assert.True(kvs[0:0].Equals(test(m1, Number(100), Number(1000)))) assert.True(kvs[50:60].Equals(test(m1, Number(0), Number(8)))) } func TestMapAt(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() values := []Value{Bool(false), Number(42), String("a"), String("b"), String("c"), String("d")} m := NewMap(vrw, values...) for i := 0; i < len(values); i += 2 { k, v := m.At(uint64(i / 2)) assert.Equal(values[i], k) assert.Equal(values[i+1], v) } assert.Panics(func() { m.At(42) }) } func TestMapWithStructShouldHaveOptionalFields(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() list := NewMap(vrw, String("one"), NewStruct("Foo", StructData{ "a": Number(1), }), String("two"), NewStruct("Foo", StructData{ "a": Number(2), "b": String("bar"), }), ) assert.True( MakeMapType(StringType, MakeStructType("Foo", StructField{"a", NumberType, false}, StructField{"b", StringType, true}, ), ).Equals(TypeOf(list))) // transpose list = NewMap(vrw, NewStruct("Foo", StructData{ "a": Number(1), }), String("one"), NewStruct("Foo", StructData{ "a": Number(2), "b": String("bar"), }), String("two"), ) assert.True( MakeMapType( MakeStructType("Foo", StructField{"a", NumberType, false}, StructField{"b", StringType, true}, ), StringType, ).Equals(TypeOf(list))) } func TestMapWithNil(t *testing.T) { vrw := newTestValueStore() assert.Panics(t, func() { NewMap(nil, Number(42)) }) assert.Panics(t, func() { NewSet(vrw, Number(42), nil) }) assert.Panics(t, func() { NewMap(vrw, String("a"), String("b"), nil, Number(42)) }) assert.Panics(t, 
func() { NewSet(vrw, String("a"), String("b"), Number(42), nil) }) } func TestNestedEditing(t *testing.T) { vrw := newTestValueStore() me0 := NewMap(vrw).Edit() // m.a.a me1a := NewMap(vrw).Edit() me0.Set(String("a"), me1a) se2a := NewSet(vrw).Edit() me1a.Set(String("a"), se2a) se2a.Insert(String("a")) // m.b.b me1b := NewMap(vrw).Edit() me0.Set(String("b"), me1b) se2b := NewSet(vrw).Edit() me1b.Set(String("b"), se2b) se2b.Insert(String("b")) mOut := me0.Map() assert.True(t, mOut.Equals(NewMap(vrw, String("a"), NewMap(vrw, String("a"), NewSet(vrw, String("a")), ), String("b"), NewMap(vrw, String("b"), NewSet(vrw, String("b")), ), ))) se2a.Remove(String("a")).Insert(String("aa")) se2b.Remove(String("b")).Insert(String("bb")) mOut = me0.Map() assert.True(t, mOut.Equals(NewMap(vrw, String("a"), NewMap(vrw, String("a"), NewSet(vrw, String("aa")), ), String("b"), NewMap(vrw, String("b"), NewSet(vrw, String("bb")), ), ))) se2a.Remove(String("aa")) se2b.Remove(String("bb")) mOut = me0.Map() fmt.Println(EncodedValue(mOut)) assert.True(t, mOut.Equals(NewMap(vrw, String("a"), NewMap(vrw, String("a"), NewSet(vrw)), String("b"), NewMap(vrw, String("b"), NewSet(vrw)), ))) // do not remove empty } ================================================ FILE: go/types/meta_sequence.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"sort"

	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
)

const (
	objectWindowSize          = 8
	orderedSequenceWindowSize = 1
	objectPattern             = uint32(1<<6 - 1) // Average size of 64 elements
)

// emptyKey is the zero-valued orderedKey.
var emptyKey = orderedKey{}

// newMetaTuple serializes ref, key and numLeaves, in that order, into a single
// buffer and records the writer offset at which each of the three parts
// begins. It panics if ref has no backing buffer.
func newMetaTuple(ref Ref, key orderedKey, numLeaves uint64) metaTuple {
	d.PanicIfTrue(ref.buff == nil)
	w := newBinaryNomsWriter()
	var offsets [metaTuplePartNumLeaves + 1]uint32
	offsets[metaTuplePartRef] = w.offset
	ref.writeTo(&w)
	offsets[metaTuplePartKey] = w.offset
	key.writeTo(&w)
	offsets[metaTuplePartNumLeaves] = w.offset
	w.writeCount(numLeaves)
	return metaTuple{w.data(), offsets}
}

// metaTuple is a node in a Prolly Tree, consisting of data in the node (either
// tree leaves or other metaSequences), and a Value annotation for exploring
// the tree (e.g. the largest item if this is an ordered sequence).
//
// buff holds the serialized ref, key and leaf count back to back; offsets
// holds the position at which each of those parts starts, indexed by the
// metaTuplePart* constants.
type metaTuple struct {
	buff    []byte
	offsets [metaTuplePartNumLeaves + 1]uint32
}

// Indexes into metaTuple.offsets, one per serialized part.
const (
	metaTuplePartRef       = 0
	metaTuplePartKey       = 1
	metaTuplePartNumLeaves = 2
)

// decoderAtPart returns a decoder over mt.buff starting at the given part.
// The stored offsets are rebased against the offset of the ref part (the
// first part) so that they index into mt.buff.
func (mt metaTuple) decoderAtPart(part uint32) valueDecoder {
	offset := mt.offsets[part] - mt.offsets[metaTuplePartRef]
	return newValueDecoder(mt.buff[offset:], nil)
}

// ref decodes and returns the Ref stored in this tuple.
func (mt metaTuple) ref() Ref {
	dec := mt.decoderAtPart(metaTuplePartRef)
	return dec.readRef()
}

// key decodes and returns the orderedKey stored in this tuple.
func (mt metaTuple) key() orderedKey {
	dec := mt.decoderAtPart(metaTuplePartKey)
	return dec.readOrderedKey()
}

// numLeaves decodes and returns the leaf count stored in this tuple.
func (mt metaTuple) numLeaves() uint64 {
	dec := mt.decoderAtPart(metaTuplePartNumLeaves)
	return dec.readCount()
}

// getChildSequence resolves the tuple's ref through vr and returns the
// sequence of the target value. The target is asserted to be a Collection.
func (mt metaTuple) getChildSequence(vr ValueReader) sequence {
	return mt.ref().TargetValue(vr).(Collection).asSequence()
}

// writeTo writes the tuple's already-serialized bytes to w unchanged.
func (mt metaTuple) writeTo(w nomsWriter) {
	w.writeRaw(mt.buff)
}

// orderedKey is a key in a Prolly Tree level, which is a metaTuple in a metaSequence, or a value in a leaf sequence.
// |v| may be nil or |h| may be empty, but not both.
type orderedKey struct { isOrderedByValue bool v Value h hash.Hash } func newOrderedKey(v Value) orderedKey { if isKindOrderedByValue(v.Kind()) { return orderedKey{true, v, hash.Hash{}} } return orderedKey{false, v, v.Hash()} } func orderedKeyFromHash(h hash.Hash) orderedKey { return orderedKey{false, nil, h} } func orderedKeyFromInt(n int) orderedKey { return newOrderedKey(Number(n)) } func orderedKeyFromUint64(n uint64) orderedKey { return newOrderedKey(Number(n)) } func (key orderedKey) Less(mk2 orderedKey) bool { switch { case key.isOrderedByValue && mk2.isOrderedByValue: return key.v.Less(mk2.v) case key.isOrderedByValue: return true case mk2.isOrderedByValue: return false default: d.PanicIfTrue(key.h.IsEmpty() || mk2.h.IsEmpty()) return key.h.Less(mk2.h) } } func (key orderedKey) writeTo(w nomsWriter) { if !key.isOrderedByValue { d.PanicIfTrue(key != emptyKey && key.h.IsEmpty()) hashKind.writeTo(w) w.writeHash(key.h) } else { key.v.writeTo(w) } } type metaSequence struct { sequenceImpl } func newMetaSequence(vrw ValueReadWriter, buff []byte, offsets []uint32, len uint64) metaSequence { return metaSequence{newSequenceImpl(vrw, buff, offsets, len)} } // readLeafSequence reads the data provided by a decoder and moves the decoder forward. 
func readMetaSequence(dec *valueDecoder) metaSequence {
	// Remember where the sequence starts and ends so its bytes can be sliced
	// out of the decoder once skipMetaSequence has walked over it.
	start := dec.pos()
	offsets, len := skipMetaSequence(dec)
	end := dec.pos()
	return newMetaSequence(dec.vrw, dec.byteSlice(start, end), offsets, len)
}

// skipMetaSequence advances dec past one serialized meta sequence. It returns
// the decoder position of every part (kind, level, count, start of values,
// then the position after each tuple) and the sum of the tuples' leaf counts.
func skipMetaSequence(dec *valueDecoder) ([]uint32, uint64) {
	kindPos := dec.pos()
	dec.skipKind()
	levelPos := dec.pos()
	dec.skipCount() // level
	countPos := dec.pos()
	count := dec.readCount()
	valuesPos := dec.pos()
	offsets := make([]uint32, count+sequencePartValues+1)
	offsets[sequencePartKind] = kindPos
	offsets[sequencePartLevel] = levelPos
	offsets[sequencePartCount] = countPos
	offsets[sequencePartValues] = valuesPos
	length := uint64(0)
	for i := uint64(0); i < count; i++ {
		dec.skipValue()           // ref
		dec.skipValue()           // v
		length += dec.readCount() // numLeaves
		offsets[i+sequencePartValues+1] = dec.pos()
	}
	return offsets, length
}

// newMetaSequenceFromTuples serializes kind, level, the tuple count and then
// each tuple into a fresh buffer, recording part offsets as it goes, and
// returns the resulting metaSequence. Its length is the sum of the tuples'
// leaf counts. It panics unless level > 0.
func newMetaSequenceFromTuples(kind NomsKind, level uint64, tuples []metaTuple, vrw ValueReadWriter) metaSequence {
	d.PanicIfFalse(level > 0)
	w := newBinaryNomsWriter()
	offsets := make([]uint32, len(tuples)+sequencePartValues+1)
	offsets[sequencePartKind] = w.offset
	kind.writeTo(&w)
	offsets[sequencePartLevel] = w.offset
	w.writeCount(level)
	offsets[sequencePartCount] = w.offset
	w.writeCount(uint64(len(tuples)))
	offsets[sequencePartValues] = w.offset
	length := uint64(0)
	for i, mt := range tuples {
		length += mt.numLeaves()
		mt.writeTo(&w)
		offsets[i+sequencePartValues+1] = w.offset
	}
	return newMetaSequence(vrw, w.data(), offsets, length)
}

// tuples decodes and returns every metaTuple in the sequence, in order.
func (ms metaSequence) tuples() []metaTuple {
	dec, count := ms.decoderSkipToValues()
	tuples := make([]metaTuple, count)
	for i := uint64(0); i < count; i++ {
		tuples[i] = ms.readTuple(&dec)
	}
	return tuples
}

// getKey returns the ordered key of the tuple at idx.
func (ms metaSequence) getKey(idx int) orderedKey {
	dec := ms.decoderSkipToIndex(idx)
	dec.skipValue() // ref
	return dec.readOrderedKey()
}

// search returns the smallest index whose key is not less than key, or
// seqLen() if there is no such index (sort.Search semantics).
func (ms metaSequence) search(key orderedKey) int {
	return sort.Search(ms.seqLen(), func(i int) bool {
		return !ms.getKey(i).Less(key)
	})
}

// cumulativeNumberOfLeaves returns the sum of the leaf counts of the tuples
// at indexes 0 through idx, inclusive.
func (ms metaSequence) cumulativeNumberOfLeaves(idx int) uint64 {
	cum := uint64(0)
	dec, _ := ms.decoderSkipToValues()
	for i := 0; i <= idx; i++ {
		dec.skipValue() // ref
		dec.skipValue() // v
		cum += dec.readCount()
	}
	return cum
}

// getCompareFn returns a function reporting whether the tuple at idx in ms
// and the tuple at otherIdx in other reference the same target hash. other is
// asserted to be a metaSequence.
func (ms metaSequence) getCompareFn(other sequence) compareFn {
	dec := ms.decoder()
	oms := other.(metaSequence)
	otherDec := oms.decoder()
	return func(idx, otherIdx int) bool {
		return ms.getRefAt(&dec, idx).TargetHash() == oms.getRefAt(&otherDec, otherIdx).TargetHash()
	}
}

// readTuple reads one metaTuple at the decoder's current offset, advancing
// the decoder past it. The tuple's offsets are the decoder offsets at which
// its ref, key and leaf count begin.
func (ms metaSequence) readTuple(dec *valueDecoder) metaTuple {
	var offsets [metaTuplePartNumLeaves + 1]uint32
	start := dec.offset
	offsets[metaTuplePartRef] = start
	dec.skipRef()
	offsets[metaTuplePartKey] = dec.offset
	dec.skipOrderedKey()
	offsets[metaTuplePartNumLeaves] = dec.offset
	dec.skipCount()
	end := dec.offset
	return metaTuple{dec.byteSlice(start, end), offsets}
}

// getRefAt repositions dec at the item with index idx and reads the Ref
// found there.
func (ms metaSequence) getRefAt(dec *valueDecoder, idx int) Ref {
	dec.offset = uint32(ms.getItemOffset(idx))
	return dec.readRef()
}

// getNumLeavesAt returns the leaf count of the tuple at idx.
func (ms metaSequence) getNumLeavesAt(idx int) uint64 {
	dec := ms.decoderSkipToIndex(idx)
	dec.skipValue()
	dec.skipOrderedKey()
	return dec.readCount()
}

// sequence interface

// getItem returns the metaTuple at idx.
func (ms metaSequence) getItem(idx int) sequenceItem {
	dec := ms.decoderSkipToIndex(idx)
	return ms.readTuple(&dec)
}

// valuesSlice is not supported on a meta sequence and always panics.
func (ms metaSequence) valuesSlice(from, to uint64) []Value {
	panic("meta sequence")
}

// typeOf returns the union of the target types of the sequence's child refs.
// A ref whose target type is the same as that of the most recently recorded
// ref (per isSameTargetType) is not added again.
func (ms metaSequence) typeOf() *Type {
	dec, count := ms.decoderSkipToValues()
	ts := make(typeSlice, 0, count)
	var lastRef Ref
	for i := uint64(0); i < count; i++ {
		ref := dec.readRef()
		if lastRef.IsZeroValue() || !lastRef.isSameTargetType(ref) {
			lastRef = ref
			t := ref.TargetType()
			ts = append(ts, t)
		}

		dec.skipOrderedKey() // key
		dec.skipCount()      // numLeaves
	}

	return makeUnionType(ts...)
}

// numLeaves returns the sequence's stored length.
func (ms metaSequence) numLeaves() uint64 {
	return ms.len
}

// treeLevel decodes and returns the level stored in the sequence.
func (ms metaSequence) treeLevel() uint64 {
	dec := ms.decoderAtPart(sequencePartLevel)
	return dec.readCount()
}

// isLeaf always returns false; it panics if the stored tree level is 0.
func (ms metaSequence) isLeaf() bool {
	d.PanicIfTrue(ms.treeLevel() == 0)
	return false
}

// metaSequence interface

// getChildSequence returns the sequence referenced by the tuple at idx, read
// through ms.vrw, or nil if that tuple has no buffer.
func (ms metaSequence) getChildSequence(idx int) sequence {
	mt := ms.getItem(idx).(metaTuple)
	// TODO: IsZeroValue?
	if mt.buff == nil {
		return nil
	}
	return mt.getChildSequence(ms.vrw)
}

// Returns the sequences pointed to by all items[i], s.t. start <= i < end, and returns the
// concatenation as one long composite sequence
func (ms metaSequence) getCompositeChildSequence(start uint64, length uint64) sequence {
	level := ms.treeLevel()
	d.PanicIfFalse(level > 0)
	if length == 0 {
		return emptySequence{level - 1}
	}

	output := ms.getChildren(start, start+length)

	// Above level 1 the children are themselves meta sequences: merge their
	// tuples into a single meta sequence one level down.
	if level > 1 {
		var metaItems []metaTuple
		for _, seq := range output {
			metaItems = append(metaItems, seq.(metaSequence).tuples()...)
		}
		return newMetaSequenceFromTuples(ms.Kind(), level-1, metaItems, ms.vrw)
	}

	// At level 1 the children are leaf sequences of this collection's kind.
	switch ms.Kind() {
	case ListKind:
		var valueItems []Value
		for _, seq := range output {
			valueItems = append(valueItems, seq.(listLeafSequence).values()...)
		}
		return newListLeafSequence(ms.vrw, valueItems...)
	case MapKind:
		var valueItems []mapEntry
		for _, seq := range output {
			valueItems = append(valueItems, seq.(mapLeafSequence).entries()...)
		}
		return newMapLeafSequence(ms.vrw, valueItems...)
	case SetKind:
		var valueItems []Value
		for _, seq := range output {
			valueItems = append(valueItems, seq.(setLeafSequence).values()...)
		}
		return newSetLeafSequence(ms.vrw, valueItems...)
	}

	panic("unreachable")
}

// getChildren fetches child sequences from start (inclusive) to end (exclusive).
func (ms metaSequence) getChildren(start, end uint64) (seqs []sequence) {
	d.Chk.True(end <= uint64(ms.seqLen()))
	d.Chk.True(start <= end)

	seqs = make([]sequence, end-start)
	hs := make(hash.HashSlice, len(seqs))

	// Collect the target hash of every requested child ref.
	dec := ms.decoder()
	for i := start; i < end; i++ {
		hs[i-start] = ms.getRefAt(&dec, int(i)).TargetHash()
	}

	if len(hs) == 0 {
		return // can occur with ptree that is fully uncommitted
	}

	// Fetch committed child sequences in a single batch
	readValues := ms.vrw.ReadManyValues(hs)
	for i, v := range readValues {
		seqs[i] = v.(Collection).asSequence()
	}

	return
}

// metaHashValueBytes feeds the serialized bytes of a metaTuple item to the
// rolling hasher. item is asserted to be a metaTuple.
func metaHashValueBytes(item sequenceItem, rv *rollingValueHasher) {
	rv.hashBytes(item.(metaTuple).buff)
}

// emptySequence is a placeholder sequence with no items at a given tree
// level. It reports zero length and zero leaves; most of its other methods
// panic with "empty sequence".
type emptySequence struct {
	level uint64
}

func (es emptySequence) getItem(idx int) sequenceItem {
	panic("empty sequence")
}

func (es emptySequence) seqLen() int {
	return 0
}

func (es emptySequence) numLeaves() uint64 {
	return 0
}

func (es emptySequence) valueReadWriter() ValueReadWriter {
	return nil
}

func (es emptySequence) WalkRefs(cb RefCallback) {
}

// getCompareFn returns a comparison function that panics when called.
func (es emptySequence) getCompareFn(other sequence) compareFn {
	return func(idx, otherIdx int) bool {
		panic("empty sequence")
	}
}

func (es emptySequence) getKey(idx int) orderedKey {
	panic("empty sequence")
}

func (es emptySequence) search(key orderedKey) int {
	panic("empty sequence")
}

func (es emptySequence) getValue(idx int) Value {
	panic("empty sequence")
}

func (es emptySequence) cumulativeNumberOfLeaves(idx int) uint64 {
	panic("empty sequence")
}

func (es emptySequence) getChildSequence(i int) sequence {
	return nil
}

func (es emptySequence) Kind() NomsKind {
	panic("empty sequence")
}

func (es emptySequence) typeOf() *Type {
	panic("empty sequence")
}

// getCompositeChildSequence returns an emptySequence one level down. It
// panics unless es.level > 0 and both start and length are 0.
func (es emptySequence) getCompositeChildSequence(start uint64, length uint64) sequence {
	d.PanicIfFalse(es.level > 0)
	d.PanicIfFalse(start == 0)
	d.PanicIfFalse(length == 0)
	return emptySequence{es.level - 1}
}

func (es emptySequence) treeLevel() uint64 {
	return es.level
}

// isLeaf reports whether this empty sequence sits at level 0.
func (es emptySequence) isLeaf() bool {
	return es.level == 0
}

func (es emptySequence) Hash() hash.Hash {
	panic("empty sequence")
}

func (es emptySequence) Equals(other Value) bool {
	panic("empty sequence")
}

func (es emptySequence) Less(other Value) bool {
	panic("empty sequence")
}

func (es emptySequence) valueBytes() []byte {
	panic("empty sequence")
}

func (es emptySequence) valuesSlice(from, to uint64) []Value {
	panic("empty sequence")
}

func (es emptySequence) writeTo(nomsWriter) {
	panic("empty sequence")
}

func (es emptySequence) Empty() bool {
	panic("empty sequence")
}

func (es emptySequence) Len() uint64 {
	panic("empty sequence")
}

func (es emptySequence) asValueImpl() valueImpl {
	panic("empty sequence")
}

================================================
FILE: go/types/noms_kind.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

// NomsKind allows a TypeDesc to indicate what kind of type is described.
type NomsKind uint8

// All supported kinds of Noms types are enumerated here.
// The ordering of these (especially Bool, Number and String) is important for ordering of values.
const (
	BoolKind NomsKind = iota
	NumberKind
	StringKind
	BlobKind
	ValueKind
	ListKind
	MapKind
	RefKind
	SetKind

	// Keep StructKind and CycleKind together.
	StructKind
	CycleKind

	TypeKind
	UnionKind

	// Internal to decoder
	hashKind
)

// KindToString maps each exported NomsKind to its display name. The internal
// hashKind has no entry.
var KindToString = map[NomsKind]string{
	BlobKind:   "Blob",
	BoolKind:   "Bool",
	CycleKind:  "Cycle",
	ListKind:   "List",
	MapKind:    "Map",
	NumberKind: "Number",
	RefKind:    "Ref",
	SetKind:    "Set",
	StructKind: "Struct",
	StringKind: "String",
	TypeKind:   "Type",
	UnionKind:  "Union",
	ValueKind:  "Value",
}

// String returns the name of the kind.
func (k NomsKind) String() string {
	// A kind without an entry in KindToString yields the empty string.
	return KindToString[k]
}

// IsPrimitiveKind returns true if k represents a Noms primitive type, which excludes collections (List, Map, Set), Refs, Structs, Symbolic and Unresolved types.
func IsPrimitiveKind(k NomsKind) bool {
	switch k {
	case BoolKind, NumberKind, StringKind, BlobKind, ValueKind, TypeKind:
		return true
	default:
		return false
	}
}

// isKindOrderedByValue determines if a value is ordered by its value instead of its hash.
// That is the case for every kind up to and including StringKind.
func isKindOrderedByValue(k NomsKind) bool {
	return k <= StringKind
}

// writeTo writes the kind to w as a single uint8.
func (k NomsKind) writeTo(w nomsWriter) {
	w.writeUint8(uint8(k))
}

================================================
FILE: go/types/number.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"encoding/binary"
	"math"

	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
)

// Number is a Noms Value wrapper around the primitive float64 type.
type Number float64

// Value interface

// Value returns v itself.
func (v Number) Value() Value {
	return v
}

// Equals reports whether other is equal to v under Go's == comparison.
func (v Number) Equals(other Value) bool {
	return v == other
}

// Less reports whether v sorts before other. Two Numbers compare numerically;
// against any other value the result is whether NumberKind precedes other's
// kind.
func (v Number) Less(other Value) bool {
	if v2, ok := other.(Number); ok {
		return v < v2
	}
	return NumberKind < other.Kind()
}

// Hash returns the hash computed by getHash for v.
func (v Number) Hash() hash.Hash {
	return getHash(v)
}

// WalkValues does nothing.
func (v Number) WalkValues(cb ValueCallback) {
}

// WalkRefs does nothing.
func (v Number) WalkRefs(cb RefCallback) {
}

func (v Number) typeOf() *Type {
	return NumberType
}

// Kind returns NumberKind.
func (v Number) Kind() NomsKind {
	return NumberKind
}

func (v Number) valueReadWriter() ValueReadWriter {
	return nil
}

// writeTo writes NumberKind followed by the number to w. It panics if v is
// NaN or infinite.
func (v Number) writeTo(w nomsWriter) {
	NumberKind.writeTo(w)
	f := float64(v)
	if math.IsNaN(f) || math.IsInf(f, 0) {
		d.Panic("%f is not a supported number", f)
	}
	w.writeNumber(v)
}

// valueBytes returns the bytes that writeTo produces for v.
func (v Number) valueBytes() []byte {
	// We know the size of the buffer here so allocate it once.
	// NumberKind, int (Varint), exp (Varint)
	buff := make([]byte, 1+2*binary.MaxVarintLen64)
	w := binaryNomsWriter{buff, 0}
	v.writeTo(&w)
	return buff[:w.offset]
}

================================================
FILE: go/types/number_util.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import "math"

// float64IsInt reports whether f has no fractional part.
func float64IsInt(f float64) bool {
	return math.Trunc(f) == f
}

// float64ToIntExp converts f to an integer i and an exponent exp such that
// f == i * 2^exp. Zero yields (0, 0); the sign of f is carried by i.
func float64ToIntExp(f float64) (int64, int) {
	if f == 0 {
		return 0, 0
	}

	isNegative := math.Signbit(f)
	f = math.Abs(f)

	frac, exp := math.Frexp(f)
	// frac is [.5, 1)
	// Move frac up until it is an integer.
	for !float64IsInt(frac) {
		frac *= 2
		exp--
	}

	if isNegative {
		frac *= -1
	}

	return int64(frac), exp
}

// fracExpToFloat returns frac * 2 ** exp
func fracExpToFloat(frac int64, exp int) float64 {
	return float64(frac) * math.Pow(2, float64(exp))
}

================================================
FILE: go/types/opcache.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// opCache stores build operations on a graph of nested Maps whose leaves can
// in turn be Set, Map, or List collections containing any Noms Value.
// OpCacheIterator returns operations in sorted order.
//
// OpCache uses a special encoding of the information supplied by the MapSet(),
// ListAppend(), or SetInsert() operation stored in the ldbKey combined with
// custom ldb Comparer object implemented in opcache_compare.go to make this
// happen.
//
// Ldb keys are encoded byte arrays that contain the following information:
//     4-bytes -- uint32 in BigEndian order which identifies this key/value
//         as belonging to a particular graph
//     1-byte  -- a NomsKind value that represents the collection type that is
//         being acted on. This will either be MapKind, SetKind, or ListKind.
//     1-byte  -- uint8 representing the number of NomsValues encoded in this key
//
// After this 6-byte header, there is a section of bytes for each value encoded
// into the key. Each value has a 1-byte prefix:
//     1-byte  -- a NomsKind value that represents the type of value that is
//         being encoded.
// The 1-byte NomsKind value determines what follows. If this value is
// BoolKind, NumberKind, or StringKind, the rest of the bytes are:
//     4-bytes -- uint32 length of the Value serialization
//     n-bytes -- the serialized value
// If the NomsKind byte has any other value, it is followed by:
//     20-bytes -- digest of Value's hash
//
// Whenever the value is encoded as a hash digest in the ldbKey, its actual value
// needs to get stored in the ldbValue. (More about this later)
//
// There are 3 operation types on opCache: MapSet(), SetInsert(), and ListAppend().
// Each one stores slightly different things in the ldbKey.
//     MapSet() -- stores each graphKey and the key to the final Map
//     SetInsert() -- stores each graphKey and the Value being inserted into the set
//     ListAppend() -- stores each graphKey and a Number() containing a uint64 value
//         that is shared across all collections and lists which is incremented each time
//         ListAppend() is called.
//
// The ldbValue also stores different information for each mutation operation. An
// ldbValue has a 1-byte uint8 header that is the number of values that are encoded
// into it.
// 1-byte -- uint8 indicating number of values encoded into this byte array // Then for each encoded value it contains: // 4-byte -- uint32 indicating length of value serialization // n-bytes -- the serialized value // // The ldbValue contains the following values for each type of mutation: // MapSet() -- stores any graphKeys that were encoded as a hash digest in // the ldbKey. The mapKey if it was encoded as a hash digest in the ldbKey // and the value being set in the map. // SetInsert() -- stores any graphKeys that were encoded as a hash digest in // the ldbKey. The value being inserted into the set if it was encoded into the // ldbKey as a hash digest. // ListAppend() -- stores any graphKeys that were encoded as a hash digest in the // ldbKey. The value being appended to the list. // package types import ( "encoding/binary" "io/ioutil" "os" "sync/atomic" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/syndtr/goleveldb/leveldb" ldbIterator "github.com/syndtr/goleveldb/leveldb/iterator" "github.com/syndtr/goleveldb/leveldb/opt" "github.com/syndtr/goleveldb/leveldb/util" ) const uint32Size = 4 type opCacheStore interface { opCache() opCache destroy() error } type opCache interface { // This method can be called from multiple go routines. GraphMapSet(keys ValueSlice, mapKey Value, mapVal Value) // This method can be called from multiple go routines. GraphSetInsert(keys ValueSlice, val Value) // This method can be called from multiple go routines, however items will // be appended to the list based on the order that routines execute // this method. 
GraphListAppend(keys ValueSlice, val Value) NewIterator() opCacheIterator } type opCacheIterator interface { GraphOp() (ValueSlice, NomsKind, sequenceItem) Next() bool Release() } type ldbOpCacheStore struct { ldb *leveldb.DB dbDir string collectionId uint32 vrw ValueReadWriter } type ldbOpCache struct { vrw ValueReadWriter colId uint32 listIdx int64 ldb *leveldb.DB } type ldbOpCacheIterator struct { iter ldbIterator.Iterator vrw ValueReadWriter } func newLdbOpCacheStore(vrw ValueReadWriter) *ldbOpCacheStore { dir, err := ioutil.TempDir("", "") d.Chk.NoError(err) db, err := leveldb.OpenFile(dir, &opt.Options{ Compression: opt.NoCompression, Comparer: opCacheComparer{}, OpenFilesCacheCapacity: 24, // This data does not have to be durable. LDB is acting as temporary // storage that can be larger than main memory. NoSync: true, WriteBuffer: 1 << 27, // 128MiB }) d.Chk.NoError(err, "opening put cache in %s", dir) return &ldbOpCacheStore{ldb: db, dbDir: dir, vrw: vrw} } func (store *ldbOpCacheStore) destroy() error { d.Chk.NoError(store.ldb.Close()) return os.RemoveAll(store.dbDir) } func (store *ldbOpCacheStore) opCache() opCache { colId := atomic.AddUint32(&store.collectionId, 1) return &ldbOpCache{vrw: store.vrw, colId: colId, ldb: store.ldb} } // insertLdbOp encodes allKeys into the ldb key. Bool, Number, and String values // are encoded directly into the ldb key bytes. All other types are encoded as // their Hash() digest. Their actual value is then stored in ldb value. func (opc *ldbOpCache) insertLdbOp(allKeys ValueSlice, opKind NomsKind, val Value) { if len(allKeys) > 0x00FF { d.Panic("Number of keys in GraphMapSet exceeds max of 256") } ldbKeyBytes := [initialBufferSize]byte{} ldbValBytes := [initialBufferSize]byte{} ldbKey, valuesToEncode := encodeKeys(ldbKeyBytes[:0], opc.colId, opKind, allKeys) // val may be nil when dealing with sets, since the val is the key. 
if val != nil { valuesToEncode = append(valuesToEncode, val) } ldbVal := encodeValues(ldbValBytes[:0], valuesToEncode) err := opc.ldb.Put(ldbKey, ldbVal, nil) d.Chk.NoError(err) } func (opc *ldbOpCache) GraphMapSet(graphKeys ValueSlice, mapKey, mapVal Value) { allKeys := append(graphKeys, mapKey) opc.insertLdbOp(allKeys, MapKind, mapVal) } func (opc *ldbOpCache) GraphSetInsert(graphKeys ValueSlice, val Value) { allKeys := append(graphKeys, val) opc.insertLdbOp(allKeys, SetKind, val) } func (opc *ldbOpCache) GraphListAppend(graphKeys ValueSlice, val Value) { idx := atomic.AddInt64(&opc.listIdx, 1) allKeys := append(graphKeys, Number(idx)) opc.insertLdbOp(allKeys, ListKind, val) } func (i *ldbOpCacheIterator) GraphOp() (ValueSlice, NomsKind, sequenceItem) { ldbKey := i.iter.Key() ldbVal := i.iter.Value() // skip over 4 bytes of colId and get opKind, and numKeys from bytes 4 & 5 opKind := NomsKind(ldbKey[4]) numKeys := uint8(ldbKey[5]) ldbKey = ldbKey[6:] // Call decodeValue for each encoded graphKey. nil will be appended to // graphKeys for any keys that were encoded as hash digests. graphKeys := ValueSlice{} for pos := uint8(0); pos < numKeys; pos++ { var gk Value ldbKey, gk = decodeValue(ldbKey, false, i.vrw) graphKeys = append(graphKeys, gk) } // Get the number of values whose value was encoded in ldbVal numEncodedValues := uint8(ldbVal[0]) ldbVal = ldbVal[1:] // Call decodeValue for each non-primitive key stored in ldbVal. Replace // the nil value in graphKeys with the new decodedValue. values := ValueSlice{} for pos := uint8(0); pos < numEncodedValues; pos++ { var gk Value ldbVal, gk = decodeValue(ldbVal, true, i.vrw) values = append(values, gk) } // Fold in any non-primitive key values that were stored in ldbVal pos := 0 for idx, k1 := range graphKeys { if k1 == nil { graphKeys[idx] = values[pos] pos++ } } // Remove the last key in graphKeys. The last key in graphKeys is the // mapkey for Maps, the item for Sets, and the index for Lists. 
key := graphKeys[len(graphKeys)-1] graphKeys = graphKeys[:len(graphKeys)-1] var item sequenceItem switch opKind { case MapKind: val := values[len(values)-1] item = mapEntry{key, val} case SetKind: item = key case ListKind: item = values[len(values)-1] } return graphKeys, opKind, item } func (opc *ldbOpCache) NewIterator() opCacheIterator { prefix := [4]byte{} binary.BigEndian.PutUint32(prefix[:], opc.colId) return &ldbOpCacheIterator{iter: opc.ldb.NewIterator(util.BytesPrefix(prefix[:]), nil), vrw: opc.vrw} } func (i *ldbOpCacheIterator) Next() bool { return i.iter.Next() } func (i *ldbOpCacheIterator) Release() { i.iter.Release() } // encodeKeys() serializes a list of keys to the byte slice |bs|. func encodeKeys(bs []byte, colId uint32, opKind NomsKind, keys []Value) ([]byte, []Value) { // All ldb keys start with a 4-byte collection id that serves as a namespace // that keeps them separate from other collections. idHolder := [4]byte{} idHolderSlice := idHolder[:4] binary.BigEndian.PutUint32(idHolderSlice, colId) bs = append(bs, idHolderSlice...) // bs[4] is a NomsKind value which represents the type of leaf // collection being operated on (i.e. MapKind, SetKind, or ListKind) // bs[5] is a single uint8 value representing the number of keys // encoded in the ldb key. bs = append(bs, byte(opKind), byte(len(keys))) valuesToEncode := ValueSlice{} for _, gk := range keys { bs = encodeGraphKey(bs, gk) if !isKindOrderedByValue(gk.Kind()) { valuesToEncode = append(valuesToEncode, gk) } } return bs, valuesToEncode } func encodeValues(bs []byte, valuesToEncode []Value) []byte { // Encode allValues into the ldbVal byte slice. 
bs = append(bs, uint8(len(valuesToEncode))) for _, k := range valuesToEncode { bs = encodeGraphValue(bs, k) } return bs } func encodeGraphKey(bs []byte, v Value) []byte { return encodeForGraph(bs, v, false) } func encodeGraphValue(bs []byte, v Value) []byte { return encodeForGraph(bs, v, true) } func encodeForGraph(bs []byte, v Value, asValue bool) []byte { // Note: encToSlice() and append() will both grow the backing store of |bs| // as necessary. Always call them when writing to |bs|. if asValue || isKindOrderedByValue(v.Kind()) { // if we're encoding value, then put: // noms-kind(1-byte), serialization-len(4-bytes), serialization(n-bytes) buf := [initialBufferSize]byte{} uint32buf := [4]byte{} encodedVal := encToSlice(v, buf[:]) binary.BigEndian.PutUint32(uint32buf[:], uint32(len(encodedVal))) bs = append(bs, uint8(v.Kind())) bs = append(bs, uint32buf[:]...) bs = append(bs, encodedVal...) } else { // if we're encoding hash values, we know the length, so we can leave that out bs = append(bs, uint8(v.Kind())) h := v.Hash() bs = append(bs, h[:]...) } return bs } func decodeValue(bs []byte, asValue bool, vrw ValueReadWriter) ([]byte, Value) { kind := NomsKind(bs[0]) var v Value if asValue || isKindOrderedByValue(kind) { encodedLen := binary.BigEndian.Uint32(bs[1:5]) // The bytes in bs gets reused by LDB. The data of a chunk must // never change since we are backing the values by this data. data := make([]byte, encodedLen) copy(data, bs[5:5+encodedLen]) v = DecodeFromBytes(data, vrw) return bs[5+encodedLen:], v } return bs[1+hash.ByteLen:], nil } // Note that, if 'v' are prolly trees, any in-memory child chunks will be written to vw at this time. func encToSlice(v Value, initBuf []byte) []byte { // TODO: Are there enough calls to this that it's worth re-using a nomsWriter? 
w := &binaryNomsWriter{initBuf, 0} v.writeTo(w) return w.data() } ================================================ FILE: go/types/opcache_compare.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "encoding/binary" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" ) type opCacheComparer struct{} func (opCacheComparer) Compare(a, b []byte) int { if res := bytes.Compare(a[:uint32Size], b[:uint32Size]); res != 0 { return res } return compareEncodedKeys(a[uint32Size:], b[uint32Size:]) } func (opCacheComparer) Name() string { return "noms.OpCacheComparator" } func (opCacheComparer) Successor(dst, b []byte) []byte { return nil } func (opCacheComparer) Separator(dst, a, b []byte) []byte { return nil } func compareEncodedKeys(a, b []byte) int { if compared, res := compareEmpties(a, b); compared { return res } // keys are encoded as either values: // nomsKind(1-byte) + serialized len(4-bytes) + serialized value(n-bytes) // or digests: // nomsKind(1-byte) + digest(hash.Bytelen-bytes) splitAfterFirstKey := func(bs []byte) ([]byte, []byte) { keyLen := 1 + hash.ByteLen if isKindOrderedByValue(NomsKind(bs[0])) { l := int(binary.BigEndian.Uint32(bs[1:5])) keyLen = 1 + uint32Size + l } return bs[:keyLen], bs[keyLen:] } // a[0] and b[0] represent NomsKind of leafNode being operated on // a[1] and b[1] are the number of keys encoded in this byte slice numAGraphKeys, numBGraphKeys := a[1], b[1] minNumKeys := minByte(numAGraphKeys, numBGraphKeys) a, b = a[2:], b[2:] cres := 0 for pos := 0; pos < int(minNumKeys) && cres == 0; pos++ { aKey, aRest := splitAfterFirstKey(a) bKey, bRest := splitAfterFirstKey(b) cres = compareEncodedKey(aKey, bKey) a, b = aRest, bRest } if cres == 0 { if numAGraphKeys < numBGraphKeys { return -1 } if numAGraphKeys > numBGraphKeys { return 1 } } return cres 
} // compareEncodedKey accepts two byte slices that each contain a number of // encoded keys. It extracts the first key in each slice and returns the result // of comparing them. func compareEncodedKey(a, b []byte) int { // keys that are orderd by value are encoded as: // NomsKind(1-byte) + length(4-bytes) + encoding(n-bytes) // keys that are not ordred by value are encoded as // NomsKind(1-byte) + hash digest(20-bytes) aKind, bKind := NomsKind(a[0]), NomsKind(b[0]) if !isKindOrderedByValue(aKind) && !isKindOrderedByValue(bKind) { a, b := a[1:], b[1:] d.PanicIfFalse(len(a) == hash.ByteLen && len(b) == hash.ByteLen) res := bytes.Compare(a, b) if res == 0 && aKind != bKind { d.Panic("Values of different kinds with the same hash. Whaa??") } return res } // Now, we know that at least one of a and b is ordered by value. So if the // kinds are different, we can sort just by comparing them. if res := compareKinds(aKind, bKind); res != 0 { return res } // Now we know that we are comparing two values that are both Bools, Numbers, // or Strings. Extract their length and create slices that just contain their // Noms encodings. lenA := binary.BigEndian.Uint32(a[1:5]) lenB := binary.BigEndian.Uint32(b[1:5]) // create a1, b1 slices that just contain encoding a1, b1 := a[1+uint32Size:1+uint32Size+lenA], b[1+uint32Size:1+uint32Size+lenB] return compareEncodedNomsValues(a1, b1) } // compareEncodedNomsValues compares two slices. Each slice contains a first // byte that holds the nomsKind of the original key and an encoding for that key. // This method relies on knowledge about how bytes are arranged in a Noms // encoding and makes use of that for companing values efficiently. 
func compareEncodedNomsValues(a, b []byte) int { if compared, res := compareEmpties(a, b); compared { return res } aKind, bKind := NomsKind(a[0]), NomsKind(b[0]) if aKind != bKind { d.Panic("compareEncodedNomsValues, aKind: %s != bKind: %s", aKind, bKind) } switch aKind { case BoolKind: return bytes.Compare(a, b) case NumberKind: reader := binaryNomsReader{a[1:], 0} aNum := reader.readNumber() reader.buff, reader.offset = b[1:], 0 bNum := reader.readNumber() if aNum == bNum { return 0 } if aNum < bNum { return -1 } return 1 case StringKind: // Skip past uvarint-encoded string length _, aCount := binary.Uvarint(a[1:]) _, bCount := binary.Uvarint(b[1:]) res := bytes.Compare(a[1+aCount:], b[1+bCount:]) return res } panic("unreachable") } func compareEmpties(a, b []byte) (bool, int) { aLen, bLen := len(a), len(b) if aLen > 0 && bLen > 0 { return false, 0 } if aLen == 0 { if bLen == 0 { return true, 0 } return true, -1 } return true, 1 } func compareKinds(aKind, bKind NomsKind) (res int) { if aKind < bKind { res = -1 } else if aKind > bKind { res = 1 } return } func minByte(a, b byte) byte { if a < b { return a } return b } ================================================ FILE: go/types/opcache_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "sort" "testing" "github.com/attic-labs/noms/go/d" "github.com/stretchr/testify/suite" ) func TestOpCache(t *testing.T) { suite.Run(t, &OpCacheSuite{}) } type OpCacheSuite struct { suite.Suite vs *ValueStore } func (suite *OpCacheSuite) SetupTest() { suite.vs = newTestValueStore() } func (suite *OpCacheSuite) TearDownTest() { suite.vs.Close() } func (suite *OpCacheSuite) TestMapSet() { vs := suite.vs opCacheStore := newLdbOpCacheStore(vs) oc := opCacheStore.opCache() defer opCacheStore.destroy() entries := mapEntrySlice{ {NewList(vs, Number(8), Number(0)), String("ahoy")}, {String("A key"), NewBlob(vs, bytes.NewBufferString("A value"))}, {Number(1), Bool(true)}, {Bool(false), Number(1)}, {NewBlob(vs, bytes.NewBuffer([]byte{0xff, 0, 0})), NewMap(vs)}, {Bool(true), Number(42)}, {NewStruct("thing1", StructData{"a": Number(7)}), Number(42)}, {String("struct"), NewStruct("thing2", nil)}, {Number(42), String("other")}, } for _, entry := range entries { oc.GraphMapSet(nil, entry.key, entry.value) } sort.Sort(entries) iterated := mapEntrySlice{} iter := oc.NewIterator() defer iter.Release() for iter.Next() { keys, kind, item := iter.GraphOp() d.Chk.Empty(keys) d.Chk.Equal(MapKind, kind) iterated = append(iterated, item.(mapEntry)) } suite.True(entries.Equals(iterated)) } func (suite *OpCacheSuite) TestSetInsert() { vs := suite.vs opCacheStore := newLdbOpCacheStore(vs) oc := opCacheStore.opCache() defer opCacheStore.destroy() entries := ValueSlice{ NewList(vs, Number(8), Number(0)), String("ahoy"), NewBlob(vs, bytes.NewBufferString("A value")), Number(1), Bool(true), Bool(false), NewBlob(vs, bytes.NewBuffer([]byte{0xff, 0, 0})), NewMap(vs), Number(42), NewStruct("thing1", StructData{"a": Number(7)}), String("struct"), NewStruct("thing2", nil), String("other"), } for _, entry := range entries { oc.GraphSetInsert(nil, entry) } sort.Sort(entries) 
iterated := ValueSlice{} iter := oc.NewIterator() defer iter.Release() for iter.Next() { keys, kind, item := iter.GraphOp() d.Chk.Empty(keys) d.Chk.Equal(SetKind, kind) iterated = append(iterated, item.(Value)) } suite.True(entries.Equals(iterated)) } func (suite *OpCacheSuite) TestListAppend() { vs := suite.vs opCacheStore := newLdbOpCacheStore(vs) oc := opCacheStore.opCache() defer opCacheStore.destroy() entries := ValueSlice{ NewList(vs, Number(8), Number(0)), String("ahoy"), NewBlob(vs, bytes.NewBufferString("A value")), Number(1), Bool(true), Bool(false), NewBlob(vs, bytes.NewBuffer([]byte{0xff, 0, 0})), NewMap(vs), Number(42), NewStruct("thing1", StructData{"a": Number(7)}), String("struct"), NewStruct("thing2", nil), String("other"), } for _, entry := range entries { oc.GraphListAppend(nil, entry) } iterated := ValueSlice{} iter := oc.NewIterator() defer iter.Release() for iter.Next() { keys, kind, item := iter.GraphOp() d.Chk.Empty(keys) d.Chk.Equal(ListKind, kind) iterated = append(iterated, item.(Value)) } suite.True(entries.Equals(iterated)) } ================================================ FILE: go/types/ordered_sequences.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "github.com/attic-labs/noms/go/d" ) type orderedSequence interface { sequence getKey(idx int) orderedKey search(key orderedKey) int } func newSetMetaSequence(level uint64, tuples []metaTuple, vrw ValueReadWriter) metaSequence { return newMetaSequenceFromTuples(SetKind, level, tuples, vrw) } func newMapMetaSequence(level uint64, tuples []metaTuple, vrw ValueReadWriter) metaSequence { return newMetaSequenceFromTuples(MapKind, level, tuples, vrw) } func newCursorAtValue(seq orderedSequence, val Value, forInsertion bool, last bool) *sequenceCursor { var key orderedKey if val != nil { key = newOrderedKey(val) } return newCursorAt(seq, key, forInsertion, last) } func newCursorAt(seq orderedSequence, key orderedKey, forInsertion bool, last bool) *sequenceCursor { var cur *sequenceCursor for { idx := 0 if last { idx = -1 } cur = newSequenceCursor(cur, seq, idx) if key != emptyKey { if !seekTo(cur, key, forInsertion && !seq.isLeaf()) { return cur } } cs := cur.getChildSequence() if cs == nil { break } seq = cs.(orderedSequence) } d.PanicIfFalse(cur != nil) return cur } func seekTo(cur *sequenceCursor, key orderedKey, lastPositionIfNotFound bool) bool { seq := cur.seq.(orderedSequence) // Find smallest idx in seq where key(idx) >= key cur.idx = seq.search(key) seqLen := seq.seqLen() if cur.idx == seqLen && lastPositionIfNotFound { d.PanicIfFalse(cur.idx > 0) cur.idx-- } return cur.idx < seqLen } // Gets the key used for ordering the sequence at current index. func getCurrentKey(cur *sequenceCursor) orderedKey { seq, ok := cur.seq.(orderedSequence) if !ok { d.Panic("need an ordered sequence here") } return seq.getKey(cur.idx) } func getMapValue(cur *sequenceCursor) Value { if ml, ok := cur.seq.(mapLeafSequence); ok { return ml.getValue(cur.idx) } return nil } // If |vw| is not nil, chunks will be eagerly written as they're created. 
Otherwise they are // written when the root is written. func newOrderedMetaSequenceChunkFn(kind NomsKind, vrw ValueReadWriter) makeChunkFn { return func(level uint64, items []sequenceItem) (Collection, orderedKey, uint64) { tuples := make([]metaTuple, len(items)) numLeaves := uint64(0) var lastKey orderedKey for i, v := range items { mt := v.(metaTuple) key := mt.key() d.PanicIfFalse(lastKey == emptyKey || lastKey.Less(key)) lastKey = key tuples[i] = mt // chunk is written when the root sequence is written numLeaves += mt.numLeaves() } var col Collection if kind == SetKind { col = newSet(newSetMetaSequence(level, tuples, vrw)) } else { d.PanicIfFalse(MapKind == kind) col = newMap(newMapMetaSequence(level, tuples, vrw)) } return col, tuples[len(tuples)-1].key(), numLeaves } } ================================================ FILE: go/types/ordered_sequences_diff.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "sync" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/functions" ) type DiffChangeType uint8 const ( DiffChangeAdded DiffChangeType = iota DiffChangeRemoved DiffChangeModified ) type ValueChanged struct { ChangeType DiffChangeType Key, OldValue, NewValue Value } func sendChange(changes chan<- ValueChanged, stopChan <-chan struct{}, change ValueChanged) bool { select { case changes <- change: return true case <-stopChan: return false } } // Streams the diff from |last| to |current| into |changes|, using both left-right and top-down approach in parallel. // The left-right diff is expected to return results earlier, whereas the top-down approach is faster overall. This "best" algorithm runs both: // - early results from left-right are sent to |changes|. // - if/when top-down catches up, left-right is stopped and the rest of the changes are streamed from top-down. 
func orderedSequenceDiffBest(last orderedSequence, current orderedSequence, changes chan<- ValueChanged, stopChan <-chan struct{}) bool { lrChanges := make(chan ValueChanged) tdChanges := make(chan ValueChanged) // Give the stop channels a buffer size of 1 so that they won't block (see below). lrStopChan := make(chan struct{}, 1) tdStopChan := make(chan struct{}, 1) // Ensure all diff functions have finished doing work by the time this function returns, otherwise database reads might cause deadlock - e.g. https://github.com/attic-labs/noms/issues/2165. wg := &sync.WaitGroup{} defer func() { // Stop diffing. The left-right or top-down diff might have already finished, but sending to the stop channels won't block due to the buffer. lrStopChan <- struct{}{} tdStopChan <- struct{}{} wg.Wait() }() wg.Add(2) go func() { defer wg.Done() orderedSequenceDiffLeftRight(last, current, lrChanges, lrStopChan) close(lrChanges) }() go func() { defer wg.Done() orderedSequenceDiffTopDown(last, current, tdChanges, tdStopChan) close(tdChanges) }() // Stream left-right changes while the top-down diff algorithm catches up. var lrChangeCount, tdChangeCount int for multiplexing := true; multiplexing; { select { case <-stopChan: return false case c, ok := <-lrChanges: if !ok { // Left-right diff completed. return true } lrChangeCount++ if !sendChange(changes, stopChan, c) { return false } case c, ok := <-tdChanges: if !ok { // Top-down diff completed. return true } tdChangeCount++ if tdChangeCount > lrChangeCount { // Top-down diff has overtaken left-right diff. if !sendChange(changes, stopChan, c) { return false } lrStopChan <- struct{}{} multiplexing = false } } } for c := range tdChanges { if !sendChange(changes, stopChan, c) { return false } } return true } // Streams the diff from |last| to |current| into |changes|, using a top-down approach. // Top-down is parallel and efficiently returns the complete diff, but compared to left-right it's slow to start streaming changes. 
func orderedSequenceDiffTopDown(last orderedSequence, current orderedSequence, changes chan<- ValueChanged, stopChan <-chan struct{}) bool {
	return orderedSequenceDiffInternalNodes(last, current, changes, stopChan)
}

// orderedSequenceDiffInternalNodes diffs two sequences level by level. The
// taller tree is first collapsed into the composite of all its children until
// both sides are at the same level. Two leaves are handed to the left-right
// diff; otherwise the splices between the two node lists are computed and
// each spliced region is diffed recursively. It returns false as soon as a
// nested diff reports that it was stopped.
//
// TODO - something other than the literal edit-distance, which is way too much cpu work for this case - https://github.com/attic-labs/noms/issues/2027
func orderedSequenceDiffInternalNodes(last orderedSequence, current orderedSequence, changes chan<- ValueChanged, stopChan <-chan struct{}) bool {
	switch lastLevel, currentLevel := last.treeLevel(), current.treeLevel(); {
	case lastLevel > currentLevel:
		flattened := last.getCompositeChildSequence(0, uint64(last.seqLen())).(orderedSequence)
		return orderedSequenceDiffInternalNodes(flattened, current, changes, stopChan)
	case currentLevel > lastLevel:
		flattened := current.getCompositeChildSequence(0, uint64(current.seqLen())).(orderedSequence)
		return orderedSequenceDiffInternalNodes(last, flattened, changes, stopChan)
	}

	if last.isLeaf() && current.isLeaf() {
		return orderedSequenceDiffLeftRight(last, current, changes, stopChan)
	}

	sameItem := last.getCompareFn(current)
	equalAt := func(i uint64, j uint64) bool {
		return sameItem(int(i), int(j))
	}
	splices := calcSplices(uint64(last.seqLen()), uint64(current.seqLen()), DEFAULT_MAX_SPLICE_MATRIX_SIZE, equalAt)

	for _, sp := range splices {
		var removed, added orderedSequence
		// Both composite children are loaded via functions.All.
		functions.All(
			func() {
				removed = last.getCompositeChildSequence(sp.SpAt, sp.SpRemoved).(orderedSequence)
			},
			func() {
				added = current.getCompositeChildSequence(sp.SpFrom, sp.SpAdded).(orderedSequence)
			},
		)
		if !orderedSequenceDiffInternalNodes(removed, added, changes, stopChan) {
			return false
		}
	}

	return true
}

// Streams the diff from |last| to |current| into |changes|, using a left-right approach.
// Left-right immediately descends to the first change and starts streaming changes, but compared to top-down it's serial and much slower to calculate the full diff.
func orderedSequenceDiffLeftRight(last orderedSequence, current orderedSequence, changes chan<- ValueChanged, stopChan <-chan struct{}) bool {
	// Both cursors are created with emptyKey, so no seek is performed.
	lastCur := newCursorAt(last, emptyKey, false, false)
	currentCur := newCursorAt(current, emptyKey, false, false)

	for lastCur.valid() && currentCur.valid() {
		// Skip the run of items that are equal on both sides.
		fastForward(lastCur, currentCur)

		// Emit changes for as long as the two current items differ.
		for lastCur.valid() && currentCur.valid() &&
			!lastCur.seq.getCompareFn(currentCur.seq)(lastCur.idx, currentCur.idx) {
			lastKey := getCurrentKey(lastCur)
			currentKey := getCurrentKey(currentCur)
			if currentKey.Less(lastKey) {
				// The key only exists in |current|: it was added.
				if !sendChange(changes, stopChan, ValueChanged{DiffChangeAdded, currentKey.v, nil, getMapValue(currentCur)}) {
					return false
				}
				currentCur.advance()
			} else if lastKey.Less(currentKey) {
				// The key only exists in |last|: it was removed.
				if !sendChange(changes, stopChan, ValueChanged{DiffChangeRemoved, lastKey.v, getMapValue(lastCur), nil}) {
					return false
				}
				lastCur.advance()
			} else {
				// Neither key is less than the other, but the items differ:
				// report a modification.
				if !sendChange(changes, stopChan, ValueChanged{DiffChangeModified, lastKey.v, getMapValue(lastCur), getMapValue(currentCur)}) {
					return false
				}
				lastCur.advance()
				currentCur.advance()
			}
		}
	}

	// Whatever is left in |last| after |current| ran out was removed.
	for lastCur.valid() {
		if !sendChange(changes, stopChan, ValueChanged{DiffChangeRemoved, getCurrentKey(lastCur).v, getMapValue(lastCur), nil}) {
			return false
		}
		lastCur.advance()
	}
	// Whatever is left in |current| after |last| ran out was added.
	for currentCur.valid() {
		if !sendChange(changes, stopChan, ValueChanged{DiffChangeAdded, getCurrentKey(currentCur).v, nil, getMapValue(currentCur)}) {
			return false
		}
		currentCur.advance()
	}

	return true
}

/**
 * Advances |a| and |b| past their common sequence of equal values. Does
 * nothing unless both cursors are valid.
 */
func fastForward(a *sequenceCursor, b *sequenceCursor) {
	if a.valid() && b.valid() {
		doFastForward(true, a, b)
	}
}

// syncWithIdx calls cur.sync() and then resets the cursor's index: to the
// first item when |hasMore| is set, otherwise to one past the last item when
// |allowPastEnd| is set, otherwise to the last item.
func syncWithIdx(cur *sequenceCursor, hasMore bool, allowPastEnd bool) {
	cur.sync()
	if hasMore {
		cur.idx = 0
	} else if allowPastEnd {
		cur.idx = cur.length()
	} else {
		cur.idx = cur.length() - 1
	}
}

/*
 * Advances |a| and |b| for as long as their current items are equal. Returns
 * two booleans matching |a| and |b| respectively to whether that cursor has
 * more values. Both cursors must be valid on entry.
 */
func doFastForward(allowPastEnd bool, a *sequenceCursor, b *sequenceCursor) (aHasMore bool, bHasMore bool) {
	d.PanicIfFalse(a.valid())
	d.PanicIfFalse(b.valid())
	aHasMore = true
	bHasMore = true

	for aHasMore && bHasMore && isCurrentEqual(a, b) {
		if nil != a.parent && nil != b.parent && isCurrentEqual(a.parent, b.parent) {
			// Key optimisation: if the sequences have common parents, then entire chunks can be
			// fast-forwarded without reading unnecessary data.
			// The parents are never moved past their end (allowPastEnd is false).
			aHasMore, bHasMore = doFastForward(false, a.parent, b.parent)
			syncWithIdx(a, aHasMore, allowPastEnd)
			syncWithIdx(b, bHasMore, allowPastEnd)
		} else {
			aHasMore = a.advanceMaybeAllowPastEnd(allowPastEnd)
			bHasMore = b.advanceMaybeAllowPastEnd(allowPastEnd)
		}
	}
	return aHasMore, bHasMore
}

// isCurrentEqual reports whether the items the two cursors currently point at
// are equal according to the compare function of |a|'s sequence.
func isCurrentEqual(a *sequenceCursor, b *sequenceCursor) bool {
	return a.seq.getCompareFn(b.seq)(a.idx, b.idx)
}

================================================
FILE: go/types/ordered_sequences_diff_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) const ( lengthOfNumbersTest = 1000 ) type diffFn func(last orderedSequence, current orderedSequence, changes chan<- ValueChanged, closeChan <-chan struct{}) bool type diffTestSuite struct { suite.Suite from1, to1, by1 int from2, to2, by2 int numAddsExpected int numRemovesExpected int numModifiedExpected int added ValueSlice removed ValueSlice modified ValueSlice } func newDiffTestSuite(from1, to1, by1, from2, to2, by2, numAddsExpected, numRemovesExpected, numModifiedExpected int) *diffTestSuite { return &diffTestSuite{ from1: from1, to1: to1, by1: by1, from2: from2, to2: to2, by2: by2, numAddsExpected: numAddsExpected, numRemovesExpected: numRemovesExpected, numModifiedExpected: numModifiedExpected, } } func accumulateOrderedSequenceDiffChanges(o1, o2 orderedSequence, df diffFn) (added []Value, removed []Value, modified []Value) { changes := make(chan ValueChanged) closeChan := make(chan struct{}) go func() { df(o1, o2, changes, closeChan) close(changes) }() for change := range changes { if change.ChangeType == DiffChangeAdded { added = append(added, change.Key) } else if change.ChangeType == DiffChangeRemoved { removed = append(removed, change.Key) } else { modified = append(modified, change.Key) } } return } func (suite *diffTestSuite) TestDiff() { vs := newTestValueStore() type valFn func(int, int, int) ValueSlice type colFn func([]Value) Collection notNil := func(vs []Value) bool { for _, v := range vs { if v == nil { return false } } return true } runTestDf := func(name string, vf valFn, cf colFn, df diffFn) { col1 := cf(vf(suite.from1, suite.to1, suite.by1)) col2 := cf(vf(suite.from2, suite.to2, suite.by2)) suite.added, suite.removed, suite.modified = accumulateOrderedSequenceDiffChanges( col1.asSequence().(orderedSequence), 
col2.asSequence().(orderedSequence), df) suite.Equal(suite.numAddsExpected, len(suite.added), "test %s: num added is not as expected", name) suite.Equal(suite.numRemovesExpected, len(suite.removed), "test %s: num removed is not as expected", name) suite.Equal(suite.numModifiedExpected, len(suite.modified), "test %s: num modified is not as expected", name) suite.True(notNil(suite.added), "test %s: added has nil values", name) suite.True(notNil(suite.removed), "test %s: removed has nil values", name) suite.True(notNil(suite.modified), "test %s: modified has nil values", name) } runTest := func(name string, vf valFn, cf colFn) { runTestDf(name, vf, cf, orderedSequenceDiffTopDown) runTestDf(name, vf, cf, orderedSequenceDiffLeftRight) runTestDf(name, vf, cf, orderedSequenceDiffBest) } newSetAsCol := func(vals []Value) Collection { return NewSet(vs, vals...) } newMapAsCol := func(vals []Value) Collection { return NewMap(vs, vals...) } rw := func(col Collection) Collection { h := vs.WriteValue(col).TargetHash() vs.Commit(vs.Root(), vs.Root()) return vs.ReadValue(h).(Collection) } newSetAsColRw := func(vs []Value) Collection { return rw(newSetAsCol(vs)) } newMapAsColRw := func(vs []Value) Collection { return rw(newMapAsCol(vs)) } runTest("set of numbers", generateNumbersAsValuesFromToBy, newSetAsCol) runTest("set of numbers (rw)", generateNumbersAsValuesFromToBy, newSetAsColRw) runTest("set of structs", generateNumbersAsStructsFromToBy, newSetAsCol) runTest("set of structs (rw)", generateNumbersAsStructsFromToBy, newSetAsColRw) suite.to1 *= 2 suite.to2 *= 2 runTest("map of numbers", generateNumbersAsValuesFromToBy, newMapAsCol) runTest("map of structs", generateNumbersAsStructsFromToBy, newMapAsColRw) runTest("map of numbers (rw)", generateNumbersAsValuesFromToBy, newMapAsCol) runTest("map of structs (rw)", generateNumbersAsStructsFromToBy, newMapAsColRw) } func TestOrderedSequencesIdentical(t *testing.T) { ts := newDiffTestSuite( 0, lengthOfNumbersTest, 1, 0, 
lengthOfNumbersTest, 1, 0, 0, 0) suite.Run(t, ts) } func TestOrderedSequencesSubset(t *testing.T) { ts1 := newDiffTestSuite( 0, lengthOfNumbersTest, 1, 0, lengthOfNumbersTest/2, 1, 0, lengthOfNumbersTest/2, 0) ts2 := newDiffTestSuite( 0, lengthOfNumbersTest/2, 1, 0, lengthOfNumbersTest, 1, lengthOfNumbersTest/2, 0, 0) suite.Run(t, ts1) suite.Run(t, ts2) ts1.True(ts1.added.Equals(ts2.removed), "added and removed in reverse order diff") ts1.True(ts1.removed.Equals(ts2.added), "removed and added in reverse order diff") } func TestOrderedSequencesDisjoint(t *testing.T) { ts1 := newDiffTestSuite( 0, lengthOfNumbersTest, 2, 1, lengthOfNumbersTest, 2, lengthOfNumbersTest/2, lengthOfNumbersTest/2, 0) ts2 := newDiffTestSuite( 1, lengthOfNumbersTest, 2, 0, lengthOfNumbersTest, 2, lengthOfNumbersTest/2, lengthOfNumbersTest/2, 0) suite.Run(t, ts1) suite.Run(t, ts2) ts1.True(ts1.added.Equals(ts2.removed), "added and removed in disjoint diff") ts1.True(ts1.removed.Equals(ts2.added), "removed and added in disjoint diff") } func TestOrderedSequencesDiffCloseWithoutReading(t *testing.T) { vs := newTestValueStore() runTest := func(df diffFn) { s1 := NewSet(vs).orderedSequence // A single item should be enough, but generate lots anyway. s2 := NewSet(vs, generateNumbersAsValuesFromToBy(0, 1000, 1)...).orderedSequence changeChan := make(chan ValueChanged) closeChan := make(chan struct{}) stopChan := make(chan struct{}) go func() { df(s1, s2, changeChan, closeChan) stopChan <- struct{}{} }() closeChan <- struct{}{} <-stopChan } runTest(orderedSequenceDiffBest) runTest(orderedSequenceDiffLeftRight) runTest(orderedSequenceDiffTopDown) } func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) { assert := assert.New(t) vrw := newTestValueStore() newSetSequenceMt := func(v ...Value) metaTuple { seq := newSetLeafSequence(vrw, v...) 
set := newSet(seq) return newMetaTuple(vrw.WriteValue(set), newOrderedKey(v[len(v)-1]), uint64(len(v))) } m1 := newSetSequenceMt(Number(1), Number(2)) m2 := newSetSequenceMt(Number(3), Number(4)) m3 := newSetSequenceMt(Number(5), Number(6)) s1 := newSetMetaSequence(1, []metaTuple{m1, m3}, vrw) s2 := newSetMetaSequence(1, []metaTuple{m1, m2, m3}, vrw) runTest := func(df diffFn) { changes := make(chan ValueChanged) go func() { df(s1, s2, changes, nil) changes <- ValueChanged{} df(s2, s1, changes, nil) close(changes) }() expected := []ValueChanged{ {DiffChangeAdded, Number(3), nil, nil}, {DiffChangeAdded, Number(4), nil, nil}, {}, {DiffChangeRemoved, Number(3), nil, nil}, {DiffChangeRemoved, Number(4), nil, nil}, } i := 0 for c := range changes { assert.Equal(expected[i], c) i++ } assert.Equal(len(expected), i) } runTest(orderedSequenceDiffBest) runTest(orderedSequenceDiffLeftRight) runTest(orderedSequenceDiffTopDown) } ================================================ FILE: go/types/path.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "errors" "fmt" "math" "regexp" "strconv" "strings" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" ) // For an annotation like @type, 1st capture group is the annotation. // For @at(42), 1st capture group is the annotation and 3rd is the parameter. // Note, @at() is valid under this regexp, code should deal with the error. var annotationRe = regexp.MustCompile(`^([a-z]+)(\(([\w\-"']*)\))?`) // A Path locates a value in Noms relative to some other value. For locating // values absolutely within a database, see AbsolutePath. To locate values // globally, see Spec. // // For more details, see: // https://github.com/attic-labs/noms/blob/master/doc/spelling.md. 
type Path []PathPart type PathPart interface { Resolve(v Value, vr ValueReader) Value String() string } // ParsePath parses str into a Path, or returns an error if parsing failed. func ParsePath(str string) (Path, error) { if str == "" { return Path{}, errors.New("Empty path") } return constructPath(Path{}, str) } // MustParsePath parses str into a Path, or panics if parsing failed. func MustParsePath(str string) Path { p, err := ParsePath(str) if err != nil { panic(err) } return p } type keyIndexable interface { setIntoKey(v bool) keyIndexable } func constructPath(p Path, str string) (Path, error) { if len(str) == 0 { return p, nil } op, tail := str[0], str[1:] switch op { case '.': idx := fieldNameComponentRe.FindIndex([]byte(tail)) if idx == nil { return Path{}, errors.New("Invalid field: " + tail) } p = append(p, FieldPath{tail[:idx[1]]}) return constructPath(p, tail[idx[1]:]) case '[': if len(tail) == 0 { return Path{}, errors.New("Path ends in [") } idx, h, rem, err := ParsePathIndex(tail) if err != nil { return Path{}, err } if !strings.HasPrefix(rem, "]") { return Path{}, errors.New("[ is missing closing ]") } d.PanicIfTrue(idx == nil && h.IsEmpty()) d.PanicIfTrue(idx != nil && !h.IsEmpty()) if idx != nil { p = append(p, NewIndexPath(idx)) } else { p = append(p, NewHashIndexPath(h)) } return constructPath(p, rem[1:]) case '@': ann, hasArg, arg, rem := getAnnotation(tail) switch ann { case "at": if arg == "" { return Path{}, fmt.Errorf("@at annotation requires a position argument") } idx, err := strconv.ParseInt(arg, 10, 64) if err != nil { return Path{}, fmt.Errorf("Invalid position: %s", arg) } return constructPath(append(p, NewAtAnnotation(idx)), rem) case "key": if hasArg { return Path{}, fmt.Errorf("@key annotation does not support arguments") } if len(p) == 0 { return Path{}, fmt.Errorf("Cannot use @key annotation at beginning of path") } lastPart := p[len(p)-1] if ki, ok := lastPart.(keyIndexable); ok { p[len(p)-1] = ki.setIntoKey(true).(PathPart) 
return constructPath(p, rem) } return Path{}, fmt.Errorf("Cannot use @key annotation on: %s", lastPart.String()) case "target": if hasArg { return Path{}, fmt.Errorf("@target annotation does not support arguments") } return constructPath(append(p, TargetAnnotation{}), rem) case "type": if hasArg { return Path{}, fmt.Errorf("@type annotation does not support arguments") } return constructPath(append(p, TypeAnnotation{}), rem) default: return Path{}, fmt.Errorf("Unsupported annotation: @%s", ann) } case ']': return Path{}, errors.New("] is missing opening [") default: return Path{}, fmt.Errorf("Invalid operator: %c", op) } } // Resolve resolves a path relative to some value. // A ValueReader is required to resolve paths that contain the @target annotation. func (p Path) Resolve(v Value, vr ValueReader) (resolved Value) { resolved = v for _, part := range p { if resolved == nil { break } resolved = part.Resolve(resolved, vr) } return } func (p Path) Equals(o Path) bool { if len(p) != len(o) { return false } for i, pp := range p { if pp != o[i] { return false } } return true } // Append makes a copy of a p and appends the PathPart 'pp' to it. func (p Path) Append(pp PathPart) Path { p1 := make(Path, len(p), len(p)+1) copy(p1, p) return append(p1, pp) } func (p Path) String() string { strs := make([]string, 0, len(p)) for _, part := range p { strs = append(strs, part.String()) } return strings.Join(strs, "") } func (p Path) IsEmpty() bool { return len(p) == 0 } // FieldPath references Struct field values by name. type FieldPath struct { // The name of the field, e.g. `.Name`. 
Name string } func NewFieldPath(name string) FieldPath { return FieldPath{name} } func (fp FieldPath) Resolve(v Value, vr ValueReader) Value { switch v := v.(type) { case Struct: if sv, ok := v.MaybeGet(fp.Name); ok { return sv } case *Type: if desc, ok := v.Desc.(StructDesc); ok { if df, _ := desc.Field(fp.Name); df != nil { return df } } } return nil } func (fp FieldPath) String() string { return fmt.Sprintf(".%s", fp.Name) } // IndexPath ndexes into Maps and Lists by key or index. type IndexPath struct { // The value of the index, e.g. `[42]` or `["value"]`. If Index is a negative // number and the path is resolved in a List, it means index from the back. Index Value // Whether this index should resolve to the key of a map, given by a `@key` // annotation. Typically IntoKey is false, and indices would resolve to the // values. E.g. given `{a: 42}` then `["a"]` resolves to `42`. If IntoKey is // true, then it resolves to `"a"`. For IndexPath this isn't particularly // useful - it's mostly provided for consistency with HashIndexPath - but // note that given `{a: 42}` then `["b"]` resolves to nil, not `"b"`. 
IntoKey bool } func NewIndexPath(idx Value) IndexPath { return newIndexPath(idx, false) } func NewIndexIntoKeyPath(idx Value) IndexPath { return newIndexPath(idx, true) } func ValueCanBePathIndex(v Value) bool { k := v.Kind() return k == StringKind || k == BoolKind || k == NumberKind } func newIndexPath(idx Value, intoKey bool) IndexPath { d.PanicIfFalse(ValueCanBePathIndex(idx)) return IndexPath{idx, intoKey} } func (ip IndexPath) Resolve(v Value, vr ValueReader) Value { seqIndex := func(getter func(i uint64) Value) Value { n, ok := ip.Index.(Number) if !ok { return nil } f := float64(n) if f != math.Trunc(f) { return nil } ai, ok := getAbsoluteIndex(v, int64(f)) if !ok { return nil } if ip.IntoKey { return Number(ai) } return getter(ai) } switch v := v.(type) { case List: return seqIndex(func(i uint64) Value { return v.Get(i) }) case *Type: if cd, ok := v.Desc.(CompoundDesc); ok { return seqIndex(func(i uint64) Value { return cd.ElemTypes[i] }) } case Map: if !ip.IntoKey { return v.Get(ip.Index) } if v.Has(ip.Index) { return ip.Index } } return nil } func (ip IndexPath) String() (str string) { str = fmt.Sprintf("[%s]", EncodedIndexValue(ip.Index)) if ip.IntoKey { str += "@key" } return } func (ip IndexPath) setIntoKey(v bool) keyIndexable { ip.IntoKey = v return ip } // Indexes into Maps by the hash of a key, or a Set by the hash of a value. type HashIndexPath struct { // The hash of the key or value to search for. Maps and Set are ordered, so // this in O(log(size)). Hash hash.Hash // Whether this index should resolve to the key of a map, given by a `@key` // annotation. Typically IntoKey is false, and indices would resolve to the // values. E.g. given `{a: 42}` and if the hash of `"a"` is `#abcd`, then // `[#abcd]` resolves to `42`. If IntoKey is true, then it resolves to `"a"`. // This is useful for when Map keys aren't primitive values, e.g. a struct, // since struct literals can't be spelled using a Path. 
IntoKey bool } func NewHashIndexPath(h hash.Hash) HashIndexPath { return newHashIndexPath(h, false) } func NewHashIndexIntoKeyPath(h hash.Hash) HashIndexPath { return newHashIndexPath(h, true) } func newHashIndexPath(h hash.Hash, intoKey bool) HashIndexPath { d.PanicIfTrue(h.IsEmpty()) return HashIndexPath{h, intoKey} } func (hip HashIndexPath) Resolve(v Value, vr ValueReader) (res Value) { var seq orderedSequence var getCurrentValue func(cur *sequenceCursor) Value switch v := v.(type) { case Set: // Unclear what the behavior should be if |hip.IntoKey| is true, but ignoring it for sets is arguably correct. seq = v.orderedSequence getCurrentValue = func(cur *sequenceCursor) Value { return cur.current().(Value) } case Map: seq = v.orderedSequence if hip.IntoKey { getCurrentValue = func(cur *sequenceCursor) Value { return cur.current().(mapEntry).key } } else { getCurrentValue = func(cur *sequenceCursor) Value { return cur.current().(mapEntry).value } } default: return nil } cur := newCursorAt(seq, orderedKeyFromHash(hip.Hash), false, false) if !cur.valid() { return nil } if getCurrentKey(cur).h != hip.Hash { return nil } return getCurrentValue(cur) } func (hip HashIndexPath) String() (str string) { str = fmt.Sprintf("[#%s]", hip.Hash.String()) if hip.IntoKey { str += "@key" } return } func (hip HashIndexPath) setIntoKey(v bool) keyIndexable { hip.IntoKey = v return hip } // Parse a Noms value from the path index syntax. // 4 -> types.Number // "4" -> types.String // true|false -> types.Boolean // # -> hash.Hash func ParsePathIndex(str string) (idx Value, h hash.Hash, rem string, err error) { Switch: switch str[0] { case '"': // String is complicated because ] might be quoted, and " or \ might be escaped. 
stringBuf := bytes.Buffer{} i := 1 for ; i < len(str); i++ { c := str[i] if c == '"' { i++ break } if c == '\\' && i < len(str)-1 { i++ c = str[i] if c != '\\' && c != '"' { err = errors.New(`Only " and \ can be escaped`) break Switch } } stringBuf.WriteByte(c) } idx = String(stringBuf.String()) rem = str[i:] default: idxStr := str sepIdx := strings.Index(str, "]") if sepIdx >= 0 { idxStr = str[:sepIdx] rem = str[sepIdx:] } if len(idxStr) == 0 { err = errors.New("Empty index value") } else if idxStr[0] == '#' { hashStr := idxStr[1:] h, _ = hash.MaybeParse(hashStr) if h.IsEmpty() { err = errors.New("Invalid hash: " + hashStr) } } else if idxStr == "true" { idx = Bool(true) } else if idxStr == "false" { idx = Bool(false) } else if i, err2 := strconv.ParseFloat(idxStr, 64); err2 == nil { // Should we be more strict here? ParseFloat allows leading and trailing dots, and exponents. idx = Number(i) } else { err = errors.New("Invalid index: " + idxStr) } } return } // TypeAnnotation is a PathPart annotation to resolve to the type of the value // it's resolved in. type TypeAnnotation struct { } func (ann TypeAnnotation) Resolve(v Value, vr ValueReader) Value { return TypeOf(v) } func (ann TypeAnnotation) String() string { return "@type" } // TargetAnnotation is a PathPart annotation to resolve to the targetValue of the Ref it is resolved on. type TargetAnnotation struct { } func (ann TargetAnnotation) Resolve(v Value, vr ValueReader) Value { if vr == nil { d.Panic("@target annotation requires a database to resolve against") } if r, ok := v.(Ref); ok { return r.TargetValue(vr) } else { return nil } } func (ann TargetAnnotation) String() string { return "@target" } // AtAnnotation is a PathPart annotation that gets the value of a collection at // a position, rather than a key. This is equivalent to IndexPath for lists, // but different for sets and maps. type AtAnnotation struct { // Index is the position to resolve at. 
If negative, it means an index // relative to the end of the collection. Index int64 // IntoKey see IndexPath.IntoKey. IntoKey bool } func NewAtAnnotation(idx int64) AtAnnotation { return AtAnnotation{idx, false} } func NewAtAnnotationIntoKeyPath(idx int64) AtAnnotation { return AtAnnotation{idx, true} } func (ann AtAnnotation) Resolve(v Value, vr ValueReader) Value { ai, ok := getAbsoluteIndex(v, ann.Index) if !ok { return nil } switch v := v.(type) { case List: if !ann.IntoKey { return v.Get(ai) } case Set: return v.At(ai) case Map: k, mapv := v.At(ai) if ann.IntoKey { return k } return mapv case *Type: if cd, ok := v.Desc.(CompoundDesc); ok { return cd.ElemTypes[ai] } } return nil } func (ann AtAnnotation) String() (str string) { str = fmt.Sprintf("@at(%d)", ann.Index) if ann.IntoKey { str += "@key" } return } func (ann AtAnnotation) setIntoKey(v bool) keyIndexable { ann.IntoKey = v return ann } func getAnnotation(str string) (ann string, hasArg bool, arg, rem string) { parts := annotationRe.FindStringSubmatch(str) if parts == nil { return } ann = parts[1] hasArg = parts[2] != "" arg = parts[3] rem = str[len(parts[0]):] return } func getAbsoluteIndex(v Value, relIdx int64) (absIdx uint64, ok bool) { var l uint64 switch v := v.(type) { case Collection: l = v.Len() case *Type: if cd, cdOK := v.Desc.(CompoundDesc); cdOK { l = uint64(len(cd.ElemTypes)) } else { return } default: return } if relIdx < 0 { if uint64(-relIdx) > l { return } absIdx = l - uint64(-relIdx) } else { if uint64(relIdx) >= l { return } absIdx = uint64(relIdx) } ok = true return } ================================================ FILE: go/types/path_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "fmt" "testing" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) func hashIdx(v Value) string { return fmt.Sprintf("[#%s]", v.Hash().String()) } func assertResolvesTo(assert *assert.Assertions, expect, ref Value, str string) { assertResolvesToWithVR(assert, expect, ref, str, nil) } func assertResolvesToWithVR(assert *assert.Assertions, expect, ref Value, str string, vr ValueReader) { p, err := ParsePath(str) assert.NoError(err) actual := p.Resolve(ref, vr) if expect == nil { if actual != nil { assert.Fail("", "Expected nil, but got %s", EncodedValue(actual)) } } else if actual == nil { assert.Fail("", "Expected %s, but got nil", EncodedValue(expect)) } else { assert.True(expect.Equals(actual), "Expected %s, but got %s", EncodedValue(expect), EncodedValue(actual)) } } func TestPathStruct(t *testing.T) { assert := assert.New(t) v := NewStruct("", StructData{ "foo": String("foo"), "bar": Bool(false), "baz": Number(203), }) assertResolvesTo(assert, String("foo"), v, `.foo`) assertResolvesTo(assert, Bool(false), v, `.bar`) assertResolvesTo(assert, Number(203), v, `.baz`) assertResolvesTo(assert, nil, v, `.notHere`) v2 := NewStruct("", StructData{ "v1": v, }) assertResolvesTo(assert, String("foo"), v2, `.v1.foo`) assertResolvesTo(assert, Bool(false), v2, `.v1.bar`) assertResolvesTo(assert, Number(203), v2, `.v1.baz`) assertResolvesTo(assert, nil, v2, `.v1.notHere`) assertResolvesTo(assert, nil, v2, `.notHere.v1`) } func TestPathStructType(t *testing.T) { assert := assert.New(t) typ := MakeStructType("MyStruct", StructField{Name: "foo", Type: StringType}, StructField{Name: "bar", Type: BoolType}, StructField{Name: "baz", Type: NumberType}, ) assertResolvesTo(assert, StringType, typ, `.foo`) assertResolvesTo(assert, BoolType, typ, `.bar`) assertResolvesTo(assert, NumberType, typ, `.baz`) assertResolvesTo(assert, nil, 
typ, `.notHere`) typ2 := MakeStructType("", StructField{Name: "typ", Type: typ}, ) assertResolvesTo(assert, typ, typ2, `.typ`) assertResolvesTo(assert, StringType, typ2, `.typ.foo`) assertResolvesTo(assert, BoolType, typ2, `.typ.bar`) assertResolvesTo(assert, NumberType, typ2, `.typ.baz`) assertResolvesTo(assert, nil, typ2, `.typ.notHere`) assertResolvesTo(assert, nil, typ2, `.notHere.typ`) } func TestPathIndex(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() var v Value resolvesTo := func(expVal, expKey Value, str string) { assertResolvesTo(assert, expVal, v, str) assertResolvesTo(assert, expKey, v, str+"@key") } v = NewList(vs, Number(1), Number(3), String("foo"), Bool(false)) resolvesTo(Number(1), Number(0), "[0]") resolvesTo(Number(3), Number(1), "[1]") resolvesTo(String("foo"), Number(2), "[2]") resolvesTo(Bool(false), Number(3), "[3]") resolvesTo(nil, nil, "[4]") resolvesTo(nil, nil, "[-5]") resolvesTo(Number(1), Number(0), "[-4]") resolvesTo(Number(3), Number(1), "[-3]") resolvesTo(String("foo"), Number(2), "[-2]") resolvesTo(Bool(false), Number(3), "[-1]") v = NewMap(vs, Bool(false), Number(23), Number(1), String("foo"), Number(2.3), Number(4.5), String("two"), String("bar"), ) resolvesTo(String("foo"), Number(1), "[1]") resolvesTo(String("bar"), String("two"), `["two"]`) resolvesTo(Number(23), Bool(false), "[false]") resolvesTo(Number(4.5), Number(2.3), "[2.3]") resolvesTo(nil, nil, "[4]") } func TestPathIndexType(t *testing.T) { assert := assert.New(t) st := MakeSetType(NumberType) lt := MakeListType(st) mt := MakeMapType(st, lt) ut := MakeUnionType(lt, mt, st) assertResolvesTo(assert, NumberType, st, "[0]") assertResolvesTo(assert, NumberType, st, "[-1]") assertResolvesTo(assert, NumberType, st, "@at(0)") assertResolvesTo(assert, nil, st, "[1]") assertResolvesTo(assert, nil, st, "[-2]") assertResolvesTo(assert, st, lt, "[0]") assertResolvesTo(assert, st, lt, "[-1]") assertResolvesTo(assert, NumberType, lt, "[0][0]") 
assertResolvesTo(assert, NumberType, lt, "@at(0)@at(0)") assertResolvesTo(assert, nil, lt, "[1]") assertResolvesTo(assert, nil, lt, "[-2]") assertResolvesTo(assert, st, mt, "[0]") assertResolvesTo(assert, st, mt, "[-2]") assertResolvesTo(assert, lt, mt, "[1]") assertResolvesTo(assert, lt, mt, "[-1]") assertResolvesTo(assert, NumberType, mt, "[1][0][0]") assertResolvesTo(assert, NumberType, mt, "@at(1)@at(0)@at(0)") assertResolvesTo(assert, nil, mt, "[2]") assertResolvesTo(assert, nil, mt, "[-3]") assertResolvesTo(assert, lt, ut, "[0]") assertResolvesTo(assert, lt, ut, "[-3]") assertResolvesTo(assert, mt, ut, "[1]") assertResolvesTo(assert, mt, ut, "[-2]") assertResolvesTo(assert, st, ut, "[2]") assertResolvesTo(assert, st, ut, "[-1]") assertResolvesTo(assert, NumberType, ut, "[1][1][0][0]") assertResolvesTo(assert, NumberType, ut, "@at(1)@at(1)@at(0)@at(0)") assertResolvesTo(assert, nil, ut, "[3]") assertResolvesTo(assert, nil, ut, "[-4]") } func TestPathHashIndex(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() b := Bool(true) br := NewRef(b) i := Number(0) str := String("foo") l := NewList(vs, b, i, str) lr := NewRef(l) m := NewMap(vs, b, br, br, i, i, str, l, lr, lr, b, ) s := NewSet(vs, b, br, i, str, l, lr) resolvesTo := func(col, key, expVal, expKey Value) { assertResolvesTo(assert, expVal, col, hashIdx(key)) assertResolvesTo(assert, expKey, col, hashIdx(key)+"@key") } // Primitives are only addressable by their values. resolvesTo(m, b, nil, nil) resolvesTo(m, i, nil, nil) resolvesTo(m, str, nil, nil) resolvesTo(s, b, nil, nil) resolvesTo(s, i, nil, nil) resolvesTo(s, str, nil, nil) // Other values are only addressable by their hashes. resolvesTo(m, br, i, br) resolvesTo(m, l, lr, l) resolvesTo(m, lr, b, lr) resolvesTo(s, br, br, br) resolvesTo(s, l, l, l) resolvesTo(s, lr, lr, lr) // Lists cannot be addressed by hashes, obviously. 
resolvesTo(l, i, nil, nil) } func TestPathHashIndexOfSingletonCollection(t *testing.T) { // This test is to make sure we don't accidentally return |b| if it's the only element. assert := assert.New(t) vs := newTestValueStore() resolvesToNil := func(col, val Value) { assertResolvesTo(assert, nil, col, hashIdx(val)) } b := Bool(true) resolvesToNil(NewMap(vs, b, b), b) resolvesToNil(NewSet(vs, b), b) } func TestPathMulti(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() m1 := NewMap(vs, String("a"), String("foo"), String("b"), String("bar"), String("c"), String("car"), ) m2 := NewMap(vs, Bool(false), String("earth"), String("d"), String("dar"), m1, String("fire"), ) l := NewList(vs, m1, m2) s := NewStruct("", StructData{ "foo": l, }) assertResolvesTo(assert, l, s, `.foo`) assertResolvesTo(assert, m1, s, `.foo[0]`) assertResolvesTo(assert, String("foo"), s, `.foo[0]["a"]`) assertResolvesTo(assert, String("bar"), s, `.foo[0]["b"]`) assertResolvesTo(assert, String("car"), s, `.foo[0]["c"]`) assertResolvesTo(assert, String("foo"), s, `.foo[0]@at(0)`) assertResolvesTo(assert, String("bar"), s, `.foo[0]@at(1)`) assertResolvesTo(assert, String("car"), s, `.foo[0]@at(2)`) assertResolvesTo(assert, nil, s, `.foo[0]["x"]`) assertResolvesTo(assert, nil, s, `.foo[0]@at(3)`) assertResolvesTo(assert, nil, s, `.foo[2]["c"]`) assertResolvesTo(assert, nil, s, `.notHere[0]["c"]`) assertResolvesTo(assert, m2, s, `.foo[1]`) assertResolvesTo(assert, String("dar"), s, `.foo[1]["d"]`) assertResolvesTo(assert, String("earth"), s, `.foo[1][false]`) assertResolvesTo(assert, String("fire"), s, fmt.Sprintf(`.foo[1]%s`, hashIdx(m1))) assertResolvesTo(assert, m1, s, fmt.Sprintf(`.foo[1]%s@key`, hashIdx(m1))) assertResolvesTo(assert, String("car"), s, fmt.Sprintf(`.foo[1]%s@key["c"]`, hashIdx(m1))) assertResolvesTo(assert, String("fire"), s, `.foo[1]@at(2)`) assertResolvesTo(assert, m1, s, `.foo[1]@at(2)@key`) assertResolvesTo(assert, String("car"), s, `.foo[1]@at(2)@key@at(2)`) 
assertResolvesTo(assert, String("fire"), s, `.foo[1]@at(-1)`) assertResolvesTo(assert, m1, s, `.foo[1]@at(-1)@key`) assertResolvesTo(assert, String("car"), s, `.foo[1]@at(-1)@key@at(-1)`) } func TestPathParseSuccess(t *testing.T) { assert := assert.New(t) test := func(str string) { p, err := ParsePath(str) assert.NoError(err) expectStr := str switch expectStr { // Human readable serialization special cases. case "[1e4]": expectStr = "[10000]" case "[1.]": expectStr = "[1]" case "[\"line\nbreak\rreturn\"]": expectStr = `["line\nbreak\rreturn"]` } assert.Equal(expectStr, p.String()) } h := Number(42).Hash() // arbitrary hash test(".foo") test(".foo@type") test(".Q") test(".QQ") test("[true]") test("[true]@type") test("[false]") test("[false]@key") test("[false]@key@type") test("[false]@key@type@at(42)") test("[42]") test("[42]@key") test("[42]@at(-101)") test("[1e4]") test("[1.]") test("[1.345]") test(`[""]`) test(`["42"]`) test(`["42"]@key`) test("[\"line\nbreak\rreturn\"]") test(`["qu\\ote\""]`) test(`["π"]`) test(`["[[br][]acke]]ts"]`) test(`["xπy✌z"]`) test(`["ಠ_ಠ"]`) test(`["0"]["1"]["100"]`) test(".foo[0].bar[4.5][false]") test(fmt.Sprintf(".foo[#%s]", h.String())) test(fmt.Sprintf(".bar[#%s]@key", h.String())) } func TestPathParseErrors(t *testing.T) { assert := assert.New(t) test := func(str, expectError string) { p, err := ParsePath(str) assert.Equal(Path{}, p) if err != nil { assert.Equal(expectError, err.Error()) } else { assert.Fail("Expected " + expectError) } } test("", "Empty path") test(".", "Invalid field: ") test("[", "Path ends in [") test("]", "] is missing opening [") test(".#", "Invalid field: #") test(". ", "Invalid field: ") test(". 
invalid.field", "Invalid field: invalid.field") test(".foo.", "Invalid field: ") test(".foo.#invalid.field", "Invalid field: #invalid.field") test(".foo!", "Invalid operator: !") test(".foo!bar", "Invalid operator: !") test(".foo#", "Invalid operator: #") test(".foo#bar", "Invalid operator: #") test(".foo[", "Path ends in [") test(".foo[.bar", "Invalid index: .bar") test(".foo]", "] is missing opening [") test(".foo].bar", "] is missing opening [") test(".foo[]", "Empty index value") test(".foo[[]", "Invalid index: [") test(".foo[[]]", "Invalid index: [") test(".foo[42.1.2]", "Invalid index: 42.1.2") test(".foo[1f4]", "Invalid index: 1f4") test(".foo[hello]", "Invalid index: hello") test(".foo['hello']", "Invalid index: 'hello'") test(`.foo[\]`, `Invalid index: \`) test(`.foo[\\]`, `Invalid index: \\`) test(`.foo["hello]`, "[ is missing closing ]") test(`.foo["hello`, "[ is missing closing ]") test(`.foo["hello"`, "[ is missing closing ]") test(`.foo["`, "[ is missing closing ]") test(`.foo["\`, "[ is missing closing ]") test(`.foo["]`, "[ is missing closing ]") test(".foo[#]", "Invalid hash: ") test(".foo[#invalid]", "Invalid hash: invalid") test(`.foo["hello\nworld"]`, `Only " and \ can be escaped`) test(".foo[42]bar", "Invalid operator: b") test("#foo", "Invalid operator: #") test("!foo", "Invalid operator: !") test("@foo", "Unsupported annotation: @foo") test("@key", "Cannot use @key annotation at beginning of path") test(".foo@key", "Cannot use @key annotation on: .foo") test(".foo@key()", "@key annotation does not support arguments") test(".foo@key(42)", "@key annotation does not support arguments") test(".foo@type()", "@type annotation does not support arguments") test(".foo@type(42)", "@type annotation does not support arguments") test(".foo@at", "@at annotation requires a position argument") test(".foo@at()", "@at annotation requires a position argument") test(".foo@at(", "@at annotation requires a position argument") test(".foo@at(42", "@at annotation 
requires a position argument") test(fmt.Sprintf(".foo[#%s]@soup", hash.Of([]byte{42}).String()), "Unsupported annotation: @soup") } func TestPathEquals(t *testing.T) { assert := assert.New(t) equalPaths := []string{ `[1]`, `["one"]`, `.two.three`, `["yo"]@key`, } notEqualPaths := [][]string{ {`[1]`, `[2]`}, {`["one"]`, `["two"]`}, {`.two.three`, `.two.four`}, {`["yo"]@key`, `["yo"]`}, } assert.True(Path{}.Equals(Path{})) for _, s := range equalPaths { p, err := ParsePath(s) assert.NoError(err) assert.True(p.Equals(p)) } simple, err := ParsePath(`["one"].two`) assert.NoError(err) assert.False(Path{}.Equals(simple)) for _, a := range notEqualPaths { s0, s1 := a[0], a[1] p0, err := ParsePath(s0) assert.NoError(err) p1, err := ParsePath(s1) assert.NoError(err) assert.False(p0.Equals(p1)) } } func TestPathCanBePathIndex(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() assert.True(ValueCanBePathIndex(Bool(true))) assert.True(ValueCanBePathIndex(Number(5))) assert.True(ValueCanBePathIndex(String("yes"))) assert.False(ValueCanBePathIndex(NewRef(String("yes")))) assert.False(ValueCanBePathIndex(NewBlob(vs, bytes.NewReader([]byte("yes"))))) } func TestCopyPath(t *testing.T) { assert := assert.New(t) testCases := []string{ ``, `["key"]`, `["key"].field1`, `["key"]@key.field1`, } for _, s1 := range testCases { expected, err := ParsePath(s1 + `["anIndex"]`) assert.NoError(err) var p Path if s1 != "" { p, err = ParsePath(s1) } assert.NoError(err) p1 := p.Append(NewIndexPath(String("anIndex"))) if len(p) > 0 { p[0] = expected[1] // if p1 really is a copy, this shouldn't be noticed } assert.Equal(expected, p1) } } func TestMustParsePath(t *testing.T) { for _, good := range []string{".good", "[\"good\"]"} { assert.NotNil(t, MustParsePath(good)) } for _, bad := range []string{"", "bad", "[bad]", "!", "💩"} { assert.Panics(t, func() { MustParsePath(bad) }) } } func TestPathType(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() m := NewMap(vs, 
String("string"), String("foo"), String("bool"), Bool(false), String("number"), Number(42), String("List"), NewList(vs, Number(42), String("foo")), String("Map"), NewMap(vs, Bool(true), Bool(false))) m.IterAll(func(k, cv Value) { ks := k.(String) assertResolvesTo(assert, TypeOf(cv), m, fmt.Sprintf("[\"%s\"]@type", ks)) }) assertResolvesTo(assert, StringType, m, `["string"]@key@type`) assertResolvesTo(assert, TypeOf(m), m, `@type`) s := NewStruct("", StructData{ "str": String("foo"), "num": Number(42), }) assertResolvesTo(assert, TypeOf(s.Get("str")), s, ".str@type") assertResolvesTo(assert, TypeOf(s.Get("num")), s, ".num@type") } func TestPathTarget(t *testing.T) { assert := assert.New(t) s := NewStruct("", StructData{ "foo": String("bar"), }) vs := newTestValueStore() r := vs.WriteValue(s) s2 := NewStruct("", StructData{ "ref": r, }) assertResolvesToWithVR(assert, nil, String("notref"), `@target`, vs) assertResolvesToWithVR(assert, s, r, `@target`, vs) assertResolvesToWithVR(assert, String("bar"), r, `@target.foo`, vs) assertResolvesToWithVR(assert, String("bar"), s2, `.ref@target.foo`, vs) } func TestPathAtAnnotation(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() var v Value resolvesTo := func(expVal, expKey Value, str string) { assertResolvesTo(assert, expVal, v, str) assertResolvesTo(assert, expKey, v, str+"@key") } v = NewList(vs, Number(1), Number(3), String("foo"), Bool(false)) resolvesTo(Number(1), nil, "@at(0)") resolvesTo(Number(3), nil, "@at(1)") resolvesTo(String("foo"), nil, "@at(2)") resolvesTo(Bool(false), nil, "@at(3)") resolvesTo(nil, nil, "@at(4)") resolvesTo(nil, nil, "@at(-5)") resolvesTo(Number(1), nil, "@at(-4)") resolvesTo(Number(3), nil, "@at(-3)") resolvesTo(String("foo"), nil, "@at(-2)") resolvesTo(Bool(false), nil, "@at(-1)") v = NewSet(vs, Bool(false), Number(1), Number(2.3), String("two"), ) resolvesTo(Bool(false), Bool(false), "@at(0)") resolvesTo(Number(1), Number(1), "@at(1)") resolvesTo(Number(2.3), Number(2.3), 
"@at(2)") resolvesTo(String("two"), String("two"), `@at(3)`) resolvesTo(nil, nil, "@at(4)") resolvesTo(nil, nil, "@at(-5)") resolvesTo(Bool(false), Bool(false), "@at(-4)") resolvesTo(Number(1), Number(1), "@at(-3)") resolvesTo(Number(2.3), Number(2.3), "@at(-2)") resolvesTo(String("two"), String("two"), `@at(-1)`) v = NewMap(vs, Bool(false), Number(23), Number(1), String("foo"), Number(2.3), Number(4.5), String("two"), String("bar"), ) resolvesTo(Number(23), Bool(false), "@at(0)") resolvesTo(String("foo"), Number(1), "@at(1)") resolvesTo(Number(4.5), Number(2.3), "@at(2)") resolvesTo(String("bar"), String("two"), `@at(3)`) resolvesTo(nil, nil, "@at(4)") resolvesTo(nil, nil, "@at(-5)") resolvesTo(Number(23), Bool(false), "@at(-4)") resolvesTo(String("foo"), Number(1), "@at(-3)") resolvesTo(Number(4.5), Number(2.3), "@at(-2)") resolvesTo(String("bar"), String("two"), `@at(-1)`) } ================================================ FILE: go/types/perf/dummy.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package perf // go build fails if there are _test.go but no other go files in a directory. ================================================ FILE: go/types/perf/perf_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package perf import ( "io" "io/ioutil" "math/rand" "os" "testing" "github.com/attic-labs/noms/go/perf/suite" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) type perfSuite struct { suite.PerfSuite r *rand.Rand ds string } func (s *perfSuite) SetupSuite() { s.r = rand.New(rand.NewSource(0)) } func (s *perfSuite) Test01BuildList10mNumbers() { assert := s.NewAssert() in := make(chan types.Value, 16) out := types.NewStreamingList(s.Database, in) for i := 0; i < 1e7; i++ { in <- types.Number(s.r.Int63()) } close(in) ds := s.Database.GetDataset("BuildList10mNumbers") var err error ds, err = s.Database.CommitValue(ds, <-out) assert.NoError(err) s.Database = ds.Database() } func (s *perfSuite) Test02BuildList10mStructs() { assert := s.NewAssert() in := make(chan types.Value, 16) out := types.NewStreamingList(s.Database, in) for i := 0; i < 1e7; i++ { in <- types.NewStruct("", types.StructData{ "number": types.Number(s.r.Int63()), }) } close(in) ds := s.Database.GetDataset("BuildList10mStructs") var err error ds, err = s.Database.CommitValue(ds, <-out) assert.NoError(err) s.Database = ds.Database() } func (s *perfSuite) Test03Read10mNumbers() { s.headList("BuildList10mNumbers").IterAll(func(v types.Value, index uint64) {}) } func (s *perfSuite) Test04Read10mStructs() { s.headList("BuildList10mStructs").IterAll(func(v types.Value, index uint64) {}) } func (s *perfSuite) Test05Concat10mValues2kTimes() { assert := s.NewAssert() last := func(v types.List) types.Value { return v.Get(v.Len() - 1) } l1 := s.headList("BuildList10mNumbers") l2 := s.headList("BuildList10mStructs") l1Len, l2Len := l1.Len(), l2.Len() l1Last, l2Last := last(l1), last(l2) l3 := types.NewList(s.Database) for i := uint64(0); i < 1e3; i++ { // 1k iterations * 2 concat ops = 2k times // Include some basic sanity checks. 
l3 = l3.Concat(l1) assert.True(l1Last.Equals(last(l3))) assert.Equal(i*(l1Len+l2Len)+l1Len, l3.Len()) l3 = l3.Concat(l2) assert.True(l2Last.Equals(last(l3))) assert.Equal((i+1)*(l1Len+l2Len), l3.Len()) } ds := s.Database.GetDataset("Concat10mValues2kTimes") var err error ds, err = s.Database.CommitValue(ds, l3) assert.NoError(err) s.Database = ds.Database() } func (s *perfSuite) TestBuild500megBlobFromFilesP1() { s.testBuild500megBlob(1) } func (s *perfSuite) TestBuild500megBlobFromFilesP2() { s.testBuild500megBlob(2) } func (s *perfSuite) TestBuild500megBlobFromFilesP8() { s.testBuild500megBlob(8) } func (s *perfSuite) TestBuild500megBlobFromFilesP64() { // Note: can't have too many files open. s.testBuild500megBlob(64) } func (s *perfSuite) testBuild500megBlob(p int) { assert := s.NewAssert() size := int(5e8) readers := make([]io.Reader, p) defer func() { for _, r := range readers { f := r.(*os.File) err := f.Close() assert.NoError(err) err = os.Remove(f.Name()) assert.NoError(err) } }() s.Pause(func() { for i := range readers { f, err := ioutil.TempFile("", "testBuildBlob") assert.NoError(err) _, err = f.Write(s.randomBytes(int64(i), size/p)) assert.NoError(err) err = f.Close() assert.NoError(err) f, err = os.Open(f.Name()) assert.NoError(err) readers[i] = f } }) b := types.NewBlob(s.Database, readers...) assert.Equal(uint64(size), b.Len()) } func (s *perfSuite) randomBytes(seed int64, size int) []byte { r := rand.New(rand.NewSource(seed)) randBytes := make([]byte, size) _, err := r.Read(randBytes) assert.NoError(s.T, err) return randBytes } func (s *perfSuite) headList(dsName string) types.List { ds := s.Database.GetDataset(dsName) return ds.HeadValue().(types.List) } func TestPerf(t *testing.T) { suite.Run("types", t, &perfSuite{}) } ================================================ FILE: go/types/primitives_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "testing" "github.com/stretchr/testify/assert" ) func TestPrimitives(t *testing.T) { data := []Value{ Bool(true), Bool(false), Number(0), Number(-1), Number(-0.1), Number(0.1), } for i := range data { for j := range data { if i == j { assert.True(t, data[i].Equals(data[j]), "Expected value to equal self at index %d", i) } else { assert.False(t, data[i].Equals(data[j]), "Expected values at indices %d and %d to not equal", i, j) } } } } func TestPrimitivesType(t *testing.T) { data := []struct { v Value k NomsKind }{ {Bool(false), BoolKind}, {Number(0), NumberKind}, } for _, d := range data { assert.True(t, TypeOf(d.v).Equals(MakePrimitiveType(d.k))) } } ================================================ FILE: go/types/ref.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "github.com/attic-labs/noms/go/hash" ) type Ref struct { valueImpl } type refPart uint32 const ( refPartKind refPart = iota refPartTargetHash refPartTargetType refPartHeight refPartEnd ) func NewRef(v Value) Ref { return constructRef(v.Hash(), TypeOf(v), maxChunkHeight(v)+1) } // ToRefOfValue returns a new Ref that points to the same target as |r|, but // with the type 'Ref'. 
func ToRefOfValue(r Ref) Ref { return constructRef(r.TargetHash(), ValueType, r.Height()) } func constructRef(targetHash hash.Hash, targetType *Type, height uint64) Ref { w := newBinaryNomsWriter() offsets := make([]uint32, refPartEnd) offsets[refPartKind] = w.offset RefKind.writeTo(&w) offsets[refPartTargetHash] = w.offset w.writeHash(targetHash) offsets[refPartTargetType] = w.offset targetType.writeToAsType(&w, map[string]*Type{}) offsets[refPartHeight] = w.offset w.writeCount(height) return Ref{valueImpl{nil, w.data(), offsets}} } func writeRefPartsTo(w nomsWriter, targetHash hash.Hash, targetType *Type, height uint64) { RefKind.writeTo(w) w.writeHash(targetHash) targetType.writeToAsType(w, map[string]*Type{}) w.writeCount(height) } // readRef reads the data provided by a reader and moves the reader forward. func readRef(dec *typedBinaryNomsReader) Ref { start := dec.pos() offsets := skipRef(dec) end := dec.pos() return Ref{valueImpl{nil, dec.byteSlice(start, end), offsets}} } // skipRef moves the reader forward, past the data representing the Ref, and returns the offsets of the component parts. 
func skipRef(dec *typedBinaryNomsReader) []uint32 { offsets := make([]uint32, refPartEnd) offsets[refPartKind] = dec.pos() dec.skipKind() offsets[refPartTargetHash] = dec.pos() dec.skipHash() // targetHash offsets[refPartTargetType] = dec.pos() dec.skipType() // targetType offsets[refPartHeight] = dec.pos() dec.skipCount() // height return offsets } func maxChunkHeight(v Value) (max uint64) { v.WalkRefs(func(r Ref) { if height := r.Height(); height > max { max = height } }) return } func (r Ref) offsetAtPart(part refPart) uint32 { return r.offsets[part] - r.offsets[refPartKind] } func (r Ref) decoderAtPart(part refPart) valueDecoder { offset := r.offsetAtPart(part) return newValueDecoder(r.buff[offset:], nil) } func (r Ref) TargetHash() hash.Hash { dec := r.decoderAtPart(refPartTargetHash) return dec.readHash() } func (r Ref) Height() uint64 { dec := r.decoderAtPart(refPartHeight) return dec.readCount() } func (r Ref) TargetValue(vr ValueReader) Value { return vr.ReadValue(r.TargetHash()) } func (r Ref) TargetType() *Type { dec := r.decoderAtPart(refPartTargetType) return dec.readType() } // Value interface func (r Ref) Value() Value { return r } func (r Ref) WalkValues(cb ValueCallback) { } func (r Ref) typeOf() *Type { return makeCompoundType(RefKind, r.TargetType()) } func (r Ref) isSameTargetType(other Ref) bool { targetTypeBytes := r.buff[r.offsetAtPart(refPartTargetType):r.offsetAtPart(refPartHeight)] otherTargetTypeBytes := other.buff[other.offsetAtPart(refPartTargetType):other.offsetAtPart(refPartHeight)] return bytes.Equal(targetTypeBytes, otherTargetTypeBytes) } ================================================ FILE: go/types/ref_heap.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "sort" "github.com/attic-labs/noms/go/hash" ) // RefByHeight implements sort.Interface to order by increasing HeightOrder(). It uses increasing order because this causes repeated pushes and pops of the 'tallest' Refs to re-use memory, avoiding reallocations. // We might consider making this a firmer abstraction boundary as a part of BUG 2182 type RefByHeight []Ref func (h RefByHeight) Len() int { return len(h) } func (h RefByHeight) Less(i, j int) bool { return !HeightOrder(h[i], h[j]) } func (h RefByHeight) Swap(i, j int) { h[i], h[j] = h[j], h[i] } func (h *RefByHeight) PushBack(r Ref) { *h = append(*h, r) } func (h *RefByHeight) PopBack() Ref { old := *h n := len(old) x := old[n-1] *h = old[0 : n-1] return x } // DropIndices takes a slice of integer indices into h and splices out the Refs at those indices. func (h *RefByHeight) DropIndices(indices []int) { sort.Ints(indices) old := *h numIdx := len(indices) for i, j := 0, 0; i < old.Len(); i++ { if len(indices) > 0 && i == indices[0] { indices = indices[1:] continue } if i != j { old[j] = old[i] } j++ } *h = old[:old.Len()-numIdx] } func (h *RefByHeight) Unique() { seen := hash.HashSet{} result := make(RefByHeight, 0, cap(*h)) for _, r := range *h { target := r.TargetHash() if !seen.Has(target) { result = append(result, r) } seen.Insert(target) } *h = result } // PopRefsOfHeight pops off and returns all refs r in h for which r.Height() == height. func (h *RefByHeight) PopRefsOfHeight(height uint64) (refs RefSlice) { for h.MaxHeight() == height { r := h.PopBack() refs = append(refs, r) } return } // MaxHeight returns the height of the 'tallest' Ref in h. func (h RefByHeight) MaxHeight() uint64 { if h.Empty() { return 0 } return h.PeekEnd().Height() } func (h RefByHeight) Empty() bool { return h.Len() == 0 } // PeekEnd returns, but does not Pop the tallest Ref in h. 
func (h RefByHeight) PeekEnd() (head Ref) { return h.PeekAt(h.Len() - 1) } // PeekAt returns, but does not remove, the Ref at h[idx]. If the index is out of range, returns the empty Ref. func (h RefByHeight) PeekAt(idx int) (peek Ref) { if idx >= 0 && idx < h.Len() { peek = h[idx] } return } // HeightOrder returns true if a is 'higher than' b, generally if its ref-height is greater. If the two are of the same height, fall back to sorting by TargetHash. func HeightOrder(a, b Ref) bool { if a.Height() == b.Height() { return a.TargetHash().Less(b.TargetHash()) } // > because we want the larger heights to be at the start of the queue. return a.Height() > b.Height() } // RefSlice implements sort.Interface to order by target ref. type RefSlice []Ref func (s RefSlice) Len() int { return len(s) } func (s RefSlice) Less(i, j int) bool { return s[i].TargetHash().Less(s[j].TargetHash()) } func (s RefSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } ================================================ FILE: go/types/ref_heap_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"sort"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestRefByHeight pushes refs of heights 1, 3, 2, 2, re-sorting after each
// push, and checks that PeekEnd/PopBack yield them tallest-first, with the two
// height-2 refs ordered by target hash.
func TestRefByHeight(t *testing.T) {
	unique := 0
	// newRefWithHeight builds a Ref to a distinct Number with the given height.
	newRefWithHeight := func(height uint64) Ref {
		v := Number(unique)
		unique++
		return constructRef(v.Hash(), NumberType, height)
	}

	assert := assert.New(t)

	h := RefByHeight{}

	r1 := newRefWithHeight(1)
	r2 := newRefWithHeight(2)
	r3 := newRefWithHeight(3)
	r4 := newRefWithHeight(2)

	h.PushBack(r1)
	assert.Equal(r1, h.PeekEnd())
	assert.Equal(1, len(h))

	h.PushBack(r3)
	sort.Sort(&h)
	assert.Equal(r3, h.PeekEnd())
	assert.Equal(2, len(h))

	h.PushBack(r2)
	sort.Sort(&h)
	assert.Equal(r3, h.PeekEnd())
	assert.Equal(3, len(h))

	h.PushBack(r4)
	sort.Sort(&h)
	assert.Equal(r3, h.PeekEnd())
	assert.Equal(4, len(h))

	// r2 and r4 share a height, so the one with the smaller target hash is
	// expected to pop first.
	expectedSecond, expectedThird := func() (Ref, Ref) {
		if r2.TargetHash().Less(r4.TargetHash()) {
			return r2, r4
		}
		return r4, r2
	}()

	assert.Equal(r3, h.PopBack())
	assert.Equal(expectedSecond, h.PeekEnd())
	assert.Equal(3, len(h))

	assert.Equal(expectedSecond, h.PopBack())
	assert.Equal(expectedThird, h.PeekEnd())
	assert.Equal(2, len(h))

	assert.Equal(expectedThird, h.PopBack())
	assert.Equal(r1, h.PeekEnd())
	assert.Equal(1, len(h))

	assert.Equal(r1, h.PopBack())
	assert.Equal(0, len(h))
}

// TestDropIndices drops three positions from a sorted 10-element heap and
// checks that the refs that were at those positions are gone.
func TestDropIndices(t *testing.T) {
	h := &RefByHeight{}
	for i := 0; i < 10; i++ {
		h.PushBack(NewRef(Number(i)))
	}
	sort.Sort(h)

	toDrop := []int{2, 4, 7}
	expected := RefSlice{h.PeekAt(2), h.PeekAt(4), h.PeekAt(7)}
	h.DropIndices(toDrop)
	assert.Len(t, *h, 7)
	for i, dropped := range expected {
		assert.NotContains(t, *h, dropped, "Should not contain %d", toDrop[i])
	}
}

// TestPopRefsOfHeight checks that popping at MaxHeight removes exactly the
// three height-6 refs and leaves the other two.
func TestPopRefsOfHeight(t *testing.T) {
	h := &RefByHeight{}
	for i, n := range []int{6, 3, 6, 6, 2} {
		r := constructRef(Number(i).Hash(), NumberType, uint64(n))
		h.PushBack(r)
	}
	sort.Sort(h)

	expected := RefSlice{h.PeekAt(4), h.PeekAt(3), h.PeekAt(2)}
	refs := h.PopRefsOfHeight(h.MaxHeight())
	assert.Len(t, *h, 2)
	assert.Len(t, refs, 3)
	for _, popped := range expected {
		assert.NotContains(t, *h, popped, "Should not contain ref of height 6")
	}
}

================================================
FILE: go/types/ref_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestRefInList checks that a Ref appended to a List reads back equal.
func TestRefInList(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()

	l := NewList(vs)
	r := NewRef(l)
	l = l.Edit().Append(r).List()
	r2 := l.Get(0)
	assert.True(r.Equals(r2))
}

// TestRefInSet checks that a Ref inserted into a Set reads back equal.
func TestRefInSet(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()

	s := NewSet(vs)
	r := NewRef(s)
	s = s.Edit().Insert(r).Set()
	r2 := s.First()
	assert.True(r.Equals(r2))
}

// TestRefInMap checks that a Ref works both as a Map value and as a Map key.
func TestRefInMap(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()

	m := NewMap(vs)
	r := NewRef(m)
	m = m.Edit().Set(Number(0), r).Set(r, Number(1)).Map()
	r2 := m.Get(Number(0))
	assert.True(r.Equals(r2))

	i := m.Get(r)
	assert.Equal(int32(1), int32(i.(Number)))
}

// TestRefChunks checks that getChunks on a Ref yields exactly that Ref.
func TestRefChunks(t *testing.T) {
	assert := assert.New(t)

	vs := newTestValueStore()

	l := NewList(vs)
	r := NewRef(l)
	assert.Len(getChunks(r), 1)
	assert.Equal(r, getChunks(r)[0])
}

================================================
FILE: go/types/rolling_value_hasher.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"sync"

	"github.com/attic-labs/noms/go/sloppy"
	"github.com/kch42/buzhash"
)

const (
	defaultChunkPattern = uint32(1<<12 - 1) // Avg Chunk Size of 4k

	// The window size to use for computing the rolling hash. This is way more than necessary assuming random data (two bytes would be sufficient with a target chunk size of 4k). The benefit of a larger window is it allows for better distribution on input with lower entropy. At a target chunk size of 4k, any given byte changing has roughly a 1.5% chance of affecting an existing boundary, which seems like an acceptable trade-off. The choice of a prime number provides better distribution for repeating input.
	chunkWindow  = uint32(67)
	maxChunkSize = 1 << 24 // TODO: Remove when https://github.com/attic-labs/noms/issues/3743 is fixed.
)

// Only set by tests
var (
	chunkPattern  = defaultChunkPattern
	chunkConfigMu = &sync.Mutex{}
)

// chunkingConfig returns the current chunk pattern and window size, reading
// them while holding chunkConfigMu.
func chunkingConfig() (pattern, window uint32) {
	chunkConfigMu.Lock()
	defer chunkConfigMu.Unlock()
	return chunkPattern, chunkWindow
}

// smallTestChunks switches chunkPattern to the small pattern used by tests.
func smallTestChunks() {
	chunkConfigMu.Lock()
	defer chunkConfigMu.Unlock()
	chunkPattern = uint32(1<<8 - 1) // Avg Chunk Size of 256 bytes
}

// normalProductionChunks restores chunkPattern to defaultChunkPattern.
func normalProductionChunks() {
	chunkConfigMu.Lock()
	defer chunkConfigMu.Unlock()
	chunkPattern = defaultChunkPattern
}

// rollingValueHasher feeds bytes through a buzhash rolling hash and records,
// in crossedBoundary, when the hash matches the chunk pattern or the written
// data exceeds maxChunkSize.
type rollingValueHasher struct {
	bw              binaryNomsWriter
	bz              *buzhash.BuzHash
	crossedBoundary bool
	pattern, window uint32
	salt            byte
	sl              *sloppy.Sloppy
}

// hashValueBytes hashes |item|, which must be a Value, into |rv|.
func hashValueBytes(item sequenceItem, rv *rollingValueHasher) {
	rv.HashValue(item.(Value))
}

// hashValueByte hashes |item|, which must be a byte, into |rv|.
func hashValueByte(item sequenceItem, rv *rollingValueHasher) {
	rv.HashByte(item.(byte))
}

// newRollingValueHasher returns a hasher that uses the current chunking
// config and XORs every hashed byte with |salt|.
func newRollingValueHasher(salt byte) *rollingValueHasher {
	pattern, window := chunkingConfig()
	w := newBinaryNomsWriter()

	rv := &rollingValueHasher{
		bw:      w,
		bz:      buzhash.NewBuzHash(window),
		pattern: pattern,
		window:  window,
		salt:    salt,
	}

	// The sloppy instance is given HashByte as its callback.
	rv.sl = sloppy.New(rv.HashByte)

	return rv
}

// HashByte hashes one byte and returns whether a chunk boundary has been
// crossed. Once a boundary is crossed, further bytes are ignored until Reset.
func (rv *rollingValueHasher) HashByte(b byte) bool {
	if !rv.crossedBoundary {
		rv.bz.HashByte(b ^ rv.salt)
		rv.crossedBoundary = (rv.bz.Sum32()&rv.pattern == rv.pattern)
		// Force a boundary once more than maxChunkSize bytes have been written.
		if rv.bw.offset > maxChunkSize {
			rv.crossedBoundary = true
		}
	}
	return rv.crossedBoundary
}

// Reset clears the boundary flag and starts a fresh rolling hash, writer and
// sloppy state.
func (rv *rollingValueHasher) Reset() {
	rv.crossedBoundary = false
	rv.bz = buzhash.NewBuzHash(rv.window)
	rv.bw.reset()
	rv.sl.Reset()
}

// HashValue serializes |v| into the internal writer and passes the writer's
// data to the sloppy instance.
func (rv *rollingValueHasher) HashValue(v Value) {
	v.writeTo(&rv.bw)
	rv.sl.Update(rv.bw.data())
}

// hashBytes appends |buff| to the internal writer and passes the writer's
// data to the sloppy instance.
func (rv *rollingValueHasher) hashBytes(buff []byte) {
	rv.bw.writeRaw(buff)
	rv.sl.Update(rv.bw.data())
}

================================================
FILE: go/types/rungen.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

//go:generate go run gen/main.go

================================================
FILE: go/types/sequence.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
)

// sequenceItem is a single element of a sequence; its concrete type depends
// on the kind of sequence.
type sequenceItem interface{}

// compareFn compares the items at two indices.
type compareFn func(x int, y int) bool

// sequence is the internal interface implemented by the sequence types in
// this package.
type sequence interface {
	asValueImpl() valueImpl
	cumulativeNumberOfLeaves(idx int) uint64
	Empty() bool
	Equals(other Value) bool
	getChildSequence(idx int) sequence
	getCompareFn(other sequence) compareFn
	getCompositeChildSequence(start uint64, length uint64) sequence
	getItem(idx int) sequenceItem
	Hash() hash.Hash
	isLeaf() bool
	Kind() NomsKind
	Len() uint64
	Less(other Value) bool
	numLeaves() uint64
	seqLen() int
	treeLevel() uint64
	typeOf() *Type
	valueBytes() []byte
	valueReadWriter() ValueReadWriter
	valuesSlice(from, to uint64) []Value
	WalkRefs(cb RefCallback)
	writeTo(nomsWriter)
}

// Indices into a sequence's offsets slice for each serialized part.
const (
	sequencePartKind   = 0
	sequencePartLevel  = 1
	sequencePartCount  = 2
	sequencePartValues = 3
)

// sequenceImpl holds a serialized sequence (via valueImpl) together with its
// length.
type sequenceImpl struct {
	valueImpl
	len uint64
}

// newSequenceImpl wraps the given buffer, offsets and length in a sequenceImpl.
func newSequenceImpl(vrw ValueReadWriter, buff []byte, offsets []uint32, len uint64) sequenceImpl {
	return sequenceImpl{valueImpl{vrw, buff, offsets}, len}
}

// decoderSkipToValues reads the count part and returns the decoder, now
// positioned just after the count, along with that count.
func (seq sequenceImpl) decoderSkipToValues() (valueDecoder, uint64) {
	dec := seq.decoderAtPart(sequencePartCount)
	count := dec.readCount()
	return dec, count
}

// decoderAtPart returns a decoder over this sequence's buffer, positioned at
// |part|.
func (seq sequenceImpl) decoderAtPart(part uint32) valueDecoder {
	offset := seq.offsets[part] - seq.offsets[sequencePartKind]
	return newValueDecoder(seq.buff[offset:], seq.vrw)
}

// Empty reports whether Len() is 0.
func (seq sequenceImpl) Empty() bool {
	return seq.Len() == 0
}

// Len returns the length stored at construction.
func (seq sequenceImpl) Len() uint64 {
	return seq.len
}

// seqLen returns the count decoded from the serialized data.
func (seq sequenceImpl) seqLen() int {
	_, count := seq.decoderSkipToValues()
	return int(count)
}

// getItemOffset returns the buffer-relative offset of the item at |idx|. It
// panics if there is no offsets entry after that item's.
func (seq sequenceImpl) getItemOffset(idx int) int {
	// kind, level, count, elements...
	// 0     1      2      3          n+1
	d.PanicIfTrue(idx+sequencePartValues+1 > len(seq.offsets))
	return int(seq.offsets[idx+sequencePartValues] - seq.offsets[sequencePartKind])
}

// decoderSkipToIndex returns a decoder positioned at the item at |idx|.
func (seq sequenceImpl) decoderSkipToIndex(idx int) valueDecoder {
	offset := seq.getItemOffset(idx)
	return seq.decoderAtOffset(offset)
}

================================================
FILE: go/types/sequence_chunker.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import "github.com/attic-labs/noms/go/d"

// hashValueBytesFn feeds |item| into the rolling hasher |rv|.
type hashValueBytesFn func(item sequenceItem, rv *rollingValueHasher)

// sequenceChunker accumulates items for one level of a tree, cutting a chunk
// and passing it to the parent chunker whenever the rolling hasher reports a
// boundary.
type sequenceChunker struct {
	cur                        *sequenceCursor
	level                      uint64
	vrw                        ValueReadWriter
	parent                     *sequenceChunker
	current                    []sequenceItem
	makeChunk, parentMakeChunk makeChunkFn
	isLeaf                     bool
	hashValueBytes             hashValueBytesFn
	rv                         *rollingValueHasher
	done                       bool
	unwrittenCol               Collection
}

// makeChunkFn takes a sequence of items to chunk, and returns the result of chunking those items, a tuple of a reference to that chunk which can itself be chunked + its underlying value.
type makeChunkFn func(level uint64, values []sequenceItem) (Collection, orderedKey, uint64)

// newEmptySequenceChunker returns a level-0 chunker with no cursor, for
// building a brand new sequence.
func newEmptySequenceChunker(vrw ValueReadWriter, makeChunk, parentMakeChunk makeChunkFn, hashValueBytes hashValueBytesFn) *sequenceChunker {
	return newSequenceChunker(nil, uint64(0), vrw, makeChunk, parentMakeChunk, hashValueBytes)
}

// newSequenceChunker returns a chunker for |level|. All function arguments and
// |vrw| must be non-nil. If |cur| is non-nil, the chunker resumes from it.
func newSequenceChunker(cur *sequenceCursor, level uint64, vrw ValueReadWriter, makeChunk, parentMakeChunk makeChunkFn, hashValueBytes hashValueBytesFn) *sequenceChunker {
	d.PanicIfFalse(makeChunk != nil)
	d.PanicIfFalse(parentMakeChunk != nil)
	d.PanicIfFalse(hashValueBytes != nil)
	d.PanicIfTrue(vrw == nil)

	// |cur| will be nil if this is a new sequence, implying this is a new tree, or the tree has grown in height relative to its original chunked form.

	// Positional literal: values follow the field order of sequenceChunker.
	sc := &sequenceChunker{
		cur,
		level,
		vrw,
		nil,
		make([]sequenceItem, 0, 1<<10),
		makeChunk, parentMakeChunk,
		true,
		hashValueBytes,
		newRollingValueHasher(byte(level % 256)),
		false,
		nil,
	}

	if cur != nil {
		sc.resume()
	}

	return sc
}

// resume re-appends the items of the cursor's current chunk that precede the
// cursor's position, creating the parent chunker first if the cursor has a
// parent.
func (sc *sequenceChunker) resume() {
	if sc.cur.parent != nil && sc.parent == nil {
		sc.createParent()
	}

	idx := sc.cur.idx

	// Walk backwards to the start of the existing chunk.
	for sc.cur.indexInChunk() > 0 && sc.cur.retreatMaybeAllowBeforeStart(false) {
	}

	// Replay items up to, but not including, the original cursor position.
	for ; sc.cur.idx < idx; sc.cur.advance() {
		sc.Append(sc.cur.current())
	}
}

// advanceTo advances the sequenceChunker to the next "spine" at which
// modifications to the prolly-tree should take place
func (sc *sequenceChunker) advanceTo(next *sequenceCursor) {
	// There are four basic situations which must be handled when advancing to a
	// new chunking position:
	//
	// Case (1): |sc.cur| and |next| are exactly aligned. In this case, there's
	//           nothing to do. Just assign sc.cur = next.
	//
	// Case (2): |sc.cur| is "ahead" of |next|. This can only have resulted from
	//           advancing of a lower level causing |sc.cur| to advance. In this
	//           case, we advance |next| until the cursors are aligned and then
	//           process as if Case (1):
	//
	// Case (3+4): |sc.cur| is "behind" |next|, we must consume elements in
	//             |sc.cur| until either:
	//
	//   Case (3): |sc.cur| aligns with |next|. In this case, we just assign
	//             sc.cur = next.
	//   Case (4): A boundary is encountered which is aligned with a boundary
	//             in the previous state. This is the critical case, as it allows
	//             us to skip over large parts of the tree. In this case, we align
	//             parent chunkers then sc.resume() at |next|

	for sc.cur.compare(next) > 0 {
		next.advance() // Case (2)
	}

	// If neither loop above and below are entered, it is Case (1). If the loop
	// below is entered but Case (4) isn't reached, then it is Case (3).
	reachedNext := true
	for sc.cur.compare(next) < 0 {
		if sc.Append(sc.cur.current()) && sc.cur.atLastItem() {
			if sc.cur.parent != nil {
				if sc.cur.parent.compare(next.parent) < 0 {
					// Case (4): We stopped consuming items on this level before entering
					// the sequence referenced by |next|
					reachedNext = false
				}

				// Note: Logically, what is happening here is that we are consuming the
				// item at the current level. Logically, we'd call sc.cur.advance(),
				// but that would force loading of the next sequence, which we don't
				// need for any reason, so instead we advance the parent and take care
				// not to allow it to step outside the sequence.
				sc.cur.parent.advanceMaybeAllowPastEnd(false)

				// Invalidate this cursor, since it is now inconsistent with its parent
				sc.cur.parent = nil
				sc.cur.seq = nil
			}

			break
		}

		sc.cur.advance()
	}

	if sc.parent != nil && next.parent != nil {
		sc.parent.advanceTo(next.parent)
	}

	sc.cur = next
	if !reachedNext {
		sc.resume() // Case (4)
	}
}

// Append adds |item| to the current chunk and hashes it. It returns true if
// that caused a chunk boundary, in which case the chunk has already been
// handed to the parent. |item| must not be nil.
func (sc *sequenceChunker) Append(item sequenceItem) bool {
	d.PanicIfTrue(item == nil)
	sc.current = append(sc.current, item)
	sc.hashValueBytes(item, sc.rv)
	if sc.rv.crossedBoundary {
		sc.handleChunkBoundary()
		return true
	}
	return false
}

// Skip advances the cursor by one item without appending it.
func (sc *sequenceChunker) Skip() {
	sc.cur.advance()
}

// createParent creates the chunker for the next level up. It must only be
// called when there is no parent yet.
func (sc *sequenceChunker) createParent() {
	d.PanicIfFalse(sc.parent == nil)
	var parent *sequenceCursor
	if sc.cur != nil && sc.cur.parent != nil {
		// Clone the parent cursor because otherwise calling cur.advance() will affect our parent - and vice versa - in surprising ways. Instead, Skip moves forward our parent's cursor if we advance across a boundary.
		// NOTE(review): despite the comment above, no clone is made here; the
		// parent chunker is handed |sc.cur.parent| itself. Confirm whether
		// sharing the cursor is intended.
		parent = sc.cur.parent
	}

	// Parent levels use parentMakeChunk for both their own and their parent's chunks.
	sc.parent = newSequenceChunker(parent, sc.level+1, sc.vrw, sc.parentMakeChunk, sc.parentMakeChunk, metaHashValueBytes)
	sc.parent.isLeaf = false

	if sc.unwrittenCol != nil {
		// There is an unwritten collection, but this chunker now has a parent, so
		// write it. See createSequence().
		sc.vrw.WriteValue(sc.unwrittenCol)
		sc.unwrittenCol = nil
	}
}

// createSequence creates a sequence from the current items in |sc.current|,
// clears the current items, then returns the new sequence and a metaTuple that
// points to it.
//
// If |write| is true then the sequence is eagerly written, or if false it's
// manually constructed and stored in |sc.unwrittenCol| to possibly write later
// in createParent(). This is to hopefully avoid unnecessarily writing the root
// chunk (for example, the sequence may be stored inline).
// // There is a catch: in the rare case that the root chunk is actually not the // canonical root of the sequence (see Done()), then we will have ended up // unnecessarily writing a chunk - the canonical root. However, this is a fair // tradeoff for simplicity of the chunking algorithm. func (sc *sequenceChunker) createSequence(write bool) (sequence, metaTuple) { col, key, numLeaves := sc.makeChunk(sc.level, sc.current) // |sc.makeChunk| copies |sc.current| so it's safe to re-use the memory. sc.current = sc.current[:0] var ref Ref if write { ref = sc.vrw.WriteValue(col) } else { ref = NewRef(col) sc.unwrittenCol = col } mt := newMetaTuple(ref, key, numLeaves) return col.asSequence(), mt } func (sc *sequenceChunker) handleChunkBoundary() { d.PanicIfFalse(len(sc.current) > 0) sc.rv.Reset() if sc.parent == nil { sc.createParent() } _, mt := sc.createSequence(true) sc.parent.Append(mt) } // Returns true if this chunker or any of its parents have any pending items in their |current| slice. func (sc *sequenceChunker) anyPending() bool { if len(sc.current) > 0 { return true } if sc.parent != nil { return sc.parent.anyPending() } return false } // Returns the root sequence of the resulting tree. The logic here is subtle, but hopefully correct and understandable. See comments inline. func (sc *sequenceChunker) Done() sequence { d.PanicIfTrue(sc.done) sc.done = true if sc.cur != nil { sc.finalizeCursor() } // There is pending content above us, so we must push any remaining items from this level up and allow some parent to find the root of the resulting tree. if sc.parent != nil && sc.parent.anyPending() { if len(sc.current) > 0 { // If there are items in |current| at this point, they represent the final items of the sequence which occurred beyond the previous *explicit* chunk boundary. The end of input of a sequence is considered an *implicit* boundary. 
sc.handleChunkBoundary() } return sc.parent.Done() } // At this point, we know this chunker contains, in |current| every item at this level of the resulting tree. To see this, consider that there are two ways a chunker can enter items into its |current|: (1) as the result of resume() with the cursor on anything other than the first item in the sequence, and (2) as a result of a child chunker hitting an explicit chunk boundary during either Append() or finalize(). The only way there can be no items in some parent chunker's |current| is if this chunker began with cursor within its first existing chunk (and thus all parents resume()'d with a cursor on their first item) and continued through all sebsequent items without creating any explicit chunk boundaries (and thus never sent any items up to a parent as a result of chunking). Therefore, this chunker's current must contain all items within the current sequence. // This level must represent *a* root of the tree, but it is possibly non-canonical. There are three cases to consider: // (1) This is "leaf" chunker and thus produced tree of depth 1 which contains exactly one chunk (never hit a boundary), or (2) This in an internal node of the tree which contains multiple references to child nodes. In either case, this is the canonical root of the tree. if sc.isLeaf || len(sc.current) > 1 { seq, _ := sc.createSequence(false) return seq } // (3) This is an internal node of the tree which contains a single reference to a child node. This can occur if a non-leaf chunker happens to chunk on the first item (metaTuple) appended. In this case, this is the root of the tree, but it is *not* canonical and we must walk down until we find cases (1) or (2), above. 
// finalizeCursor is used when mutating an existing sequence (Done() calls it
// only when sc.cur is non-nil). It appends the items remaining under sc.cur,
// stopping either when the cursor is no longer valid (end of the sequence) or
// when Append reports a chunk boundary while the cursor sits on the last item
// of its chunk, i.e. a pre-existing chunk boundary has been reached.
func (sc *sequenceChunker) finalizeCursor() {
	for ; sc.cur.valid(); sc.cur.advance() {
		if sc.Append(sc.cur.current()) && sc.cur.atLastItem() {
			break // boundary occurred at same place in old & new sequence
		}
	}

	if sc.cur.parent != nil {
		// Step the parent cursor forward by one item. Presumably this moves it
		// past the entry for the chunk this level just consumed - TODO confirm.
		sc.cur.parent.advance()

		// Invalidate this cursor, since it is now inconsistent with its parent
		sc.cur.parent = nil
		sc.cur.seq = nil
	}
}
ch.cur = cur if cur != nil { cur = cur.parent if cur != nil && ch.parent == nil { // If fst is shallower than snd, its cur will have a parent whereas the // chunker to snd won't. In that case, create a parent for fst. ch.createParent() } } } return chunker.Done() } ================================================ FILE: go/types/sequence_cursor.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import "github.com/attic-labs/noms/go/d" import "fmt" // sequenceCursor explores a tree of sequence items. type sequenceCursor struct { parent *sequenceCursor seq sequence idx int seqLen int } // newSequenceCursor creates a cursor on seq positioned at idx. // If idx < 0, count backward from the end of seq. func newSequenceCursor(parent *sequenceCursor, seq sequence, idx int) *sequenceCursor { d.PanicIfTrue(seq == nil) seqLen := seq.seqLen() if idx < 0 { idx += seqLen d.PanicIfFalse(idx >= 0) } return &sequenceCursor{parent, seq, idx, seqLen} } func (cur *sequenceCursor) length() int { return cur.seqLen } func (cur *sequenceCursor) getItem(idx int) sequenceItem { return cur.seq.getItem(idx) } // sync loads the sequence that the cursor index points to. // It's called whenever the cursor advances/retreats to a different chunk. func (cur *sequenceCursor) sync() { d.PanicIfFalse(cur.parent != nil) cur.seq = cur.parent.getChildSequence() cur.seqLen = cur.seq.seqLen() } // getChildSequence retrieves the child at the current cursor position. 
// advanceMaybeAllowPastEnd moves the cursor forward by one item and reports
// whether it now rests on an item it was able to move to.
//
// When the cursor is on the last item of its sequence it first tries to
// advance its parent; if that succeeds the next child sequence is loaded via
// sync() and the cursor is placed on that sequence's first item. When there is
// nothing further to move to, the result is false and allowPastEnd decides the
// final position: true steps idx one past the last item (making the cursor
// invalid), false leaves the cursor where it is.
func (cur *sequenceCursor) advanceMaybeAllowPastEnd(allowPastEnd bool) bool {
	// Common case: there is another item in the current sequence.
	if cur.idx < cur.length()-1 {
		cur.idx++
		return true
	}
	// Already one past the end; never move further than that.
	if cur.idx == cur.length() {
		return false
	}
	// The parent is advanced with allowPastEnd=false, so it is never moved
	// past its own end on behalf of a child.
	if cur.parent != nil && cur.parent.advanceMaybeAllowPastEnd(false) {
		// at end of current leaf chunk and there are more
		cur.sync()
		cur.idx = 0
		return true
	}
	if allowPastEnd {
		cur.idx++
	}
	return false
}
cur.seq.seqLen(), cur.idx, cur.parent.String()) } func (cur *sequenceCursor) compare(other *sequenceCursor) int { if cur.parent != nil { d.PanicIfFalse(other.parent != nil) p := cur.parent.compare(other.parent) if p != 0 { return p } } // TODO: It'd be nice here to assert that the two sequences are the same // but there isn't a good way to that at this point because the containing // collection of the sequence isn't available. d.PanicIfFalse(cur.seq.seqLen() == other.seq.seqLen()) return cur.idx - other.idx } // iter iterates forward from the current position func (cur *sequenceCursor) iter(cb cursorIterCallback) { for cur.valid() && !cb(cur.getItem(cur.idx)) { cur.advance() } } // newCursorAtIndex creates a new cursor over seq positioned at idx. // // Implemented by searching down the tree to the leaf sequence containing idx. Each // sequence cursor includes a back pointer to its parent so that it can follow the path // to the next leaf chunk when the cursor exhausts the entries in the current chunk. func newCursorAtIndex(seq sequence, idx uint64) *sequenceCursor { var cur *sequenceCursor for { cur = newSequenceCursor(cur, seq, 0) idx = idx - advanceCursorToOffset(cur, idx) cs := cur.getChildSequence() if cs == nil { break } seq = cs } d.PanicIfTrue(cur == nil) return cur } ================================================ FILE: go/types/sequence_cursor_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "testing" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) type testSequence struct { items []interface{} } // sequence interface func (ts testSequence) getItem(idx int) sequenceItem { return ts.items[idx] } func (ts testSequence) seqLen() int { return len(ts.items) } func (ts testSequence) numLeaves() uint64 { return uint64(len(ts.items)) } func (ts testSequence) cumulativeNumberOfLeaves(idx int) uint64 { panic("not reached") } func (ts testSequence) getCompositeChildSequence(start uint64, length uint64) sequence { panic("not reached") } func (ts testSequence) treeLevel() uint64 { panic("not reached") } func (ts testSequence) Kind() NomsKind { panic("not reached") } func (ts testSequence) getCompareFn(other sequence) compareFn { obl := other.(testSequence) return func(idx, otherIdx int) bool { return ts.items[idx] == obl.items[otherIdx] } } func (ts testSequence) valueReadWriter() ValueReadWriter { panic("not reached") } func (ts testSequence) writeTo(nomsWriter) { panic("not reached") } func (ts testSequence) getChildSequence(idx int) sequence { child := ts.items[idx] return testSequence{child.([]interface{})} } func (ts testSequence) isLeaf() bool { panic("not reached") } func (ts testSequence) Equals(other Value) bool { panic("not reached") } func (ts testSequence) valueBytes() []byte { panic("not reached") } func (ts testSequence) valuesSlice(from, to uint64) []Value { panic("not reached") } func (ts testSequence) Less(other Value) bool { panic("not reached") } func (ts testSequence) Hash() hash.Hash { panic("not reached") } func (ts testSequence) WalkValues(cb ValueCallback) { panic("not reached") } func (ts testSequence) WalkRefs(cb RefCallback) { panic("not reached") } func (ts testSequence) typeOf() *Type { panic("not reached") } func (ts testSequence) Len() uint64 { panic("not reached") } func (ts testSequence) 
Empty() bool { panic("not reached") } func (ts testSequence) asValueImpl() valueImpl { panic("not reached") } func newTestSequenceCursor(items []interface{}) *sequenceCursor { parent := newSequenceCursor(nil, testSequence{items}, 0) items = items[0].([]interface{}) return newSequenceCursor(parent, testSequence{items}, 0) } // TODO: Convert all tests to use newTestSequenceCursor3. func newTestSequenceCursor3(items []interface{}) *sequenceCursor { top := newSequenceCursor(nil, testSequence{items}, 0) items = items[0].([]interface{}) middle := newSequenceCursor(top, testSequence{items}, 0) items = items[0].([]interface{}) return newSequenceCursor(middle, testSequence{items}, 0) } func TestTestCursor(t *testing.T) { assert := assert.New(t) var cur *sequenceCursor reset := func() { cur = newTestSequenceCursor([]interface{}{[]interface{}{100, 101}, []interface{}{102}}) } expect := func(expectIdx, expectParentIdx int, expectOk bool, expectVal sequenceItem) { assert.Equal(expectIdx, cur.indexInChunk()) assert.Equal(expectParentIdx, cur.parent.indexInChunk()) assert.Equal(expectOk, cur.valid()) if cur.valid() { assert.Equal(expectVal, cur.current()) } } // Test retreating past the start. reset() expect(0, 0, true, sequenceItem(100)) assert.False(cur.retreat()) expect(-1, 0, false, nil) assert.False(cur.retreat()) expect(-1, 0, false, nil) // Test retreating past the start, then advanding past the end. reset() assert.False(cur.retreat()) assert.True(cur.advance()) expect(0, 0, true, sequenceItem(100)) assert.True(cur.advance()) expect(1, 0, true, sequenceItem(101)) assert.True(cur.advance()) expect(0, 1, true, sequenceItem(102)) assert.False(cur.advance()) expect(1, 1, false, nil) assert.False(cur.advance()) expect(1, 1, false, nil) // Test advancing past the end. 
reset() assert.True(cur.advance()) expect(1, 0, true, sequenceItem(101)) assert.True(cur.retreat()) expect(0, 0, true, sequenceItem(100)) assert.False(cur.retreat()) expect(-1, 0, false, nil) assert.False(cur.retreat()) expect(-1, 0, false, nil) // Test advancing past the end, then retreating past the start. reset() assert.True(cur.advance()) assert.True(cur.advance()) expect(0, 1, true, sequenceItem(102)) assert.False(cur.advance()) expect(1, 1, false, nil) assert.False(cur.advance()) expect(1, 1, false, nil) assert.True(cur.retreat()) expect(0, 1, true, sequenceItem(102)) assert.True(cur.retreat()) expect(1, 0, true, sequenceItem(101)) assert.True(cur.retreat()) expect(0, 0, true, sequenceItem(100)) assert.False(cur.retreat()) expect(-1, 0, false, nil) assert.False(cur.retreat()) expect(-1, 0, false, nil) } ================================================ FILE: go/types/set.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "fmt" "sort" "github.com/attic-labs/noms/go/d" ) type Set struct { orderedSequence } func newSet(seq orderedSequence) Set { return Set{seq} } func NewSet(vrw ValueReadWriter, v ...Value) Set { data := buildSetData(v) ch := newEmptySetSequenceChunker(vrw) for _, v := range data { ch.Append(v) } return newSet(ch.Done().(orderedSequence)) } // NewStreamingSet takes an input channel of values and returns a output // channel that will produce a finished Set. Values that are sent to the input // channel must be in Noms sortorder, adding values to the input channel // out of order will result in a panic. Once the input channel is closed // by the caller, a finished Set will be sent to the output channel. See // graph_builder.go for building collections with values that are not in order. 
func NewStreamingSet(vrw ValueReadWriter, vChan <-chan Value) <-chan Set { return newStreamingSet(vrw, vChan, func(vrw ValueReadWriter, vChan <-chan Value, outChan chan<- Set) { go readSetInput(vrw, vChan, outChan) }) } type streamingSetReadFunc func(vrw ValueReadWriter, vChan <-chan Value, outChan chan<- Set) func newStreamingSet(vrw ValueReadWriter, vChan <-chan Value, readFunc streamingSetReadFunc) <-chan Set { d.PanicIfTrue(vrw == nil) outChan := make(chan Set, 1) readFunc(vrw, vChan, outChan) return outChan } func readSetInput(vrw ValueReadWriter, vChan <-chan Value, outChan chan<- Set) { defer close(outChan) ch := newEmptySetSequenceChunker(vrw) var lastV Value for v := range vChan { d.PanicIfTrue(v == nil) if lastV != nil { d.PanicIfFalse(lastV.Less(v)) } lastV = v ch.Append(v) } outChan <- newSet(ch.Done().(orderedSequence)) } // Diff computes the diff from |last| to |m| using the top-down algorithm, // which completes as fast as possible while taking longer to return early // results than left-to-right. func (s Set) Diff(last Set, changes chan<- ValueChanged, closeChan <-chan struct{}) { if s.Equals(last) { return } orderedSequenceDiffTopDown(last.orderedSequence, s.orderedSequence, changes, closeChan) } // DiffHybrid computes the diff from |last| to |s| using a hybrid algorithm // which balances returning results early vs completing quickly, if possible. func (s Set) DiffHybrid(last Set, changes chan<- ValueChanged, closeChan <-chan struct{}) { if s.Equals(last) { return } orderedSequenceDiffBest(last.orderedSequence, s.orderedSequence, changes, closeChan) } // DiffLeftRight computes the diff from |last| to |s| using a left-to-right // streaming approach, optimised for returning results early, but not // completing quickly. 
func (s Set) DiffLeftRight(last Set, changes chan<- ValueChanged, closeChan <-chan struct{}) { if s.Equals(last) { return } orderedSequenceDiffLeftRight(last.orderedSequence, s.orderedSequence, changes, closeChan) } func (s Set) asSequence() sequence { return s.orderedSequence } // Value interface func (s Set) Value() Value { return s } func (s Set) WalkValues(cb ValueCallback) { iterAll(s, func(v Value, idx uint64) { cb(v) }) } func (s Set) First() Value { cur := newCursorAt(s.orderedSequence, emptyKey, false, false) if !cur.valid() { return nil } return cur.current().(Value) } func (s Set) At(idx uint64) Value { if idx >= s.Len() { panic(fmt.Errorf("Out of bounds: %d >= %d", idx, s.Len())) } cur := newCursorAtIndex(s.orderedSequence, idx) return cur.current().(Value) } func (s Set) Has(v Value) bool { cur := newCursorAtValue(s.orderedSequence, v, false, false) return cur.valid() && cur.current().(Value).Equals(v) } type setIterCallback func(v Value) bool func (s Set) Iter(cb setIterCallback) { cur := newCursorAt(s.orderedSequence, emptyKey, false, false) cur.iter(func(v interface{}) bool { return cb(v.(Value)) }) } type setIterAllCallback func(v Value) func (s Set) IterAll(cb setIterAllCallback) { iterAll(s, func(v Value, idx uint64) { cb(v) }) } func (s Set) Iterator() SetIterator { return s.IteratorAt(0) } func (s Set) IteratorAt(idx uint64) SetIterator { return &setIterator{ cursor: newCursorAtIndex(s.orderedSequence, idx), s: s, } } func (s Set) IteratorFrom(val Value) SetIterator { return &setIterator{ cursor: newCursorAtValue(s.orderedSequence, val, false, false), s: s, } } func (s Set) Edit() *SetEditor { return NewSetEditor(s) } func buildSetData(values ValueSlice) ValueSlice { if len(values) == 0 { return ValueSlice{} } uniqueSorted := make(ValueSlice, 0, len(values)) sort.Stable(values) last := values[0] for i := 1; i < len(values); i++ { v := values[i] if !v.Equals(last) { uniqueSorted = append(uniqueSorted, last) } last = v } return 
append(uniqueSorted, last) } func makeSetLeafChunkFn(vrw ValueReadWriter) makeChunkFn { return func(level uint64, items []sequenceItem) (Collection, orderedKey, uint64) { d.PanicIfFalse(level == 0) setData := make([]Value, len(items), len(items)) var lastValue Value for i, item := range items { v := item.(Value) d.PanicIfFalse(lastValue == nil || lastValue.Less(v)) lastValue = v setData[i] = v } set := newSet(newSetLeafSequence(vrw, setData...)) var key orderedKey if len(setData) > 0 { key = newOrderedKey(setData[len(setData)-1]) } return set, key, uint64(len(items)) } } func newEmptySetSequenceChunker(vrw ValueReadWriter) *sequenceChunker { return newEmptySequenceChunker(vrw, makeSetLeafChunkFn(vrw), newOrderedMetaSequenceChunkFn(SetKind, vrw), hashValueBytes) } ================================================ FILE: go/types/set_editor.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "sort" "github.com/attic-labs/noms/go/d" ) // SetEditor allows for efficient editing of Set-typed prolly trees. Edits // are buffered to memory and can be applied via Build(), which returns a new // Set. Prior to Build(), Get() & Has() will return the value that the resulting // Set would return if it were built immediately prior to the respective call. // Note: The implementation biases performance towards a usage which applies // edits in key-order. 
// Set applies the buffered edits to the underlying Set and returns the
// resulting Set. If there are no edits, or none of them changes membership
// (inserting a value already present, removing a value not present), the
// original Set is returned.
//
// Edits are normalized (sorted by value) first. A producer goroutine then
// walks the edits, starts one cursor lookup per distinct value, and feeds the
// pending cursor and its edit to the consumer loop below, which applies them
// in order through a single sequenceChunker.
func (se *SetEditor) Set() Set {
	if len(se.edits) == 0 {
		return se.s // no edits
	}

	seq := se.s.orderedSequence
	vrw := seq.valueReadWriter()

	se.normalize()

	// cursChan carries, in edit order, one single-slot channel per edit on
	// which that edit's cursor will be delivered; editChan carries the matching
	// edits. Both are unbuffered, so the producer runs only slightly ahead of
	// the consumer.
	cursChan := make(chan chan *sequenceCursor)
	editChan := make(chan setEdit)

	go func() {
		for i, edit := range se.edits {
			if i+1 < len(se.edits) && se.edits[i+1].value.Equals(edit.value) {
				continue // next edit supersedes this one
			}

			// Per-iteration copy so the goroutine below captures this edit.
			edit := edit

			// Load cursor. TODO: Use ReadMany
			cc := make(chan *sequenceCursor, 1)
			cursChan <- cc

			go func() {
				cc <- newCursorAtValue(seq, edit.value, true, false)
			}()

			editChan <- edit
		}

		close(cursChan)
		close(editChan)
	}()

	// The chunker is created lazily, at the cursor of the first edit that
	// actually changes the set.
	var ch *sequenceChunker
	for cc := range cursChan {
		cur := <-cc
		edit := <-editChan

		// The value exists if the cursor is inside its sequence and the item
		// under it equals the edit's value.
		exists := false
		if cur.idx < cur.seq.seqLen() {
			v := cur.current().(Value)
			if v.Equals(edit.value) {
				exists = true
			}
		}

		if exists && edit.insert {
			continue // already present
		}

		if !exists && !edit.insert {
			continue // already non-present
		}

		if ch == nil {
			ch = newSequenceChunker(cur, 0, vrw, makeSetLeafChunkFn(vrw), newOrderedMetaSequenceChunkFn(SetKind, vrw), hashValueBytes)
		} else {
			ch.advanceTo(cur)
		}

		// At this point the edit is either an insert of a missing value or a
		// removal of an existing one.
		if edit.insert {
			ch.Append(edit.value)
		} else {
			ch.Skip()
		}
	}

	if ch == nil {
		return se.s // no edits required application
	}

	return newSet(ch.Done().(orderedSequence))
}
se.findEdit(v); found { return se.edits[idx].insert } return se.s.Has(v) } func (se *SetEditor) edit(v Value, insert bool) { if len(se.edits) == 0 { se.edits = append(se.edits, setEdit{v, insert}) return } final := se.edits[len(se.edits)-1] if final.value.Equals(v) { se.edits[len(se.edits)-1] = setEdit{v, insert} return // update the last edit } se.edits = append(se.edits, setEdit{v, insert}) if se.normalized && final.value.Less(v) { // fast-path: edits take place in key-order return } // de-normalize se.normalized = false } // Find the edit position of the last edit for a given key func (se *SetEditor) findEdit(v Value) (idx int, found bool) { se.normalize() idx = sort.Search(len(se.edits), func(i int) bool { return !se.edits[i].value.Less(v) }) if idx == len(se.edits) { return } if !se.edits[idx].value.Equals(v) { return } // advance to final edit position where kv.key == k for idx < len(se.edits) && se.edits[idx].value.Equals(v) { idx++ } idx-- found = true return } func (se *SetEditor) normalize() { if se.normalized { return } sort.Stable(se.edits) // TODO: GC duplicate keys over some threshold of collectable memory? se.normalized = true } type setEdit struct { value Value insert bool } type setEditSlice []setEdit func (ses setEditSlice) Len() int { return len(ses) } func (ses setEditSlice) Swap(i, j int) { ses[i], ses[j] = ses[j], ses[i] } func (ses setEditSlice) Less(i, j int) bool { return ses[i].value.Less(ses[j].value) } ================================================ FILE: go/types/set_iterator.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "github.com/attic-labs/noms/go/d" ) // SetIterator defines methods that can be used to efficiently iterate through a set in 'Noms-defined' // sorted order. type SetIterator interface { // Next returns subsequent values from a set. 
It returns nil, when no objects remain. Next() Value // SkipTo(v) advances to and returns the next value in the iterator >= v. // Note: if the iterator has already returned the value being skipped to, it will return the next // value (just as if Next() was called). For example, given the following set: // s = Set{ 0, 3, 6, 9, 12, 15, 18 } // An iterator on the set would return: // i := s.Iterator() // i.Next() return 0 // i.SkipTo(4) -- returns 6 // i.skipTo(3) -- returns 9 (this is the next value in the iterator >= 3) // i.skipTo(12) -- returns 12 // i.skipTo(12) -- return 15 (this is the next value in the iterator >= 12) // i.skipTo(20) -- returns nil // If there are no values left in the iterator that are >= v, // the iterator will skip to the end of the sequence and return nil. SkipTo(v Value) Value } type setIterator struct { s Set cursor *sequenceCursor currentValue Value } func (si *setIterator) Next() Value { if si.cursor.valid() { si.currentValue = si.cursor.current().(Value) si.cursor.advance() } else { si.currentValue = nil } return si.currentValue } func (si *setIterator) SkipTo(v Value) Value { d.PanicIfTrue(v == nil) if si.cursor.valid() { if compareValue(v, si.currentValue) <= 0 { return si.Next() } si.cursor = newCursorAtValue(si.s.orderedSequence, v, true, false) if si.cursor.valid() { si.currentValue = si.cursor.current().(Value) si.cursor.advance() } else { si.currentValue = nil } } else { si.currentValue = nil } return si.currentValue } // iterState contains iterator and it's current value type iterState struct { i SetIterator v Value } func (st *iterState) Next() Value { if st.v == nil { return nil } v := st.v st.v = st.i.Next() return v } func (st *iterState) SkipTo(v Value) Value { if st.v == nil || v == nil { st.v = nil return nil } st.v = st.i.SkipTo(v) return st.v } // UnionIterator combines the results from two other iterators. The values from Next() are returned in // noms-defined order with all duplicates removed. 
type UnionIterator struct { aState iterState bState iterState } // NewUnionIterator creates a union iterator from two other SetIterators. func NewUnionIterator(iterA, iterB SetIterator) SetIterator { d.PanicIfTrue(iterA == nil) d.PanicIfTrue(iterB == nil) a := iterState{i: iterA, v: iterA.Next()} b := iterState{i: iterB, v: iterB.Next()} return &UnionIterator{aState: a, bState: b} } func (u *UnionIterator) Next() Value { switch compareValue(u.aState.v, u.bState.v) { case -1: return u.aState.Next() case 0: u.aState.Next() return u.bState.Next() case 1: return u.bState.Next() } panic("Unreachable") } func (u *UnionIterator) SkipTo(v Value) Value { d.PanicIfTrue(v == nil) didAdvance := false if compareValue(u.aState.v, v) < 0 { didAdvance = true u.aState.SkipTo(v) } if compareValue(u.bState.v, v) < 0 { didAdvance = true u.bState.SkipTo(v) } if !didAdvance { return u.Next() } switch compareValue(u.aState.v, u.bState.v) { case -1: return u.aState.Next() case 0: u.aState.Next() return u.bState.Next() case 1: return u.bState.Next() } panic("Unreachable") } // IntersectionIterator only returns values that are returned in both of its child iterators. // The values from Next() are returned in noms-defined order with all duplicates removed. type IntersectionIterator struct { aState iterState bState iterState } // NewIntersectionIterator creates a intersect iterator from two other SetIterators. 
func NewIntersectionIterator(iterA, iterB SetIterator) SetIterator { d.PanicIfTrue(iterA == nil) d.PanicIfTrue(iterB == nil) a := iterState{i: iterA, v: iterA.Next()} b := iterState{i: iterB, v: iterB.Next()} return &IntersectionIterator{aState: a, bState: b} } func (i *IntersectionIterator) Next() Value { for cont := true; cont; { switch compareValue(i.aState.v, i.bState.v) { case -1: i.aState.SkipTo(i.bState.v) case 0: cont = false case 1: i.bState.SkipTo(i.aState.v) } } // we only get here if aState and bState are equal res := i.aState.v i.aState.Next() i.bState.Next() return res } func (i *IntersectionIterator) SkipTo(v Value) Value { d.PanicIfTrue(v == nil) if compareValue(v, i.aState.v) >= 0 { i.aState.SkipTo(v) } if compareValue(v, i.bState.v) >= 0 { i.bState.SkipTo(v) } return i.Next() } // considers nil max value, return -1 if v1 < v2, 0 if v1 == v2, 1 if v1 > v2 func compareValue(v1, v2 Value) int { if v1 == nil && v2 == nil { return 0 } if v2 == nil || (v1 != nil && v1.Less(v2)) { return -1 } if v1 == nil || (v2 != nil && v2.Less(v1)) { return 1 } return 0 } ================================================ FILE: go/types/set_iterator_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "math" "testing" "github.com/stretchr/testify/assert" ) func TestSetIterator(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() numbers := append(generateNumbersAsValues(10), Number(20), Number(25)) s := NewSet(vs, numbers...) 
i := s.Iterator() vals := iterToSlice(i) assert.True(vals.Equals(numbers), "Expected: %v != actual: %v", numbers, vs) i = s.Iterator() assert.Panics(func() { i.SkipTo(nil) }) assert.Equal(Number(0), i.SkipTo(Number(-20))) assert.Equal(Number(2), i.SkipTo(Number(2))) assert.Equal(Number(3), i.SkipTo(Number(-20))) assert.Equal(Number(5), i.SkipTo(Number(5))) assert.Equal(Number(6), i.Next()) assert.Equal(Number(7), i.SkipTo(Number(6))) assert.Equal(Number(20), i.SkipTo(Number(15))) assert.Nil(i.SkipTo(Number(30))) assert.Nil(i.SkipTo(Number(30))) assert.Nil(i.SkipTo(Number(1))) i = s.Iterator() assert.Equal(Number(0), i.Next()) assert.Equal(Number(1), i.Next()) assert.Equal(Number(3), i.SkipTo(Number(3))) assert.Equal(Number(4), i.Next()) empty := NewSet(vs) assert.Nil(empty.Iterator().Next()) assert.Nil(empty.Iterator().SkipTo(Number(-30))) single := NewSet(vs, Number(42)).Iterator() assert.Equal(Number(42), single.SkipTo(Number(42))) assert.Equal(nil, single.SkipTo(Number(42))) single = NewSet(vs, Number(42)).Iterator() assert.Equal(Number(42), single.SkipTo(Number(42))) assert.Equal(nil, single.Next()) single = NewSet(vs, Number(42)).Iterator() assert.Equal(Number(42), single.SkipTo(Number(21))) } func TestSetIteratorAt(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() numbers := append(generateNumbersAsValues(5), Number(10)) s := NewSet(vs, numbers...) i := s.IteratorAt(0) vals := iterToSlice(i) assert.True(vals.Equals(numbers), "Expected: %v != actual: %v", numbers, vs) i = s.IteratorAt(2) vals = iterToSlice(i) assert.True(vals.Equals(numbers[2:]), "Expected: %v != actual: %v", numbers[2:], vs) i = s.IteratorAt(10) vals = iterToSlice(i) assert.True(vals.Equals(nil), "Expected: %v != actual: %v", nil, vs) } func TestSetIteratorFrom(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() numbers := append(generateNumbersAsValues(5), Number(10), Number(20)) s := NewSet(vs, numbers...) 
i := s.IteratorFrom(Number(0)) vals := iterToSlice(i) assert.True(vals.Equals(numbers), "Expected: %v != actual: %v", numbers, vs) i = s.IteratorFrom(Number(2)) vals = iterToSlice(i) assert.True(vals.Equals(numbers[2:]), "Expected: %v != actual: %v", numbers[2:], vs) i = s.IteratorFrom(Number(10)) vals = iterToSlice(i) assert.True(vals.Equals(ValueSlice{Number(10), Number(20)}), "Expected: %v != actual: %v", nil, vs) i = s.IteratorFrom(Number(20)) vals = iterToSlice(i) assert.True(vals.Equals(ValueSlice{Number(20)}), "Expected: %v != actual: %v", nil, vs) i = s.IteratorFrom(Number(100)) vals = iterToSlice(i) assert.True(vals.Equals(nil), "Expected: %v != actual: %v", nil, vs) // Not present. Starts at next larger. i = s.IteratorFrom(Number(15)) vals = iterToSlice(i) assert.True(vals.Equals(ValueSlice{Number(20)}), "Expected: %v != actual: %v", nil, vs) } func TestUnionIterator(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() set1 := NewSet(vs, generateNumbersAsValuesFromToBy(0, 10, 1)...) set2 := NewSet(vs, generateNumbersAsValuesFromToBy(5, 15, 1)...) set3 := NewSet(vs, generateNumbersAsValuesFromToBy(10, 20, 1)...) set4 := NewSet(vs, generateNumbersAsValuesFromToBy(15, 25, 1)...) 
ui1 := NewUnionIterator(set1.Iterator(), set2.Iterator()) vals := iterToSlice(ui1) expectedRes := generateNumbersAsValues(15) assert.True(vals.Equals(expectedRes), "Expected: %v != actual: %v", expectedRes, vs) ui1 = NewUnionIterator(set1.Iterator(), set4.Iterator()) ui2 := NewUnionIterator(set3.Iterator(), set2.Iterator()) ui3 := NewUnionIterator(ui1, ui2) vals = iterToSlice(ui3) expectedRes = generateNumbersAsValues(25) assert.True(vals.Equals(expectedRes), "Expected: %v != actual: %v", expectedRes, vs) ui1 = NewUnionIterator(set1.Iterator(), set4.Iterator()) ui2 = NewUnionIterator(set3.Iterator(), set2.Iterator()) ui3 = NewUnionIterator(ui1, ui2) assert.Panics(func() { ui3.SkipTo(nil) }) assert.Equal(Number(0), ui3.SkipTo(Number(-5))) assert.Equal(Number(5), ui3.SkipTo(Number(5))) assert.Equal(Number(8), ui3.SkipTo(Number(8))) assert.Equal(Number(9), ui3.SkipTo(Number(8))) assert.Equal(Number(10), ui3.SkipTo(Number(8))) assert.Equal(Number(11), ui3.SkipTo(Number(7))) assert.Equal(Number(12), ui3.Next()) assert.Equal(Number(15), ui3.SkipTo(Number(15))) assert.Equal(Number(24), ui3.SkipTo(Number(24))) assert.Nil(ui3.SkipTo(Number(25))) singleElemSet := NewSet(vs, Number(4)) emptySet := NewSet(vs) ui10 := NewUnionIterator(singleElemSet.Iterator(), singleElemSet.Iterator()) ui20 := NewUnionIterator(emptySet.Iterator(), emptySet.Iterator()) ui30 := NewUnionIterator(ui10, ui20) vals = iterToSlice(ui30) expectedRes = ValueSlice{Number(4)} assert.True(vals.Equals(expectedRes), "%v != %v\n", expectedRes, vs) } func TestIntersectionIterator(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() byTwos := NewSet(vs, generateNumbersAsValuesFromToBy(0, 200, 2)...) byThrees := NewSet(vs, generateNumbersAsValuesFromToBy(0, 200, 3)...) byFives := NewSet(vs, generateNumbersAsValuesFromToBy(0, 200, 5)...) 
i1 := NewIntersectionIterator(byTwos.Iterator(), byThrees.Iterator()) vals := iterToSlice(i1) expectedRes := generateNumbersAsValuesFromToBy(0, 200, 6) assert.True(vals.Equals(expectedRes), "Expected: %v != actual: %v", expectedRes, vs) it1 := NewIntersectionIterator(byTwos.Iterator(), byThrees.Iterator()) it2 := NewIntersectionIterator(it1, byFives.Iterator()) vals = iterToSlice(it2) expectedRes = generateNumbersAsValuesFromToBy(0, 200, 30) assert.True(vals.Equals(expectedRes), "Expected: %v != actual: %v", expectedRes, vs) it1 = NewIntersectionIterator(byThrees.Iterator(), byFives.Iterator()) it2 = NewIntersectionIterator(it1, byTwos.Iterator()) assert.Panics(func() { it2.SkipTo(nil) }) assert.Equal(Number(30), it2.SkipTo(Number(5))) assert.Equal(Number(60), it2.SkipTo(Number(60))) assert.Equal(Number(90), it2.SkipTo(Number(5))) assert.Equal(Number(120), it2.Next()) assert.Equal(Number(150), it2.SkipTo(Number(150))) assert.Nil(it2.SkipTo(Number(40000))) } func TestCombinationIterator(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() byTwos := NewSet(vs, generateNumbersAsValuesFromToBy(0, 70, 2)...) byThrees := NewSet(vs, generateNumbersAsValuesFromToBy(0, 70, 3)...) byFives := NewSet(vs, generateNumbersAsValuesFromToBy(0, 70, 5)...) bySevens := NewSet(vs, generateNumbersAsValuesFromToBy(0, 70, 7)...) 
it1 := NewIntersectionIterator(byTwos.Iterator(), bySevens.Iterator()) it2 := NewIntersectionIterator(byFives.Iterator(), byThrees.Iterator()) ut1 := NewUnionIterator(it1, it2) vals := iterToSlice(ut1) expectedRes := intsToValueSlice(0, 14, 15, 28, 30, 42, 45, 56, 60) assert.True(vals.Equals(expectedRes), "Expected: %v != actual: %v", expectedRes, vs) ut1 = NewUnionIterator(byTwos.Iterator(), bySevens.Iterator()) it2 = NewIntersectionIterator(byFives.Iterator(), byThrees.Iterator()) ut2 := NewIntersectionIterator(ut1, it2) vals = iterToSlice(ut2) expectedRes = intsToValueSlice(0, 30, 60) assert.True(vals.Equals(expectedRes), "Expected: %v != actual: %v", expectedRes, vs) } type UnionTestIterator struct { *UnionIterator cntr *int } func (ui *UnionTestIterator) Next() Value { *ui.cntr++ return ui.UnionIterator.Next() } func (ui *UnionTestIterator) SkipTo(v Value) Value { *ui.cntr++ return ui.UnionIterator.SkipTo(v) } func NewUnionTestIterator(i1, i2 SetIterator, cntr *int) SetIterator { ui := NewUnionIterator(i1, i2).(*UnionIterator) return &UnionTestIterator{ui, cntr} } // When a binary tree of union operators is built on top of a list of sets, the complexity to // retrieve all of the elements in sorted order should be Log(N) * M where N is the number of sets func init() { // the list and M is the total number of elements in all of the sets. 
func TestUnionComplexity(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() numSets := 256 numElemsPerSet := 1000 logNumSets := int(math.Ceil(math.Log2(float64(numSets)))) totalElems := numSets * numElemsPerSet expectedMax := logNumSets*totalElems + numSets callCount1 := 0 iter := iterize(createSetsWithDistinctNumbers(vs, numSets, numElemsPerSet), NewUnionTestIterator, &callCount1) vals := iterToSlice(iter) expected := generateNumbersAsValueSlice(numSets * numElemsPerSet) assert.True(expected.Equals(vals), "expected: %v != actual: %v", expected, vals) assert.True(expectedMax > callCount1, "callCount: %d exceeds expectedMax: %d", callCount1, expectedMax) callCount2 := 0 iter = iterize(createSetsWithSameNumbers(vs, numSets, numElemsPerSet), NewUnionTestIterator, &callCount2) vals = iterToSlice(iter) expected = generateNumbersAsValueSlice(numElemsPerSet) assert.True(expected.Equals(vals), "expected: %v != actual: %v", expected, vals) assert.True(expectedMax > callCount2, "callCount: %d exceeds expectedMax: %d", callCount2, expectedMax) } type IntersectionTestIterator struct { *IntersectionIterator cntr *int } func (i *IntersectionTestIterator) Next() Value { *i.cntr++ return i.IntersectionIterator.Next() } func (i *IntersectionTestIterator) SkipTo(v Value) Value { *i.cntr++ return i.IntersectionIterator.SkipTo(v) } func NewIntersectionTestIterator(i1, i2 SetIterator, cntr *int) SetIterator { ui := NewIntersectionIterator(i1, i2).(*IntersectionIterator) return &IntersectionTestIterator{ui, cntr} } // When a binary tree of intersection operators is built on top of a list of sets, the complexity to // retrieve all of the elements in sorted order should be Log(N) * M where N is the number of sets func init() { // the list and M is the total number of elements in all of the sets. 
func TestIntersectComplexity(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() numSets := 256 numElemsPerSet := 1000 logNumSets := int(math.Ceil(math.Log2(float64(numSets)))) totalElems := numSets * numElemsPerSet expectedMax := logNumSets*totalElems + numSets callCount1 := 0 iter := iterize(createSetsWithDistinctNumbers(vs, numSets, numElemsPerSet), NewIntersectionTestIterator, &callCount1) vals := iterToSlice(iter) expected := ValueSlice{} assert.True(expected.Equals(vals), "expected: %v != actual: %v", expected, vals) assert.True(expectedMax > callCount1, "callCount: %d exceeds expectedMax: %d", callCount1, expectedMax) callCount2 := 0 iter = iterize(createSetsWithSameNumbers(vs, numSets, numElemsPerSet), NewIntersectionTestIterator, &callCount2) vals = iterToSlice(iter) expected = generateNumbersAsValueSlice(numElemsPerSet) assert.True(expected.Equals(vals), "expected: %v != actual: %v", expected, vals) assert.True(expectedMax > callCount2, "callCount: %d exceeds expectedMax: %d", callCount2, expectedMax) } func createSetsWithDistinctNumbers(vrw ValueReadWriter, numSets, numElemsPerSet int) []SetIterator { iterSlice := []SetIterator{} for i := 0; i < numSets; i++ { vals := ValueSlice{} for j := 0; j < numElemsPerSet; j++ { vals = append(vals, Number(i+(numSets*j))) } s := NewSet(vrw, vals...) iterSlice = append(iterSlice, s.Iterator()) } return iterSlice } func createSetsWithSameNumbers(vrw ValueReadWriter, numSets, numElemsPerSet int) []SetIterator { vs := ValueSlice{} for j := 0; j < numElemsPerSet; j++ { vs = append(vs, Number(j)) } iterSlice := []SetIterator{} for i := 0; i < numSets; i++ { iterSlice = append(iterSlice, NewSet(vrw, vs...).Iterator()) } return iterSlice } type newIterFunc func(i1, i2 SetIterator, cntr *int) SetIterator // Iterize calls itself recursively to build a binary tree of iterators over the original set. 
func iterize(iters []SetIterator, newIter newIterFunc, cntr *int) SetIterator { if len(iters) == 0 { return nil } if len(iters) <= 1 { return iters[0] } var iter0 SetIterator newIters := []SetIterator{} for i, iter := range iters { if i%2 == 0 { iter0 = iter } else { ni := newIter(iter0, iter, cntr) newIters = append(newIters, ni) iter0 = nil } } if iter0 != nil { newIters = append(newIters, iter0) } return iterize(newIters, newIter, cntr) } ================================================ FILE: go/types/set_leaf_sequence.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import "sort" type setLeafSequence struct { leafSequence } func newSetLeafSequence(vrw ValueReadWriter, vs ...Value) orderedSequence { return setLeafSequence{newLeafSequenceFromValues(SetKind, vrw, vs...)} } func (sl setLeafSequence) getCompareFn(other sequence) compareFn { return sl.getCompareFnHelper(other.(setLeafSequence).leafSequence) } // orderedSequence interface func (sl setLeafSequence) getKey(idx int) orderedKey { return newOrderedKey(sl.getItem(idx).(Value)) } func (sl setLeafSequence) search(key orderedKey) int { return sort.Search(int(sl.Len()), func(i int) bool { return !sl.getKey(i).Less(key) }) } ================================================ FILE: go/types/set_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "fmt" "math/rand" "sort" "sync" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) const testSetSize = 5000 type testSet ValueSlice type toTestSetFunc func(scale int, vrw ValueReadWriter) testSet func (ts testSet) Remove(from, to int) testSet { values := make(testSet, 0, len(ts)-(to-from)) values = append(values, ts[:from]...) values = append(values, ts[to:]...) return values } func (ts testSet) Has(key Value) bool { for _, entry := range ts { if entry.Equals(key) { return true } } return false } func (ts testSet) Diff(last testSet) (added []Value, removed []Value) { // Note: this could be use ts.toSet/last.toSet and then tsSet.Diff(lastSet) but the // purpose of this method is to be redundant. if len(ts) == 0 && len(last) == 0 { return // nothing changed } if len(ts) == 0 { // everything removed for _, entry := range last { removed = append(removed, entry) } return } if len(last) == 0 { // everything added for _, entry := range ts { added = append(added, entry) } return } for _, entry := range ts { if !last.Has(entry) { added = append(added, entry) } } for _, entry := range last { if !ts.Has(entry) { removed = append(removed, entry) } } return } func (ts testSet) toSet(vrw ValueReadWriter) Set { return NewSet(vrw, ts...) 
} func newSortedTestSet(length int, gen genValueFn) (values testSet) { for i := 0; i < length; i++ { values = append(values, gen(i)) } return } func newTestSetFromSet(s Set) testSet { values := make([]Value, 0, s.Len()) s.IterAll(func(v Value) { values = append(values, v) }) return values } func newRandomTestSet(length int, gen genValueFn) testSet { s := rand.NewSource(4242) used := map[int]bool{} var values []Value for len(values) < length { v := int(s.Int63()) & 0xffffff if _, ok := used[v]; !ok { values = append(values, gen(v)) used[v] = true } } return values } func validateSet(t *testing.T, vrw ValueReadWriter, s Set, values ValueSlice) { assert.True(t, s.Equals(NewSet(vrw, values...))) out := ValueSlice{} s.IterAll(func(v Value) { out = append(out, v) }) assert.True(t, out.Equals(values)) } type setTestSuite struct { collectionTestSuite elems testSet } func newSetTestSuite(size uint, expectChunkCount int, expectPrependChunkDiff int, expectAppendChunkDiff int, gen genValueFn) *setTestSuite { vs := newTestValueStore() length := 1 << size elemType := TypeOf(gen(0)) elems := newSortedTestSet(length, gen) tr := MakeSetType(elemType) set := NewSet(vs, elems...) return &setTestSuite{ collectionTestSuite: collectionTestSuite{ col: set, expectType: tr, expectLen: uint64(length), expectChunkCount: expectChunkCount, expectPrependChunkDiff: expectPrependChunkDiff, expectAppendChunkDiff: expectAppendChunkDiff, validate: func(v2 Collection) bool { l2 := v2.(Set) out := ValueSlice{} l2.IterAll(func(v Value) { out = append(out, v) }) exp := ValueSlice(elems) rv := exp.Equals(out) if !rv { printBadCollections(exp, out) } return rv }, prependOne: func() Collection { dup := make([]Value, length+1) dup[0] = Number(-1) copy(dup[1:], elems) return NewSet(vs, dup...) }, appendOne: func() Collection { dup := make([]Value, length+1) copy(dup, elems) dup[len(dup)-1] = Number(length + 1) return NewSet(vs, dup...) 
}, }, elems: elems, } } var mutex sync.Mutex func printBadCollections(expected, actual ValueSlice) { mutex.Lock() defer mutex.Unlock() fmt.Println("expected:", expected) fmt.Println("actual:", actual) } func (suite *setTestSuite) createStreamingSet(vs *ValueStore) Set { vChan := make(chan Value) setChan := NewStreamingSet(vs, vChan) for _, entry := range suite.elems { vChan <- entry } close(vChan) return <-setChan } func (suite *setTestSuite) TestStreamingSet() { vs := newTestValueStore() defer vs.Close() s := suite.createStreamingSet(vs) suite.True(suite.validate(s)) } func (suite *setTestSuite) TestStreamingSetOrder() { vs := newTestValueStore() defer vs.Close() elems := make(testSet, len(suite.elems)) copy(elems, suite.elems) elems[0], elems[1] = elems[1], elems[0] vChan := make(chan Value, len(elems)) for _, e := range elems { vChan <- e } close(vChan) readInput := func(vrw ValueReadWriter, vChan <-chan Value, outChan chan<- Set) { readSetInput(vrw, vChan, outChan) } testFunc := func() { outChan := newStreamingSet(vs, vChan, readInput) <-outChan } suite.Panics(testFunc) } func (suite *setTestSuite) TestStreamingSet2() { vs := newTestValueStore() defer vs.Close() wg := sync.WaitGroup{} wg.Add(2) var s1, s2 Set go func() { s1 = suite.createStreamingSet(vs) wg.Done() }() go func() { s2 = suite.createStreamingSet(vs) wg.Done() }() wg.Wait() suite.True(suite.validate(s1)) suite.True(suite.validate(s2)) } func TestSetSuite4K(t *testing.T) { suite.Run(t, newSetTestSuite(12, 8, 2, 2, newNumber)) } func TestSetSuite4KStructs(t *testing.T) { suite.Run(t, newSetTestSuite(12, 9, 2, 2, newNumberStruct)) } func getTestNativeOrderSet(scale int, vrw ValueReadWriter) testSet { return newRandomTestSet(64*scale, newNumber) } func getTestRefValueOrderSet(scale int, vrw ValueReadWriter) testSet { return newRandomTestSet(64*scale, newNumber) } func getTestRefToNativeOrderSet(scale int, vrw ValueReadWriter) testSet { return newRandomTestSet(64*scale, func(v int) Value { return 
vrw.WriteValue(Number(v)) }) } func getTestRefToValueOrderSet(scale int, vrw ValueReadWriter) testSet { return newRandomTestSet(64*scale, func(v int) Value { return vrw.WriteValue(NewSet(vrw, Number(v))) }) } func accumulateSetDiffChanges(s1, s2 Set) (added []Value, removed []Value) { changes := make(chan ValueChanged) go func() { s1.Diff(s2, changes, nil) close(changes) }() for change := range changes { if change.ChangeType == DiffChangeAdded { added = append(added, change.Key) } else if change.ChangeType == DiffChangeRemoved { removed = append(removed, change.Key) } } return } func diffSetTest(assert *assert.Assertions, s1 Set, s2 Set, numAddsExpected int, numRemovesExpected int) (added []Value, removed []Value) { added, removed = accumulateSetDiffChanges(s1, s2) assert.Equal(numAddsExpected, len(added), "num added is not as expected") assert.Equal(numRemovesExpected, len(removed), "num removed is not as expected") ts1 := newTestSetFromSet(s1) ts2 := newTestSetFromSet(s2) tsAdded, tsRemoved := ts1.Diff(ts2) assert.Equal(numAddsExpected, len(tsAdded), "num added is not as expected") assert.Equal(numRemovesExpected, len(tsRemoved), "num removed is not as expected") assert.Equal(added, tsAdded, "set added != tsSet added") assert.Equal(removed, tsRemoved, "set removed != tsSet removed") return } func TestNewSet(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs) assert.True(MakeSetType(MakeUnionType()).Equals(TypeOf(s))) assert.Equal(uint64(0), s.Len()) s = NewSet(vs, Number(0)) assert.True(MakeSetType(NumberType).Equals(TypeOf(s))) s = NewSet(vs) assert.IsType(MakeSetType(NumberType), TypeOf(s)) s2 := s.Edit().Remove(Number(1)).Set() assert.IsType(TypeOf(s), TypeOf(s2)) } func TestSetLen(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s0 := NewSet(vs) assert.Equal(uint64(0), s0.Len()) s1 := NewSet(vs, Bool(true), Number(1), String("hi")) assert.Equal(uint64(3), s1.Len()) diffSetTest(assert, s0, s1, 0, 3) 
diffSetTest(assert, s1, s0, 3, 0) s2 := s1.Edit().Insert(Bool(false)).Set() assert.Equal(uint64(4), s2.Len()) diffSetTest(assert, s0, s2, 0, 4) diffSetTest(assert, s2, s0, 4, 0) diffSetTest(assert, s1, s2, 0, 1) diffSetTest(assert, s2, s1, 1, 0) s3 := s2.Edit().Remove(Bool(true)).Set() assert.Equal(uint64(3), s3.Len()) diffSetTest(assert, s2, s3, 1, 0) diffSetTest(assert, s3, s2, 0, 1) } func TestSetEmpty(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs) assert.True(s.Empty()) assert.Equal(uint64(0), s.Len()) } func TestSetEmptyInsert(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs) assert.True(s.Empty()) s = s.Edit().Insert(Bool(false)).Set() assert.False(s.Empty()) assert.Equal(uint64(1), s.Len()) } func TestSetEmptyInsertRemove(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs) assert.True(s.Empty()) s = s.Edit().Insert(Bool(false)).Set() assert.False(s.Empty()) assert.Equal(uint64(1), s.Len()) s = s.Edit().Remove(Bool(false)).Set() assert.True(s.Empty()) assert.Equal(uint64(0), s.Len()) } // BUG 98 func TestSetDuplicateInsert(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s1 := NewSet(vs, Bool(true), Number(42), Number(42)) assert.Equal(uint64(2), s1.Len()) } func TestSetUniqueKeysString(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s1 := NewSet(vs, String("hello"), String("world"), String("hello")) assert.Equal(uint64(2), s1.Len()) assert.True(s1.Has(String("hello"))) assert.True(s1.Has(String("world"))) assert.False(s1.Has(String("foo"))) } func TestSetUniqueKeysNumber(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s1 := NewSet(vs, Number(4), Number(1), Number(0), Number(0), Number(1), Number(3)) assert.Equal(uint64(4), s1.Len()) assert.True(s1.Has(Number(4))) assert.True(s1.Has(Number(1))) assert.True(s1.Has(Number(0))) assert.True(s1.Has(Number(3))) assert.False(s1.Has(Number(2))) } func TestSetHas(t 
*testing.T) { assert := assert.New(t) vs := newTestValueStore() s1 := NewSet(vs, Bool(true), Number(1), String("hi")) assert.True(s1.Has(Bool(true))) assert.False(s1.Has(Bool(false))) assert.True(s1.Has(Number(1))) assert.False(s1.Has(Number(0))) assert.True(s1.Has(String("hi"))) assert.False(s1.Has(String("ho"))) s2 := s1.Edit().Insert(Bool(false)).Set() assert.True(s2.Has(Bool(false))) assert.True(s2.Has(Bool(true))) assert.True(s1.Has(Bool(true))) assert.False(s1.Has(Bool(false))) } func TestSetHas2(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() assert := assert.New(t) doTest := func(toTestSet toTestSetFunc, scale int) { vrw := newTestValueStore() ts := toTestSet(scale, vrw) set := ts.toSet(vrw) set2 := vrw.ReadValue(vrw.WriteValue(set).TargetHash()).(Set) for _, v := range ts { assert.True(set.Has(v)) assert.True(set2.Has(v)) } diffSetTest(assert, set, set2, 0, 0) } doTest(getTestNativeOrderSet, 16) doTest(getTestRefValueOrderSet, 2) doTest(getTestRefToNativeOrderSet, 2) doTest(getTestRefToValueOrderSet, 2) } func validateSetInsertion(t *testing.T, vrw ValueReadWriter, values ValueSlice) { s := NewSet(vrw) for i, v := range values { s = s.Edit().Insert(v).Set() validateSet(t, vrw, s, values[0:i+1]) } } func TestSetValidateInsertAscending(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() validateSetInsertion(t, vs, generateNumbersAsValues(300)) } func TestSetInsert(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs) v1 := Bool(false) v2 := Bool(true) v3 := Number(0) assert.False(s.Has(v1)) s = s.Edit().Insert(v1).Set() assert.True(s.Has(v1)) s = s.Edit().Insert(v2).Set() assert.True(s.Has(v1)) assert.True(s.Has(v2)) s2 := s.Edit().Insert(v3).Set() assert.True(s.Has(v1)) assert.True(s.Has(v2)) assert.False(s.Has(v3)) assert.True(s2.Has(v1)) 
assert.True(s2.Has(v2)) assert.True(s2.Has(v3)) } func TestSetInsert2(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() assert := assert.New(t) doTest := func(incr, offset int, toTestSet toTestSetFunc, scale int) { vrw := newTestValueStore() ts := toTestSet(scale, vrw) expected := ts.toSet(vrw) run := func(from, to int) { actual := ts.Remove(from, to).toSet(vrw).Edit().Insert(ts[from:to]...).Set() assert.Equal(expected.Len(), actual.Len()) assert.True(expected.Equals(actual)) diffSetTest(assert, expected, actual, 0, 0) } for i := 0; i < len(ts)-offset; i += incr { run(i, i+offset) } run(len(ts)-offset, len(ts)) } doTest(18, 3, getTestNativeOrderSet, 9) doTest(64, 1, getTestNativeOrderSet, 32) doTest(32, 1, getTestRefValueOrderSet, 4) doTest(32, 1, getTestRefToNativeOrderSet, 4) doTest(32, 1, getTestRefToValueOrderSet, 4) } func TestSetInsertExistingValue(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() ts := getTestNativeOrderSet(2, vs) original := ts.toSet(vs) actual := original.Edit().Insert(ts[0]).Set() assert.Equal(original.Len(), actual.Len()) assert.True(original.Equals(actual)) } func TestSetRemove(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() v1 := Bool(false) v2 := Bool(true) v3 := Number(0) s := NewSet(vs, v1, v2, v3) assert.True(s.Has(v1)) assert.True(s.Has(v2)) assert.True(s.Has(v3)) s = s.Edit().Remove(v1).Set() assert.False(s.Has(v1)) assert.True(s.Has(v2)) assert.True(s.Has(v3)) s2 := s.Edit().Remove(v2).Set() assert.False(s.Has(v1)) assert.True(s.Has(v2)) assert.True(s.Has(v3)) assert.False(s2.Has(v1)) assert.False(s2.Has(v2)) assert.True(s2.Has(v3)) } func TestSetRemove2(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } smallTestChunks() defer normalProductionChunks() assert := assert.New(t) doTest := func(incr, offset int, toTestSet toTestSetFunc, scale int) { 
vrw := newTestValueStore() ts := toTestSet(scale, vrw) whole := ts.toSet(vrw) run := func(from, to int) { expected := ts.Remove(from, to).toSet(vrw) actual := whole.Edit().Remove(ts[from:to]...).Set() assert.Equal(expected.Len(), actual.Len()) assert.True(expected.Equals(actual)) diffSetTest(assert, expected, actual, 0, 0) } for i := 0; i < len(ts)-offset; i += incr { run(i, i+offset) } run(len(ts)-offset, len(ts)) } doTest(18, 3, getTestNativeOrderSet, 9) doTest(64, 1, getTestNativeOrderSet, 32) doTest(32, 1, getTestRefValueOrderSet, 4) doTest(32, 1, getTestRefToNativeOrderSet, 4) doTest(32, 1, getTestRefToValueOrderSet, 4) } func TestSetRemoveNonexistentValue(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() ts := getTestNativeOrderSet(2, vs) original := ts.toSet(vs) actual := original.Edit().Remove(Number(-1)).Set() // rand.Int63 returns non-negative values. assert.Equal(original.Len(), actual.Len()) assert.True(original.Equals(actual)) } func TestSetFirst(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs) assert.Nil(s.First()) s = s.Edit().Insert(Number(1)).Set() assert.NotNil(s.First()) s = s.Edit().Insert(Number(2)).Set() assert.NotNil(s.First()) s2 := s.Edit().Remove(Number(1)).Set() assert.NotNil(s2.First()) s2 = s2.Edit().Remove(Number(2)).Set() assert.Nil(s2.First()) } func TestSetOfStruct(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() elems := []Value{} for i := 0; i < 200; i++ { elems = append(elems, NewStruct("S1", StructData{"o": Number(i)})) } s := NewSet(vs, elems...) 
for i := 0; i < 200; i++ { assert.True(s.Has(elems[i])) } } func TestSetIter(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs, Number(0), Number(1), Number(2), Number(3), Number(4)) acc := NewSet(vs) s.Iter(func(v Value) bool { _, ok := v.(Number) assert.True(ok) acc = acc.Edit().Insert(v).Set() return false }) assert.True(s.Equals(acc)) acc = NewSet(vs) s.Iter(func(v Value) bool { return true }) assert.True(acc.Empty()) } func TestSetIter2(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() doTest := func(toTestSet toTestSetFunc, scale int) { vrw := newTestValueStore() ts := toTestSet(scale, vrw) set := ts.toSet(vrw) sort.Sort(ValueSlice(ts)) idx := uint64(0) endAt := uint64(64) set.Iter(func(v Value) (done bool) { assert.True(ts[idx].Equals(v)) if idx == endAt { done = true } idx++ return }) assert.Equal(endAt, idx-1) } doTest(getTestNativeOrderSet, 16) doTest(getTestRefValueOrderSet, 2) doTest(getTestRefToNativeOrderSet, 2) doTest(getTestRefToValueOrderSet, 2) } func TestSetIterAll(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs, Number(0), Number(1), Number(2), Number(3), Number(4)) acc := NewSet(vs) s.IterAll(func(v Value) { _, ok := v.(Number) assert.True(ok) acc = acc.Edit().Insert(v).Set() }) assert.True(s.Equals(acc)) } func TestSetIterAll2(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() doTest := func(toTestSet toTestSetFunc, scale int) { vrw := newTestValueStore() ts := toTestSet(scale, vrw) set := ts.toSet(vrw) sort.Sort(ValueSlice(ts)) idx := uint64(0) set.IterAll(func(v Value) { assert.True(ts[idx].Equals(v)) idx++ }) } doTest(getTestNativeOrderSet, 16) doTest(getTestRefValueOrderSet, 2) doTest(getTestRefToNativeOrderSet, 2) doTest(getTestRefToValueOrderSet, 2) } func testSetOrder(assert *assert.Assertions, vrw ValueReadWriter, valueType *Type, value []Value, expectOrdering []Value) { m := NewSet(vrw, value...) 
i := 0 m.IterAll(func(value Value) { assert.Equal(expectOrdering[i].Hash().String(), value.Hash().String()) i++ }) } func TestSetOrdering(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() testSetOrder(assert, vs, StringType, []Value{ String("a"), String("z"), String("b"), String("y"), String("c"), String("x"), }, []Value{ String("a"), String("b"), String("c"), String("x"), String("y"), String("z"), }, ) testSetOrder(assert, vs, NumberType, []Value{ Number(0), Number(1000), Number(1), Number(100), Number(2), Number(10), }, []Value{ Number(0), Number(1), Number(2), Number(10), Number(100), Number(1000), }, ) testSetOrder(assert, vs, NumberType, []Value{ Number(0), Number(-30), Number(25), Number(1002), Number(-5050), Number(23), }, []Value{ Number(-5050), Number(-30), Number(0), Number(23), Number(25), Number(1002), }, ) testSetOrder(assert, vs, NumberType, []Value{ Number(0.0001), Number(0.000001), Number(1), Number(25.01e3), Number(-32.231123e5), Number(23), }, []Value{ Number(-32.231123e5), Number(0.000001), Number(0.0001), Number(1), Number(23), Number(25.01e3), }, ) testSetOrder(assert, vs, ValueType, []Value{ String("a"), String("z"), String("b"), String("y"), String("c"), String("x"), }, // Ordered by value []Value{ String("a"), String("b"), String("c"), String("x"), String("y"), String("z"), }, ) testSetOrder(assert, vs, BoolType, []Value{ Bool(true), Bool(false), }, // Ordered by value []Value{ Bool(false), Bool(true), }, ) } func TestSetType(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() s := NewSet(vs) assert.True(TypeOf(s).Equals(MakeSetType(MakeUnionType()))) s = NewSet(vs, Number(0)) assert.True(TypeOf(s).Equals(MakeSetType(NumberType))) s2 := s.Edit().Remove(Number(1)).Set() assert.True(TypeOf(s2).Equals(MakeSetType(NumberType))) s2 = s.Edit().Insert(Number(0), Number(1)).Set() assert.True(TypeOf(s).Equals(TypeOf(s2))) s3 := s.Edit().Insert(Bool(true)).Set() 
assert.True(TypeOf(s3).Equals(MakeSetType(MakeUnionType(BoolType, NumberType)))) s4 := s.Edit().Insert(Number(3), Bool(true)).Set() assert.True(TypeOf(s4).Equals(MakeSetType(MakeUnionType(BoolType, NumberType)))) } func TestSetChunks(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() l1 := NewSet(vs, Number(0)) c1 := getChunks(l1) assert.Len(c1, 0) l2 := NewSet(vs, NewRef(Number(0))) c2 := getChunks(l2) assert.Len(c2, 1) } func TestSetChunks2(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() doTest := func(toTestSet toTestSetFunc, scale int) { vrw := newTestValueStore() ts := toTestSet(scale, vrw) set := ts.toSet(vrw) set2chunks := getChunks(vrw.ReadValue(vrw.WriteValue(set).TargetHash())) for i, r := range getChunks(set) { assert.True(TypeOf(r).Equals(TypeOf(set2chunks[i])), "%s != %s", TypeOf(r).Describe(), TypeOf(set2chunks[i]).Describe()) } } doTest(getTestNativeOrderSet, 16) doTest(getTestRefValueOrderSet, 2) doTest(getTestRefToNativeOrderSet, 2) doTest(getTestRefToValueOrderSet, 2) } func TestSetFirstNNumbers(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() nums := generateNumbersAsValues(testSetSize) s := NewSet(vs, nums...) assert.Equal(deriveCollectionHeight(s), getRefHeightOfCollection(s)) } func TestSetRefOfStructFirstNNumbers(t *testing.T) { if testing.Short() { t.Skip("Skipping test in short mode.") } assert := assert.New(t) vs := newTestValueStore() nums := generateNumbersAsRefOfStructs(vs, testSetSize) s := NewSet(vs, nums...) // height + 1 because the leaves are Ref values (with height 1). assert.Equal(deriveCollectionHeight(s)+1, getRefHeightOfCollection(s)) } func TestSetModifyAfterRead(t *testing.T) { smallTestChunks() defer normalProductionChunks() assert := assert.New(t) vs := newTestValueStore() set := getTestNativeOrderSet(2, vs).toSet(vs) // Drop chunk values. set = vs.ReadValue(vs.WriteValue(set).TargetHash()).(Set) // Modify/query. Once upon a time this would crash. 
fst := set.First() set = set.Edit().Remove(fst).Set() assert.False(set.Has(fst)) assert.True(set.Has(set.First())) set = set.Edit().Insert(fst).Set() assert.True(set.Has(fst)) } func TestSetTypeAfterMutations(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() test := func(n int, c interface{}) { vs := newTestValueStore() values := generateNumbersAsValues(n) s := NewSet(vs, values...) assert.Equal(s.Len(), uint64(n)) assert.IsType(c, s.asSequence()) assert.True(TypeOf(s).Equals(MakeSetType(NumberType))) s = s.Edit().Insert(String("a")).Set() assert.Equal(s.Len(), uint64(n+1)) assert.IsType(c, s.asSequence()) assert.True(TypeOf(s).Equals(MakeSetType(MakeUnionType(NumberType, StringType)))) s = s.Edit().Remove(String("a")).Set() assert.Equal(s.Len(), uint64(n)) assert.IsType(c, s.asSequence()) assert.True(TypeOf(s).Equals(MakeSetType(NumberType))) } test(10, setLeafSequence{}) test(2000, metaSequence{}) } func TestChunkedSetWithValuesOfEveryType(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() smallTestChunks() defer normalProductionChunks() vals := []Value{ // Values Bool(true), Number(0), String("hello"), NewBlob(vs, bytes.NewBufferString("buf")), NewSet(vs, Bool(true)), NewList(vs, Bool(true)), NewMap(vs, Bool(true), Number(0)), NewStruct("", StructData{"field": Bool(true)}), // Refs of values NewRef(Bool(true)), NewRef(Number(0)), NewRef(String("hello")), NewRef(NewBlob(vs, bytes.NewBufferString("buf"))), NewRef(NewSet(vs, Bool(true))), NewRef(NewList(vs, Bool(true))), NewRef(NewMap(vs, Bool(true), Number(0))), NewRef(NewStruct("", StructData{"field": Bool(true)})), } s := NewSet(vs, vals...) 
for i := 1; s.asSequence().isLeaf(); i++ { v := Number(i) vals = append(vals, v) s = s.Edit().Insert(v).Set() } assert.Equal(len(vals), int(s.Len())) assert.True(bool(s.First().(Bool))) for _, v := range vals { assert.True(s.Has(v)) } for len(vals) > 0 { v := vals[0] vals = vals[1:] s = s.Edit().Remove(v).Set() assert.False(s.Has(v)) assert.Equal(len(vals), int(s.Len())) } } func TestSetRemoveLastWhenNotLoaded(t *testing.T) { assert := assert.New(t) smallTestChunks() defer normalProductionChunks() vs := newTestValueStore() reload := func(s Set) Set { return vs.ReadValue(vs.WriteValue(s).TargetHash()).(Set) } ts := getTestNativeOrderSet(8, vs) ns := ts.toSet(vs) for len(ts) > 0 { last := ts[len(ts)-1] ts = ts[:len(ts)-1] ns = reload(ns.Edit().Remove(last).Set()) assert.True(ts.toSet(vs).Equals(ns)) } } func TestSetAt(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() values := []Value{Bool(false), Number(42), String("a"), String("b"), String("c")} s := NewSet(vs, values...) 
for i, v := range values { assert.Equal(v, s.At(uint64(i))) } assert.Panics(func() { s.At(42) }) } func TestSetWithStructShouldHaveOptionalFields(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() list := NewSet(vs, NewStruct("Foo", StructData{ "a": Number(1), }), NewStruct("Foo", StructData{ "a": Number(2), "b": String("bar"), }), ) assert.True( MakeSetType(MakeStructType("Foo", StructField{"a", NumberType, false}, StructField{"b", StringType, true}, ), ).Equals(TypeOf(list))) } func TestSetWithNil(t *testing.T) { vs := newTestValueStore() assert.Panics(t, func() { NewSet(vs, nil) }) assert.Panics(t, func() { NewSet(vs, Number(42), nil) }) } ================================================ FILE: go/types/simplify.go ================================================ package types import ( "sort" "github.com/attic-labs/noms/go/d" ) // simplifyType returns a type that is a super type of the input type but is // much smaller and less complex than a straight union of all those types would // be. // // The resulting type is guaranteed to: // a. be a super type of the input type // b. have all unions flattened (no union inside a union) // c. have all unions folded, which means the union // 1. have at most one element each of kind Ref, Set, List, and Map // 2. have at most one struct element with a given name // e. all named unions are pointing at the same simplified struct, which means // that all named unions with the same name form cycles. // f. all cycle type that can be resolved have been resolved. // g. 
all types reachable from it also fulfill b-f // // The union folding is created roughly as follows: // // - The input types are deduplicated // - Any unions in the input set are "flattened" into the input set // - The inputs are grouped into categories: // - ref // - list // - set // - map // - struct, by name (each unique struct name will have its own group) // - The ref, set, and list groups are collapsed like so: // {Ref,Ref,...} -> Ref // - The map group is collapsed like so: // {Map|Map...} -> Map // - Each struct group is collapsed like so: // {struct{foo:number,bar:string}, struct{bar:blob, baz:bool}} -> // struct{foo?:number,bar:string|blob,baz?:bool} // // All the above rules are applied recursively. func simplifyType(t *Type, intersectStructs bool) *Type { if t.Desc.isSimplifiedForSure() { return t } // 1. Clone tree because we are going to mutate it // 1.1 Replace all named structs and cycle types with a single `struct Name {}` // 2. When a union type is found change its elemTypes as needed // 2.1 Merge unnamed structs // 3. Update the fields of all named structs namedStructs := map[string]structInfo{} clone := cloneTypeTreeAndReplaceNamedStructs(t, namedStructs) folded := foldUnions(clone, typeset{}, intersectStructs) for name, info := range namedStructs { if len(info.sources) == 0 { d.PanicIfTrue(name == "") info.instance.Desc = CycleDesc(name) } else { fields := foldStructTypesFieldsOnly(name, info.sources, typeset{}, intersectStructs) info.instance.Desc = StructDesc{name, fields} } } return folded } // typeset is a helper that aggregates the unique set of input types for this algorithm, flattening // any unions recursively. 
type typeset map[*Type]struct{} func (ts typeset) add(t *Type) { switch t.TargetKind() { case UnionKind: for _, et := range t.Desc.(CompoundDesc).ElemTypes { ts.add(et) } default: ts[t] = struct{}{} } } func (ts typeset) has(t *Type) bool { _, ok := ts[t] return ok } type structInfo struct { instance *Type sources typeset } func cloneTypeTreeAndReplaceNamedStructs(t *Type, namedStructs map[string]structInfo) *Type { getNamedStruct := func(name string, t *Type) *Type { record := namedStructs[name] if t.TargetKind() == StructKind { record.sources.add(t) } return record.instance } ensureInstance := func(name string) { if _, ok := namedStructs[name]; !ok { instance := newType(StructDesc{Name: name}) namedStructs[name] = structInfo{instance, typeset{}} } } seenStructs := typeset{} var rec func(t *Type) *Type rec = func(t *Type) *Type { kind := t.TargetKind() switch kind { case BoolKind, NumberKind, StringKind, BlobKind, ValueKind, TypeKind: return t case ListKind, MapKind, RefKind, SetKind, UnionKind: elemTypes := make(typeSlice, len(t.Desc.(CompoundDesc).ElemTypes)) for i, et := range t.Desc.(CompoundDesc).ElemTypes { elemTypes[i] = rec(et) } return newType(CompoundDesc{kind, elemTypes}) case StructKind: desc := t.Desc.(StructDesc) name := desc.Name if name != "" { ensureInstance(name) if seenStructs.has(t) { return namedStructs[name].instance } } else if seenStructs.has(t) { // It is OK to use the same unnamed struct type in multiple places. // Do not clone it again. 
return t } seenStructs.add(t) fields := make(structTypeFields, len(desc.fields)) for i, f := range desc.fields { fields[i] = StructField{f.Name, rec(f.Type), f.Optional} } newStruct := newType(StructDesc{name, fields}) if name == "" { return newStruct } return getNamedStruct(name, newStruct) case CycleKind: name := string(t.Desc.(CycleDesc)) d.PanicIfTrue(name == "") ensureInstance(name) return getNamedStruct(name, t) default: panic("Unknown noms kind") } } return rec(t) } func foldUnions(t *Type, seenStructs typeset, intersectStructs bool) *Type { kind := t.TargetKind() switch kind { case BoolKind, NumberKind, StringKind, BlobKind, ValueKind, TypeKind, CycleKind: break case ListKind, MapKind, RefKind, SetKind: elemTypes := t.Desc.(CompoundDesc).ElemTypes for i, et := range elemTypes { elemTypes[i] = foldUnions(et, seenStructs, intersectStructs) } case StructKind: if seenStructs.has(t) { return t } seenStructs.add(t) fields := t.Desc.(StructDesc).fields for i, f := range fields { fields[i].Type = foldUnions(f.Type, seenStructs, intersectStructs) } case UnionKind: elemTypes := t.Desc.(CompoundDesc).ElemTypes if len(elemTypes) == 0 { break } ts := make(typeset, len(elemTypes)) for _, t := range elemTypes { ts.add(t) } if len(ts) == 0 { t.Desc = CompoundDesc{UnionKind, nil} return t } return foldUnionImpl(ts, seenStructs, intersectStructs) default: panic("Unknown noms kind") } return t } func foldUnionImpl(ts typeset, seenStructs typeset, intersectStructs bool) *Type { type how struct { k NomsKind n string } out := make(typeSlice, 0, len(ts)) groups := map[how]typeset{} for t := range ts { var h how switch t.TargetKind() { case RefKind, SetKind, ListKind, MapKind: h = how{k: t.TargetKind()} case StructKind: h = how{k: t.TargetKind(), n: t.Desc.(StructDesc).Name} default: out = append(out, t) continue } g := groups[h] if g == nil { g = typeset{} groups[h] = g } g.add(t) } for h, ts := range groups { if len(ts) == 1 { for t := range ts { out = append(out, t) } continue 
} var r *Type switch h.k { case ListKind, RefKind, SetKind: r = foldCompoundTypesForUnion(h.k, ts, seenStructs, intersectStructs) case MapKind: r = foldMapTypesForUnion(ts, seenStructs, intersectStructs) case StructKind: r = foldStructTypes(h.n, ts, seenStructs, intersectStructs) } out = append(out, r) } for i, t := range out { out[i] = foldUnions(t, seenStructs, intersectStructs) } if len(out) == 1 { return out[0] } sort.Sort(out) return newType(CompoundDesc{UnionKind, out}) } func foldCompoundTypesForUnion(k NomsKind, ts, seenStructs typeset, intersectStructs bool) *Type { elemTypes := make(typeset, len(ts)) for t := range ts { d.PanicIfFalse(t.TargetKind() == k) elemTypes.add(t.Desc.(CompoundDesc).ElemTypes[0]) } elemType := foldUnionImpl(elemTypes, seenStructs, intersectStructs) return makeCompoundType(k, elemType) } func foldMapTypesForUnion(ts, seenStructs typeset, intersectStructs bool) *Type { keyTypes := make(typeset, len(ts)) valTypes := make(typeset, len(ts)) for t := range ts { d.PanicIfFalse(t.TargetKind() == MapKind) elemTypes := t.Desc.(CompoundDesc).ElemTypes keyTypes.add(elemTypes[0]) valTypes.add(elemTypes[1]) } kt := foldUnionImpl(keyTypes, seenStructs, intersectStructs) vt := foldUnionImpl(valTypes, seenStructs, intersectStructs) return makeCompoundType(MapKind, kt, vt) } func foldStructTypesFieldsOnly(name string, ts, seenStructs typeset, intersectStructs bool) structTypeFields { fieldset := make([]structTypeFields, len(ts)) i := 0 for t := range ts { desc := t.Desc.(StructDesc) d.PanicIfFalse(desc.Name == name) fieldset[i] = desc.fields i++ } return simplifyStructFields(fieldset, seenStructs, intersectStructs) } func foldStructTypes(name string, ts, seenStructs typeset, intersectStructs bool) *Type { fields := foldStructTypesFieldsOnly(name, ts, seenStructs, intersectStructs) return newType(StructDesc{name, fields}) } func simplifyStructFields(in []structTypeFields, seenStructs typeset, intersectStructs bool) structTypeFields { // We gather 
all the fields/types into allFields. If the number of // times a field name is present is less that then number of types we // are simplifying then the field must be optional. // If we see an optional field we do not increment the count for it and // it will be treated as optional in the end. // If intersectStructs is true we need to pick the more restrictive version (n: T over n?: T). type fieldTypeInfo struct { anyNonOptional bool count int ts typeSlice } allFields := map[string]fieldTypeInfo{} for _, ff := range in { for _, f := range ff { fti, ok := allFields[f.Name] if !ok { fti = fieldTypeInfo{ ts: make(typeSlice, 0, len(in)), } } fti.ts = append(fti.ts, f.Type) if !f.Optional { fti.count++ fti.anyNonOptional = true } allFields[f.Name] = fti } } count := len(in) fields := make(structTypeFields, len(allFields)) i := 0 for name, fti := range allFields { nt := makeUnionType(fti.ts...) fields[i] = StructField{ Name: name, Type: foldUnions(nt, seenStructs, intersectStructs), Optional: !(intersectStructs && fti.anyNonOptional) && fti.count < count, } i++ } sort.Sort(fields) return fields } ================================================ FILE: go/types/simplify_test.go ================================================ package types import ( "testing" "github.com/stretchr/testify/assert" ) func TestSimplifyStructFields(t *testing.T) { assert := assert.New(t) test := func(in []structTypeFields, exp structTypeFields) { // simplifier := newSimplifier(false) act := simplifyStructFields(in, typeset{}, false) assert.Equal(act, exp) } test([]structTypeFields{ { StructField{"a", BoolType, false}, }, { StructField{"a", BoolType, false}, }, }, structTypeFields{ StructField{"a", BoolType, false}, }, ) test([]structTypeFields{ { StructField{"a", BoolType, false}, }, { StructField{"b", BoolType, false}, }, }, structTypeFields{ StructField{"a", BoolType, true}, StructField{"b", BoolType, true}, }, ) test([]structTypeFields{ { StructField{"a", BoolType, false}, }, { 
StructField{"a", BoolType, true}, }, }, structTypeFields{ StructField{"a", BoolType, true}, }, ) } func TestSimplifyType(t *testing.T) { assert := assert.New(t) run := func(intersectStructs bool) { test := func(in, exp *Type) { act := simplifyType(in, intersectStructs) assert.True(exp.Equals(act), "Expected: %s\nActual: %s", exp.Describe(), act.Describe()) } testSame := func(t *Type) { test(t, t) } testSame(BlobType) testSame(BoolType) testSame(NumberType) testSame(StringType) testSame(TypeType) testSame(ValueType) testSame(makeCompoundType(ListKind, BoolType)) testSame(makeCompoundType(SetKind, BoolType)) testSame(makeCompoundType(RefKind, BoolType)) testSame(makeCompoundType(MapKind, BoolType, NumberType)) { // Cannot do equals on cycle types in := MakeCycleType("ABC") act := simplifyType(in, intersectStructs) assert.Equal(in, act) } test(makeUnionType(BoolType), BoolType) test(makeUnionType(BoolType, BoolType), BoolType) testSame(makeUnionType(BoolType, NumberType)) test(makeUnionType(NumberType, BoolType), makeUnionType(BoolType, NumberType)) test(makeUnionType(NumberType, BoolType), makeUnionType(BoolType, NumberType)) testSame(makeCompoundType(ListKind, makeUnionType(BoolType, NumberType))) test(makeCompoundType(ListKind, makeUnionType(BoolType)), makeCompoundType(ListKind, BoolType)) test(makeCompoundType(ListKind, makeUnionType(BoolType, BoolType)), makeCompoundType(ListKind, BoolType)) testSame(makeStructType("", nil)) testSame(makeStructType("", structTypeFields{})) testSame(makeStructType("", structTypeFields{ StructField{"b", BoolType, false}, StructField{"s", StringType, !intersectStructs}, })) test( makeStructType("", structTypeFields{ StructField{"a", BoolType, false}, StructField{"b", makeUnionType(NumberType, NumberType), false}, }), makeStructType("", structTypeFields{ StructField{"a", BoolType, false}, StructField{"b", NumberType, false}, }), ) // non named structs do not create cycles. 
testSame(makeStructType("", structTypeFields{ StructField{"b", BoolType, false}, StructField{ "s", makeStructType("", structTypeFields{ StructField{"c", StringType, false}, }), !intersectStructs, }, })) // merge non named structs in unions test( makeCompoundType( UnionKind, makeStructType("", structTypeFields{ StructField{"a", BoolType, false}, }), makeStructType("", structTypeFields{ StructField{"b", BoolType, false}, }), ), makeStructType("", structTypeFields{ StructField{"a", BoolType, !intersectStructs}, StructField{"b", BoolType, !intersectStructs}, }), ) // List | List -> List for _, k := range []NomsKind{ListKind, SetKind, RefKind} { test( makeCompoundType( UnionKind, makeCompoundType(k, NumberType), makeCompoundType(k, BoolType), ), makeCompoundType(k, makeUnionType(BoolType, NumberType), ), ) } // Map | List -> List test( makeCompoundType( UnionKind, makeCompoundType(MapKind, NumberType, NumberType), makeCompoundType(MapKind, BoolType, NumberType), ), makeCompoundType(MapKind, makeUnionType(BoolType, NumberType), NumberType, ), ) // Map | List -> List test( makeCompoundType( UnionKind, makeCompoundType(MapKind, NumberType, NumberType), makeCompoundType(MapKind, NumberType, BoolType), ), makeCompoundType(MapKind, NumberType, makeUnionType(BoolType, NumberType), ), ) // union flattening test( makeUnionType(NumberType, makeUnionType(NumberType, BoolType)), makeUnionType(BoolType, NumberType), ) { // Cannot do equals on cycle types in := makeUnionType(MakeCycleType("A"), MakeCycleType("A")) exp := MakeCycleType("A") act := simplifyType(in, intersectStructs) assert.Equal(exp, act) } { // Cannot do equals on cycle types in := makeCompoundType(UnionKind, makeCompoundType(ListKind, MakeCycleType("A")), makeCompoundType(ListKind, MakeCycleType("A"))) exp := makeCompoundType(ListKind, MakeCycleType("A")) act := simplifyType(in, intersectStructs) assert.Equal(exp, act, "Expected: %s\nActual: %s", exp.Describe(), act.Describe()) } testSame(makeStructType("A", nil)) 
testSame(makeStructType("A", structTypeFields{})) testSame(makeStructType("A", structTypeFields{ StructField{"a", BoolType, !intersectStructs}, })) test( makeStructType("A", structTypeFields{ StructField{"a", makeUnionType(BoolType, BoolType, NumberType), false}, }), makeStructType("A", structTypeFields{ StructField{"a", makeUnionType(BoolType, NumberType), false}, }), ) testSame( makeStructType("A", structTypeFields{ StructField{ "a", makeStructType("B", structTypeFields{ StructField{"b", BoolType, !intersectStructs}, }), false, }, }), ) { // Create pointer cycle manually. exp := makeStructType("A", structTypeFields{ StructField{ "a", BoolType, // placeholder !intersectStructs, }, }) exp.Desc.(StructDesc).fields[0].Type = exp test( makeStructType("A", structTypeFields{ StructField{ "a", makeStructType("A", structTypeFields{}), false, }, }), exp, ) } { a := makeStructType("S", structTypeFields{}) exp := makeCompoundType(MapKind, a, a) test( makeCompoundType(MapKind, makeStructType("S", structTypeFields{}), makeStructType("S", structTypeFields{}), ), exp, ) } { a := makeStructType("S", structTypeFields{ StructField{"a", BoolType, !intersectStructs}, StructField{"b", makeUnionType(BoolType, StringType), false}, }) exp := makeCompoundType(MapKind, a, a) test( makeCompoundType(MapKind, makeStructType("S", structTypeFields{ StructField{"a", BoolType, false}, StructField{"b", StringType, false}, }), makeStructType("S", structTypeFields{ StructField{"b", BoolType, false}, }), ), exp, ) } // Non named do not get merged outside unions testSame( makeCompoundType(MapKind, makeStructType("", structTypeFields{ StructField{"a", BoolType, false}, StructField{"b", StringType, false}, }), makeStructType("", structTypeFields{ StructField{"b", BoolType, false}, }), ), ) // Cycle in union { a := makeStructType("A", structTypeFields{ StructField{ "a", BoolType, // placeholder !intersectStructs, }, }) a.Desc.(StructDesc).fields[0].Type = a exp := makeUnionType(NumberType, a, TypeType) 
test( makeCompoundType(UnionKind, makeStructType("A", structTypeFields{ StructField{ "a", makeStructType("A", structTypeFields{}), false, }, }), NumberType, TypeType, ), exp, ) } test( makeCompoundType(RefKind, makeCompoundType(UnionKind, makeCompoundType(ListKind, BoolType, ), makeCompoundType(SetKind, makeUnionType(StringType, NumberType), ), ), ), makeCompoundType(RefKind, makeCompoundType(UnionKind, makeCompoundType(ListKind, BoolType, ), makeCompoundType(SetKind, makeUnionType(NumberType, StringType), ), ), ), ) } t.Run("Union", func(*testing.T) { run(false) }) t.Run("IntersectStructs", func(*testing.T) { run(true) }) } ================================================ FILE: go/types/string.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "encoding/binary" "github.com/attic-labs/noms/go/hash" ) // String is a Noms Value wrapper around the primitive string type. type String string // Value interface func (s String) Value() Value { return s } func (s String) Equals(other Value) bool { return s == other } func (s String) Less(other Value) bool { if s2, ok := other.(String); ok { return s < s2 } return StringKind < other.Kind() } func (s String) Hash() hash.Hash { return getHash(s) } func (s String) WalkValues(cb ValueCallback) { } func (s String) WalkRefs(cb RefCallback) { } func (s String) typeOf() *Type { return StringType } func (s String) Kind() NomsKind { return StringKind } func (s String) valueReadWriter() ValueReadWriter { return nil } func (s String) writeTo(w nomsWriter) { StringKind.writeTo(w) w.writeString(string(s)) } func (s String) valueBytes() []byte { // We know the size of the buffer here so allocate it once. 
// StringKind, Length (UVarint), UTF-8 encoded string buff := make([]byte, 1+binary.MaxVarintLen64+len(s)) w := binaryNomsWriter{buff, 0} s.writeTo(&w) return buff[:w.offset] } ================================================ FILE: go/types/string_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "testing" "github.com/stretchr/testify/assert" ) func TestStringEquals(t *testing.T) { assert := assert.New(t) s1 := String("foo") s2 := String("foo") s3 := s2 s4 := String("bar") assert.True(s1.Equals(s2)) assert.True(s2.Equals(s1)) assert.True(s1.Equals(s3)) assert.True(s3.Equals(s1)) assert.False(s1.Equals(s4)) assert.False(s4.Equals(s1)) } func TestStringString(t *testing.T) { assert := assert.New(t) s1 := String("") s2 := String("foo") assert.Equal("", string(s1)) assert.Equal("foo", string(s2)) } func TestStringType(t *testing.T) { assert.True(t, TypeOf(String("hi")).Equals(StringType)) } ================================================ FILE: go/types/struct.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "fmt" "regexp" "sort" "strings" "github.com/attic-labs/noms/go/d" ) var EmptyStructType = MakeStructType("") var EmptyStruct = newStruct("", nil, nil) type StructData map[string]Value type Struct struct { valueImpl } // readStruct reads the data provided by a decoder and moves the decoder forward. 
func readStruct(dec *valueDecoder) Struct { start := dec.pos() skipStruct(dec) end := dec.pos() return Struct{valueImpl{dec.vrw, dec.byteSlice(start, end), nil}} } func skipStruct(dec *valueDecoder) { dec.skipKind() dec.skipString() // name count := dec.readCount() for i := uint64(0); i < count; i++ { dec.skipString() dec.skipValue() } } func isStructSameTypeForSure(dec *valueDecoder, t *Type) bool { desc := t.Desc.(StructDesc) dec.skipKind() if !dec.isStringSame(desc.Name) { return false } count := dec.readCount() if count != uint64(len(desc.fields)) { return false } for i := uint64(0); i < count; i++ { if desc.fields[i].Optional { return false } if !dec.isStringSame(desc.fields[i].Name) { return false } if !dec.isValueSameTypeForSure(desc.fields[i].Type) { return false } } return true } func walkStruct(r *refWalker, cb RefCallback) { r.skipKind() r.skipString() // name count := r.readCount() for i := uint64(0); i < count; i++ { r.skipString() r.walkValue(cb) } } func newStruct(name string, fieldNames []string, values []Value) Struct { var vrw ValueReadWriter w := newBinaryNomsWriter() StructKind.writeTo(&w) w.writeString(name) w.writeCount(uint64(len(fieldNames))) for i := 0; i < len(fieldNames); i++ { w.writeString(fieldNames[i]) if vrw == nil { vrw = values[i].(valueReadWriter).valueReadWriter() } values[i].writeTo(&w) } return Struct{valueImpl{vrw, w.data(), nil}} } func NewStruct(name string, data StructData) Struct { verifyStructName(name) fieldNames := make([]string, len(data)) values := make([]Value, len(data)) i := 0 for name := range data { verifyFieldName(name) fieldNames[i] = name i++ } sort.Sort(sort.StringSlice(fieldNames)) for i = 0; i < len(fieldNames); i++ { values[i] = data[fieldNames[i]] } return newStruct(name, fieldNames, values) } // StructTemplate allows creating a template for structs with a known shape // (name and fields). 
If a lot of structs of the same shape are being created // then using a StructTemplate makes that slightly more efficient. type StructTemplate struct { name string fieldNames []string } // MakeStructTemplate creates a new StructTemplate or panics if the name and // fields are not valid. func MakeStructTemplate(name string, fieldNames []string) (t StructTemplate) { t = StructTemplate{name, fieldNames} verifyStructName(name) if len(fieldNames) == 0 { return } verifyFieldName(fieldNames[0]) for i := 1; i < len(fieldNames); i++ { verifyFieldName(fieldNames[i]) d.PanicIfFalse(fieldNames[i] > fieldNames[i-1]) } return } // NewStruct creates a new Struct from the StructTemplate. The order of the // values must match the order of the field names of the StructTemplate. func (st StructTemplate) NewStruct(values []Value) Struct { d.PanicIfFalse(len(st.fieldNames) == len(values)) return newStruct(st.name, st.fieldNames, values) } func (s Struct) Empty() bool { return s.Len() == 0 } // Value interface func (s Struct) Value() Value { return s } func (s Struct) WalkValues(cb ValueCallback) { dec, count := s.decoderSkipToFields() for i := uint64(0); i < count; i++ { dec.skipString() cb(dec.readValue()) } } func (s Struct) typeOf() *Type { dec := s.decoder() return readStructTypeOfValue(&dec) } func readStructTypeOfValue(dec *valueDecoder) *Type { dec.skipKind() name := dec.readString() count := dec.readCount() typeFields := make(structTypeFields, count) for i := uint64(0); i < count; i++ { typeFields[i] = StructField{ Name: dec.readString(), Optional: false, Type: dec.readTypeOfValue(), } } return makeStructTypeQuickly(name, typeFields) } func (s Struct) decoderSkipToFields() (valueDecoder, uint64) { dec := s.decoder() dec.skipKind() dec.skipString() count := dec.readCount() return dec, count } // Len is the number of fields in the struct. func (s Struct) Len() int { _, count := s.decoderSkipToFields() return int(count) } // Name is the name of the struct. 
func (s Struct) Name() string { dec := s.decoder() dec.skipKind() return dec.readString() } // IterFields iterates over the fields, calling cb for every field in the // struct. func (s Struct) IterFields(cb func(name string, value Value) (stop bool)) { dec, count := s.decoderSkipToFields() for i := uint64(0); i < count; i++ { if cb(dec.readString(), dec.readValue()) { break } } } type structPartCallbacks interface { name(n string) count(c uint64) fieldName(n string) fieldValue(v Value) end() } func (s Struct) iterParts(cbs structPartCallbacks) { dec := s.decoder() dec.skipKind() cbs.name(dec.readString()) count := dec.readCount() cbs.count(count) for i := uint64(0); i < count; i++ { cbs.fieldName(dec.readString()) cbs.fieldValue(dec.readValue()) } cbs.end() } // MaybeGet returns the value of a field in the struct. If the struct does not a have a field with // the name name then this returns (nil, false). func (s Struct) MaybeGet(n string) (v Value, found bool) { dec, count := s.decoderSkipToFields() for i := uint64(0); i < count; i++ { name := dec.readString() if name == n { found = true v = dec.readValue() return } if name > n { return } dec.skipValue() } return } // Get returns the value of a field in the struct. If the struct does not a have a field with the // name name then this panics. func (s Struct) Get(n string) Value { v, ok := s.MaybeGet(n) if !ok { d.Chk.Fail(fmt.Sprintf(`Struct has no field "%s"`, n)) } return v } // Set returns a new struct where the field name has been set to value. If name is not an // existing field in the struct or the type of value is different from the old value of the // struct field a new struct type is created. 
func (s Struct) Set(n string, v Value) Struct { verifyFieldName(n) prolog, head, tail, count, found := s.splitFieldsAt(n) w := binaryNomsWriter{make([]byte, len(s.buff)), 0} w.writeRaw(prolog) if !found { count++ } w.writeCount(count) w.writeRaw(head) w.writeString(n) v.writeTo(&w) w.writeRaw(tail) return Struct{valueImpl{s.vrw, w.data(), nil}} } func (s Struct) SetName(name string) Struct { verifyStructName(name) w := binaryNomsWriter{make([]byte, len(s.buff)), 0} StructKind.writeTo(&w) w.writeString(name) dec := s.decoder() dec.skipKind() dec.skipString() w.writeRaw(dec.buff[dec.offset:]) return Struct{valueImpl{s.vrw, w.data(), nil}} } // splitFieldsAt splits the buffer into two parts. The fields coming before the field we are looking for // and the fields coming after it. func (s Struct) splitFieldsAt(name string) (prolog, head, tail []byte, count uint64, found bool) { dec := s.decoder() dec.skipKind() dec.skipString() prolog = dec.buff[:dec.offset] count = dec.readCount() fieldsOffset := dec.offset for i := uint64(0); i < count; i++ { beforeCurrent := dec.offset fn := dec.readString() dec.skipValue() if fn == name { found = true head = dec.buff[fieldsOffset:beforeCurrent] tail = dec.buff[dec.offset:len(dec.buff)] break } if name < fn { head = dec.buff[fieldsOffset:beforeCurrent] tail = dec.buff[beforeCurrent:len(dec.buff)] break } } if head == nil && tail == nil { head = dec.buff[fieldsOffset:dec.offset] } return } // Delete returns a new struct where the field name has been removed. // If name is not an existing field in the struct then the current struct is returned. 
func (s Struct) Delete(n string) Struct { prolog, head, tail, count, found := s.splitFieldsAt(n) if !found { return s } w := binaryNomsWriter{make([]byte, len(s.buff)), 0} w.writeRaw(prolog) w.writeCount(count - 1) w.writeRaw(head) w.writeRaw(tail) return Struct{valueImpl{s.vrw, w.data(), nil}} } func (s Struct) Diff(last Struct, changes chan<- ValueChanged, closeChan <-chan struct{}) { if s.Equals(last) { return } dec1, dec2 := s.decoder(), last.decoder() dec1.skipKind() dec2.skipKind() dec1.skipString() // Ignore names dec2.skipString() count1, count2 := dec1.readCount(), dec2.readCount() i1, i2 := uint64(0), uint64(0) var fn1, fn2 string for i1 < count1 && i2 < count2 { if fn1 == "" { fn1 = dec1.readString() } if fn2 == "" { fn2 = dec2.readString() } var change ValueChanged if fn1 == fn2 { v1, v2 := dec1.readValue(), dec2.readValue() if !v1.Equals(v2) { change = ValueChanged{DiffChangeModified, String(fn1), v2, v1} } i1++ i2++ fn1, fn2 = "", "" } else if fn1 < fn2 { v1 := dec1.readValue() change = ValueChanged{DiffChangeAdded, String(fn1), nil, v1} i1++ fn1 = "" } else { v2 := dec2.readValue() change = ValueChanged{DiffChangeRemoved, String(fn2), v2, nil} i2++ fn2 = "" } if change != (ValueChanged{}) && !sendChange(changes, closeChan, change) { return } } for ; i1 < count1; i1++ { if fn1 == "" { fn1 = dec1.readString() } v1 := dec1.readValue() if !sendChange(changes, closeChan, ValueChanged{DiffChangeAdded, String(fn1), nil, v1}) { return } } for ; i2 < count2; i2++ { if fn2 == "" { fn2 = dec2.readString() } v2 := dec2.readValue() if !sendChange(changes, closeChan, ValueChanged{DiffChangeRemoved, String(fn2), v2, nil}) { return } } } var escapeChar = "Q" var headFieldNamePattern = regexp.MustCompile("[a-zA-Z]") var tailFieldNamePattern = regexp.MustCompile("[a-zA-Z0-9_]") var spaceRegex = regexp.MustCompile("[ ]") var escapeRegex = regexp.MustCompile(escapeChar) var fieldNameComponentRe = regexp.MustCompile("^" + headFieldNamePattern.String() + 
tailFieldNamePattern.String() + "*") var fieldNameRe = regexp.MustCompile(fieldNameComponentRe.String() + "$") type encodingFunc func(string, *regexp.Regexp) string func CamelCaseFieldName(input string) string { //strip invalid struct characters and leave spaces encode := func(s1 string, p *regexp.Regexp) string { if p.MatchString(s1) || spaceRegex.MatchString(s1) { return s1 } return "" } strippedField := escapeField(input, encode) splitField := strings.Fields(strippedField) if len(splitField) == 0 { return "" } //Camelcase field output := strings.ToLower(splitField[0]) if len(splitField) > 1 { for _, field := range splitField[1:] { output += strings.Title(strings.ToLower(field)) } } //Because we are removing characters, we may generate an invalid field name //i.e. -- 1A B, we will remove the first bad chars and process until 1aB //1aB is invalid struct field name so we will return "" if !IsValidStructFieldName(output) { return "" } return output } func escapeField(input string, encode encodingFunc) string { output := "" pattern := headFieldNamePattern for _, ch := range input { output += encode(string([]rune{ch}), pattern) pattern = tailFieldNamePattern } return output } // EscapeStructField escapes names for use as noms structs with regards to non CSV imported data. // Disallowed characters are encoded as 'Q'. // Note that Q itself is also escaped since it is the escape character. func EscapeStructField(input string) string { if !escapeRegex.MatchString(input) && IsValidStructFieldName(input) { return input } encode := func(s1 string, p *regexp.Regexp) string { if p.MatchString(s1) && s1 != escapeChar { return s1 } var hs = fmt.Sprintf("%X", s1) var buf bytes.Buffer buf.WriteString(escapeChar) if len(hs) == 1 { buf.WriteString("0") } buf.WriteString(hs) return buf.String() } return escapeField(input, encode) } // IsValidStructFieldName returns whether the name is valid as a field name in a struct. 
// Valid names must start with `a-zA-Z` and after that `a-zA-Z0-9_`. func IsValidStructFieldName(name string) bool { for i, c := range name { if i == 0 { if !isAlpha(c) { return false } } else if !isAlphaNumOrUnderscore(c) { return false } } return len(name) != 0 } func isAlpha(c rune) bool { return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' } func isAlphaNumOrUnderscore(c rune) bool { return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' } func verifyFields(fs structTypeFields) { for i, f := range fs { verifyFieldName(f.Name) if i > 0 && strings.Compare(fs[i-1].Name, f.Name) >= 0 { d.Chk.Fail("Field names must be unique and ordered alphabetically") } } } func verifyName(name, kind string) { if !IsValidStructFieldName(name) { d.Panic(`Invalid struct%s name: "%s"`, kind, name) } } func verifyFieldName(name string) { verifyName(name, " field") } func verifyStructName(name string) { if name != "" { verifyName(name, "") } } ================================================ FILE: go/types/struct_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// getChunks collects every Ref that v.WalkRefs reports for v.
func getChunks(v Value) (chunks []Ref) {
	v.WalkRefs(func(r Ref) {
		chunks = append(chunks, r)
	})
	return
}

// TestGenericStructEquals checks that two structs built from the same name
// and data are equal in both directions.
func TestGenericStructEquals(t *testing.T) {
	assert := assert.New(t)

	s1 := NewStruct("S1", StructData{"s": String("hi"), "x": Bool(true)})
	s2 := NewStruct("S1", StructData{"s": String("hi"), "x": Bool(true)})

	assert.True(s1.Equals(s2))
	assert.True(s2.Equals(s1))
}

// TestGenericStructChunks checks that a Ref stored in a field is the only
// ref walked, and that it targets the referenced value's hash.
func TestGenericStructChunks(t *testing.T) {
	assert := assert.New(t)

	b := Bool(true)

	s1 := NewStruct("S1", StructData{"r": NewRef(b)})

	assert.Len(getChunks(s1), 1)
	assert.Equal(Bool(true).Hash(), getChunks(s1)[0].TargetHash())
}

// TestGenericStructNew checks Get and MaybeGet on freshly built structs,
// including a lookup of a missing field.
func TestGenericStructNew(t *testing.T) {
	assert := assert.New(t)

	s := NewStruct("S2", StructData{"b": Bool(true), "o": String("hi")})
	assert.True(s.Get("b").Equals(Bool(true)))
	_, ok := s.MaybeGet("missing")
	assert.False(ok)

	s2 := NewStruct("S2", StructData{"b": Bool(false), "o": String("hi")})
	assert.True(s2.Get("b").Equals(Bool(false)))
	o, ok := s2.MaybeGet("o")
	assert.True(ok)
	assert.True(String("hi").Equals(o))
}

// TestGenericStructSet checks Set for replacing a value, changing a field's
// type, adding a field at the end and inserting a field in the middle.
func TestGenericStructSet(t *testing.T) {
	assert := assert.New(t)
	vs := newTestValueStore()

	s := NewStruct("S3", StructData{"b": Bool(true), "o": String("hi")})
	s2 := s.Set("b", Bool(false))

	s3 := s2.Set("b", Bool(true))
	assert.True(s.Equals(s3))

	// Changes the type
	s4 := s.Set("b", Number(42))
	assert.True(MakeStructType("S3",
		StructField{"b", NumberType, false},
		StructField{"o", StringType, false},
	).Equals(TypeOf(s4)))

	// Adds a new field
	s5 := s.Set("x", Number(42))
	assert.True(MakeStructType("S3",
		StructField{"b", BoolType, false},
		StructField{"o", StringType, false},
		StructField{"x", NumberType, false},
	).Equals(TypeOf(s5)))

	// Subtype is not equal.
	s6 := NewStruct("", StructData{"l": NewList(vs, Number(0), Number(1), Bool(false), Bool(true))})
	s7 := s6.Set("l", NewList(vs, Number(2), Number(3)))
	t7 := MakeStructTypeFromFields("", FieldMap{
		"l": MakeListType(NumberType),
	})
	assert.True(t7.Equals(TypeOf(s7)))

	// Inserts a field between two existing ones.
	s8 := NewStruct("S", StructData{"a": Bool(true), "c": Bool(true)})
	s9 := s8.Set("b", Bool(true))
	assert.True(s9.Equals(NewStruct("S", StructData{"a": Bool(true), "b": Bool(true), "c": Bool(true)})))
}

// TestGenericStructDelete checks Delete for a missing field, the last field
// and the only remaining field.
func TestGenericStructDelete(t *testing.T) {
	assert := assert.New(t)

	s1 := NewStruct("S", StructData{"b": Bool(true), "o": String("hi")})

	s2 := s1.Delete("notThere")
	assert.True(s1.Equals(s2))

	s3 := s1.Delete("o")
	s4 := NewStruct("S", StructData{"b": Bool(true)})
	assert.True(s3.Equals(s4))

	s5 := s3.Delete("b")
	s6 := NewStruct("S", StructData{})
	assert.True(s5.Equals(s6))
}

// assertValueChangeEqual compares two ValueChanged records by change type and
// by the encoded form of key, old value and new value; nil values must match
// nil.
func assertValueChangeEqual(assert *assert.Assertions, c1, c2 ValueChanged) {
	assert.Equal(c1.ChangeType, c2.ChangeType)
	assert.Equal(EncodedValue(c1.Key), EncodedValue(c2.Key))
	if c1.NewValue == nil {
		assert.Nil(c2.NewValue)
	} else {
		assert.Equal(EncodedValue(c1.NewValue), EncodedValue(c2.NewValue))
	}
	if c1.OldValue == nil {
		assert.Nil(c2.OldValue)
	} else {
		assert.Equal(EncodedValue(c1.OldValue), EncodedValue(c2.OldValue))
	}
}

// TestStructDiff runs Struct.Diff over pairs of structs and checks the exact
// sequence of changes that is emitted.
func TestStructDiff(t *testing.T) {
	assert := assert.New(t)
	vs := newTestValueStore()

	// assertDiff diffs s1 against s2 in a goroutine and compares the changes
	// received, in order, with expect.
	assertDiff := func(expect []ValueChanged, s1, s2 Struct) {
		changes := make(chan ValueChanged)
		go func() {
			s1.Diff(s2, changes, nil)
			close(changes)
		}()
		i := 0
		for change := range changes {
			assertValueChangeEqual(assert, expect[i], change)
			i++
		}
		assert.Equal(len(expect), i, "Wrong number of changes")
	}

	// vc is shorthand for building an expected ValueChanged.
	vc := func(ct DiffChangeType, fieldName string, oldV, newV Value) ValueChanged {
		return ValueChanged{ct, String(fieldName), oldV, newV}
	}

	s1 := NewStruct("", StructData{"a": Bool(true), "b": String("hi"), "c": Number(4)})

	assertDiff([]ValueChanged{},
		s1, NewStruct("", StructData{"a": Bool(true), "b": String("hi"), "c": Number(4)}))

	assertDiff([]ValueChanged{vc(DiffChangeModified, "a", Bool(false), Bool(true)), vc(DiffChangeModified, "b", String("bye"), String("hi"))},
		s1, NewStruct("", StructData{"a": Bool(false), "b": String("bye"), "c": Number(4)}))

	assertDiff([]ValueChanged{vc(DiffChangeModified, "b", String("bye"), String("hi")), vc(DiffChangeModified, "c", Number(5), Number(4))},
		s1, NewStruct("", StructData{"a": Bool(true), "b": String("bye"), "c": Number(5)}))

	assertDiff([]ValueChanged{vc(DiffChangeModified, "a", Bool(false), Bool(true)), vc(DiffChangeModified, "c", Number(10), Number(4))},
		s1, NewStruct("", StructData{"a": Bool(false), "b": String("hi"), "c": Number(10)}))

	assertDiff([]ValueChanged{vc(DiffChangeAdded, "a", nil, Bool(true))},
		s1, NewStruct("NewType", StructData{"b": String("hi"), "c": Number(4)}))

	assertDiff([]ValueChanged{vc(DiffChangeAdded, "b", nil, String("hi"))},
		s1, NewStruct("NewType", StructData{"a": Bool(true), "c": Number(4)}))

	assertDiff([]ValueChanged{vc(DiffChangeRemoved, "Z", Number(17), nil)},
		s1, NewStruct("NewType", StructData{"Z": Number(17), "a": Bool(true), "b": String("hi"), "c": Number(4)}))

	assertDiff([]ValueChanged{vc(DiffChangeAdded, "b", nil, String("hi")), vc(DiffChangeRemoved, "d", Number(5), nil)},
		s1, NewStruct("NewType", StructData{"a": Bool(true), "c": Number(4), "d": Number(5)}))

	// The same checks with collection-valued fields.
	s2 := NewStruct("", StructData{
		"a": NewList(vs, Number(0), Number(1)),
		"b": NewMap(vs, String("foo"), Bool(false), String("bar"), Bool(true)),
		"c": NewSet(vs, Number(0), Number(1), String("foo")),
	})

	assertDiff([]ValueChanged{},
		s2, NewStruct("", StructData{
			"a": NewList(vs, Number(0), Number(1)),
			"b": NewMap(vs, String("foo"), Bool(false), String("bar"), Bool(true)),
			"c": NewSet(vs, Number(0), Number(1), String("foo")),
		}))

	assertDiff([]ValueChanged{
		vc(DiffChangeModified, "a",
			NewList(vs, Number(1), Number(1)),
			NewList(vs, Number(0), Number(1))),
		vc(DiffChangeModified, "b",
			NewMap(vs, String("foo"), Bool(true), String("bar"), Bool(true)),
			NewMap(vs, String("foo"), Bool(false), String("bar"), Bool(true))),
	},
		s2, NewStruct("", StructData{
			"a": NewList(vs, Number(1), Number(1)),
			"b": NewMap(vs, String("foo"), Bool(true), String("bar"), Bool(true)),
			"c": NewSet(vs, Number(0), Number(1), String("foo")),
		}))

	assertDiff([]ValueChanged{
		vc(DiffChangeModified, "a",
			NewList(vs, Number(0)),
			NewList(vs, Number(0), Number(1))),
		vc(DiffChangeModified, "c",
			NewSet(vs, Number(0), Number(2), String("foo")),
			NewSet(vs, Number(0), Number(1), String("foo"))),
	},
		s2, NewStruct("", StructData{
			"a": NewList(vs, Number(0)),
			"b": NewMap(vs, String("foo"), Bool(false), String("bar"), Bool(true)),
			"c": NewSet(vs, Number(0), Number(2), String("foo")),
		}))

	assertDiff([]ValueChanged{
		vc(DiffChangeModified, "b",
			NewMap(vs, String("boo"), Bool(false), String("bar"), Bool(true)),
			NewMap(vs, String("foo"), Bool(false), String("bar"), Bool(true))),
		vc(DiffChangeModified, "c",
			NewSet(vs, Number(0), Number(1), String("bar")),
			NewSet(vs, Number(0), Number(1), String("foo"))),
	},
		s2, NewStruct("", StructData{
			"a": NewList(vs, Number(0), Number(1)),
			"b": NewMap(vs, String("boo"), Bool(false), String("bar"), Bool(true)),
			"c": NewSet(vs, Number(0), Number(1), String("bar")),
		}))
}

// TestEscStructField checks EscapeStructField against (input, expected)
// pairs stored back to back in cases.
func TestEscStructField(t *testing.T) {
	assert := assert.New(t)
	cases := []string{
		"a", "a",
		"AaZz19_", "AaZz19_",
		"Q", "Q51",
		"AQ1", "AQ511",
		"INSPECTIONQ20STATUS", "INSPECTIONQ5120STATUS",
		"$", "Q24",
		"_content", "Q5Fcontent",
		"Few ¢ents Short", "FewQ20QC2A2entsQ20Short",
		"💩", "QF09F92A9",
		"https://picasaweb.google.com/data", "httpsQ3AQ2FQ2FpicasawebQ2EgoogleQ2EcomQ2Fdata",
	}

	for i := 0; i < len(cases); i += 2 {
		orig, expected := cases[i], cases[i+1]
		assert.Equal(expected, EscapeStructField(orig))
	}
}

// TestMakeStructTemplate checks which struct names, field names and field
// orders MakeStructTemplate accepts, and that a template builds the same
// struct as NewStruct.
func TestMakeStructTemplate(t *testing.T) {
	assert := assert.New(t)

	assertInvalidStructName := func(n string) {
		assert.Panics(func() {
			MakeStructTemplate(n, []string{})
		})
	}

	assertInvalidStructName(" ")
	assertInvalidStructName(" a")
	assertInvalidStructName("a ")
	assertInvalidStructName("0")
	assertInvalidStructName("_")
	assertInvalidStructName("0a")
	assertInvalidStructName("_a")
	assertInvalidStructName("💩")

	assertValidStructName := func(n string) {
		template := MakeStructTemplate(n, []string{})
		str := template.NewStruct(nil)
		assert.Equal(n, str.Name())
	}

	assertValidStructName("")
	assertValidStructName("a")
	assertValidStructName("A")
	assertValidStructName("a0")
	assertValidStructName("a_")
	assertValidStructName("a0_")

	assertInvalidFieldName := func(n string) {
		assert.Panics(func() {
			MakeStructTemplate("", []string{n})
		})
	}

	assertInvalidFieldName("")
	assertInvalidFieldName(" ")
	assertInvalidFieldName(" a")
	assertInvalidFieldName("a ")
	assertInvalidFieldName("0")
	assertInvalidFieldName("_")
	assertInvalidFieldName("0a")
	assertInvalidFieldName("_a")
	assertInvalidFieldName("💩")

	assertValidFieldName := func(n string) {
		MakeStructTemplate("", []string{n})
	}

	assertValidFieldName("a")
	assertValidFieldName("A")
	assertValidFieldName("a0")
	assertValidFieldName("a_")
	assertValidFieldName("a0_")

	assertInvalidFieldOrder := func(n []string) {
		assert.Panics(func() {
			MakeStructTemplate("", n)
		})
	}

	assertInvalidFieldOrder([]string{"a", "a"})
	assertInvalidFieldOrder([]string{"b", "a"})
	assertInvalidFieldOrder([]string{"a", "c", "b"})

	assertValidFieldOrder := func(n []string) {
		MakeStructTemplate("", n)
	}

	assertValidFieldOrder([]string{"a", "b"})
	assertValidFieldOrder([]string{"a", "b", "c"})

	template := MakeStructTemplate("A", []string{"a", "b"})
	str := template.NewStruct([]Value{Number(42), Bool(true)})
	assert.True(NewStruct("A", StructData{
		"a": Number(42),
		"b": Bool(true),
	}).Equals(str))
}

// TestStructWithNil checks that NewStruct panics when any field value is nil.
func TestStructWithNil(t *testing.T) {
	assert.Panics(t, func() {
		NewStruct("A", StructData{
			"a": nil,
		})
	})
	assert.Panics(t, func() {
		NewStruct("A", StructData{
			"a": Number(42),
			"b": nil,
		})
	})
}

// TestStructIterFields checks that IterFields visits every field and that
// returning true from the callback stops the iteration.
func TestStructIterFields(t *testing.T) {
	assert := assert.New(t)

	tstruct := NewStruct("A", StructData{
		"a": String("aaa"),
		"b": String("bbb"),
		"c": String("ccc"),
	})

	// Iterate over all.
	i := 0
	tstruct.IterFields(func(k string, v Value) bool {
		assert.True(tstruct.Get(k).Equals(v))
		i += 1
		return false
	})
	assert.Equal(3, i)

	// Iterate and stop.
	i = 0
	tstruct.IterFields(func(k string, v Value) bool {
		if k == "b" {
			return true
		}
		i += 1
		return false
	})
	assert.Equal(1, i)
}

// TestStructSetName checks that SetName changes the name and leaves the
// fields untouched.
func TestStructSetName(t *testing.T) {
	assert := assert.New(t)

	s1 := NewStruct("", StructData{"foo": Number(42), "bar": String("baz")})
	s2 := s1.SetName("S")
	assert.Equal(2, s2.Len())
	assert.Equal(42.0, float64(s2.Get("foo").(Number)))
	assert.Equal("baz", string(s2.Get("bar").(String)))
	assert.Equal("S", s2.Name())
}

================================================
FILE: go/types/subtype.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"github.com/attic-labs/noms/go/d"
)

// assertSubtype panics via d.Panic when v is not a value of a subtype of t.
func assertSubtype(t *Type, v Value) {
	if !IsValueSubtypeOf(v, t) {
		d.Panic("Invalid type. %s is not a subtype of %s", TypeOf(v).Describe(), t.Describe())
	}
}

// IsSubtype determines whether concreteType is a subtype of requiredType. For example, `Number` is a subtype of `Number | String`.
func IsSubtype(requiredType, concreteType *Type) bool {
	isSub, _ := isSubtypeTopLevel(requiredType, concreteType)
	return isSub
}

// IsSubtypeDisallowExtraStructFields is a slightly weird variant of IsSubtype. It returns true IFF IsSubtype(requiredType, concreteType) AND Structs in concreteType CANNOT have field names absent in requiredType
// ISSUE: https://github.com/attic-labs/noms/issues/3446
func IsSubtypeDisallowExtraStructFields(requiredType, concreteType *Type) bool {
	isSub, hasExtra := isSubtypeDetails(requiredType, concreteType, false, nil)
	if hasExtra {
		return false
	}
	return isSub
}

// isSubtypeTopLevel returns two values: isSub and hasExtra. See IsValueSubtypeOfDetails()
// below for an explanation.
func isSubtypeTopLevel(requiredType, concreteType *Type) (isSub bool, hasExtra bool) { return isSubtypeDetails(requiredType, concreteType, false, nil) } // IsSubtypeDetails returns two values: // isSub - which indicates whether concreteType is a subtype of requiredType. // hasExtra - which indicates whether concreteType has additional fields. // See comment below on isValueSubtypeOfDetails func isSubtypeDetails(requiredType, concreteType *Type, hasExtra bool, parentStructTypes []*Type) (bool, bool) { if requiredType.Equals(concreteType) { return true, hasExtra } // If the concrete type is a union, all component types must be compatible. if concreteType.TargetKind() == UnionKind { for _, t := range concreteType.Desc.(CompoundDesc).ElemTypes { isSub, hasMore := isSubtypeDetails(requiredType, t, hasExtra, parentStructTypes) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore } return true, hasExtra } // If the required type is a union, at least one of the component types must be compatible. 
if requiredType.TargetKind() == UnionKind { for _, t := range requiredType.Desc.(CompoundDesc).ElemTypes { isSub, hasMore := isSubtypeDetails(t, concreteType, hasExtra, parentStructTypes) if isSub { hasExtra = hasExtra || hasMore return true, hasExtra } } return false, hasExtra } if requiredType.TargetKind() != concreteType.TargetKind() { return requiredType.TargetKind() == ValueKind, hasExtra } if desc, ok := requiredType.Desc.(CompoundDesc); ok { concreteElemTypes := concreteType.Desc.(CompoundDesc).ElemTypes for i, t := range desc.ElemTypes { isSub, hasMore := compoundSubtype(t, concreteElemTypes[i], hasExtra, parentStructTypes) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore } return true, hasExtra } if requiredType.TargetKind() == StructKind { requiredDesc := requiredType.Desc.(StructDesc) concreteDesc := concreteType.Desc.(StructDesc) if requiredDesc.Name != "" && requiredDesc.Name != concreteDesc.Name { return false, hasExtra } // We may already be computing the subtype for this type if we have a cycle. // In that case we exit the recursive check. We may still find that the type // is not a subtype but that will be handled at a higher level in the callstack. 
_, found := indexOfType(requiredType, parentStructTypes) if found { return true, hasExtra } i, j := 0, 0 for i < requiredDesc.Len() && j < concreteDesc.Len() { requiredField := requiredDesc.fields[i] concreteField := concreteDesc.fields[j] if requiredField.Name == concreteField.Name { // Common field name if !requiredField.Optional && concreteField.Optional { return false, hasExtra } isSub, hasMore := isSubtypeDetails(requiredField.Type, concreteField.Type, hasExtra, append(parentStructTypes, requiredType)) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore i++ j++ continue } if requiredField.Name < concreteField.Name { // Concrete lacks field in required if !requiredField.Optional { return false, hasExtra } i++ } else { // Concrete contains extra field hasExtra = true j++ } } for i < requiredDesc.Len() { // Fields in required not in concrete if !requiredDesc.fields[i].Optional { hasExtra = true return false, hasExtra } i++ } hasExtra = hasExtra || j < concreteDesc.Len() return true, hasExtra } panic("unreachable") } // compoundSubtype is called when comparing the element types of two compound types. This is the only case // where a concrete type may have be a union type. func compoundSubtype(requiredType, concreteType *Type, hasExtra bool, parentStructTypes []*Type) (bool, bool) { // If the concrete type is a union then all the types in the union must be subtypes of the required typ. This also means that a compound type with an empty union is going to be a subtype of all compounds, List<> is a subtype of List for all T. 
if concreteType.TargetKind() == UnionKind { for _, ct := range concreteType.Desc.(CompoundDesc).ElemTypes { isSub, hasExtra1 := isSubtypeDetails(requiredType, ct, hasExtra, parentStructTypes) if !isSub { return false, hasExtra1 } } return true, hasExtra } return isSubtypeDetails(requiredType, concreteType, hasExtra, parentStructTypes) } func IsValueSubtypeOf(v Value, t *Type) bool { isSub, _ := isValueSubtypeOfDetails(v, t, false) return isSub } // IsValueSubtypeOfDetails returns two values: // isSub - which indicates whether v is a subtype of t. // hasExtra - which indicates whether v has additional fields. This field has // no meaning if IsSub is false. // // For example, given the following data: // type1 := struct S { v := Struct S1 { // a Number | string a: "hello" // b ?int b: 2 // } } // IsValueSubtypeOfDetails(v, type1) would return isSub == true, and hasExtra == false // // And given these types: // type2 := struct S { v := Struct S1 { // a Number | string a: "hello" // b ?int b: 2 // } c: "hello again" // } // IsValueSubtypeOfDetails(v, type1) would return isSub == true, and hasExtra == true func IsValueSubtypeOfDetails(v Value, t *Type) (bool, bool) { return isValueSubtypeOfDetails(v, t, false) } func isValueSubtypeOfDetails(v Value, t *Type, hasExtra bool) (bool, bool) { switch t.TargetKind() { case BoolKind, NumberKind, StringKind, BlobKind, TypeKind: return v.Kind() == t.TargetKind(), hasExtra case ValueKind: return true, hasExtra case UnionKind: var anonStruct *Type for _, et := range t.Desc.(CompoundDesc).ElemTypes { // Typically if IsSubtype(v.Type(), A|B|C|...) then exactly one of the // element types in the union will be a supertype of v.Type() because // of type simplification rules (only one of each kind is allowed in // the simplified union except for structs, where one of each unique // struct name is allowed). // // However there is one exception which is that type simplification // allows the struct with empty name. 
So if v.Type() is a struct with a // name, then it is possible for *two* elements in the union to match // it -- a struct with that same name, and a struct with no name. // // So if we happen across an element type that is an anonymous struct, we // save it for later and only try to use it if we can't find anything // better. if et.TargetKind() == StructKind && et.Desc.(StructDesc).Name == "" { anonStruct = et continue } isSub, hasMore := isValueSubtypeOfDetails(v, et, hasExtra) if isSub { hasExtra = hasExtra || hasMore return isSub, hasExtra } } if anonStruct != nil { isSub, hasMore := isValueSubtypeOfDetails(v, anonStruct, hasExtra) if isSub { hasExtra = hasExtra || hasMore return isSub, hasExtra } } return false, hasExtra case CycleKind: panic("unreachable") // CycleKind are ephemeral. default: if v.Kind() != t.TargetKind() { return false, hasExtra } } switch desc := t.Desc.(type) { case StructDesc: // If we provide a named struct type we require that the names match. s := v.(Struct) if desc.Name != "" && desc.Name != s.Name() { return false, hasExtra } missingOptionalFieldCnt := 0 for _, f := range desc.fields { fv, ok := s.MaybeGet(f.Name) if !ok { if f.Optional { missingOptionalFieldCnt += 1 } else { return false, hasExtra } } else { isSub, hasMore := isValueSubtypeOfDetails(fv, f.Type, hasExtra) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore } } if s.Len()+missingOptionalFieldCnt > len(desc.fields) { hasExtra = true } return true, hasExtra case CompoundDesc: switch v := v.(type) { case Ref: // Switching to the type is subtype of type here. 
return isSubtypeTopLevel(desc.ElemTypes[0], v.TargetType()) case Map: kt := desc.ElemTypes[0] vt := desc.ElemTypes[1] if seq, ok := v.orderedSequence.(mapLeafSequence); ok { for _, entry := range seq.entries() { isSub, hasMore := isValueSubtypeOfDetails(entry.key, kt, hasExtra) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore isSub, hasExtra = isValueSubtypeOfDetails(entry.value, vt, hasExtra) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore } return true, hasExtra } return isMetaSequenceSubtypeOf(v.orderedSequence.(metaSequence), t, hasExtra) case Set: et := desc.ElemTypes[0] if seq, ok := v.orderedSequence.(setLeafSequence); ok { for _, v := range seq.values() { isSub, hasMore := isValueSubtypeOfDetails(v, et, hasExtra) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore } return true, hasExtra } return isMetaSequenceSubtypeOf(v.orderedSequence.(metaSequence), t, hasExtra) case List: et := desc.ElemTypes[0] if seq, ok := v.sequence.(listLeafSequence); ok { for _, v := range seq.values() { isSub, hasMore := isValueSubtypeOfDetails(v, et, hasExtra) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore } return true, hasExtra } return isMetaSequenceSubtypeOf(v.sequence.(metaSequence), t, hasExtra) } } panic("unreachable") } func isMetaSequenceSubtypeOf(ms metaSequence, t *Type, hasExtra bool) (bool, bool) { // TODO: iterRefs for _, mt := range ms.tuples() { // Each prolly tree is also a List where T needs to be a subtype. isSub, hasMore := isSubtypeTopLevel(t, mt.ref().TargetType()) if !isSub { return false, hasExtra } hasExtra = hasExtra || hasMore } return true, hasExtra } ================================================ FILE: go/types/subtype_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "strings" "testing" "github.com/attic-labs/noms/go/d" "github.com/stretchr/testify/assert" ) func assertInvalid(tt *testing.T, t *Type, v Value) { assert := assert.New(tt) assert.Panics(func() { assertSubtype(t, v) }) } func assertAll(tt *testing.T, t *Type, v Value) { allTypes := []*Type{ BoolType, NumberType, StringType, BlobType, TypeType, ValueType, } for _, at := range allTypes { if at == ValueType || t.Equals(at) { assertSubtype(at, v) } else { assertInvalid(tt, at, v) } } } func TestAssertTypePrimitives(t *testing.T) { assertSubtype(BoolType, Bool(true)) assertSubtype(BoolType, Bool(false)) assertSubtype(NumberType, Number(42)) assertSubtype(StringType, String("abc")) assertInvalid(t, BoolType, Number(1)) assertInvalid(t, BoolType, String("abc")) assertInvalid(t, NumberType, Bool(true)) assertInvalid(t, StringType, Number(42)) } func TestAssertTypeValue(t *testing.T) { vs := newTestValueStore() assertSubtype(ValueType, Bool(true)) assertSubtype(ValueType, Number(1)) assertSubtype(ValueType, String("abc")) l := NewList(vs, Number(0), Number(1), Number(2), Number(3)) assertSubtype(ValueType, l) } func TestAssertTypeBlob(t *testing.T) { vs := newTestValueStore() blob := NewBlob(vs, bytes.NewBuffer([]byte{0x00, 0x01})) assertAll(t, BlobType, blob) } func TestAssertTypeList(tt *testing.T) { vs := newTestValueStore() listOfNumberType := MakeListType(NumberType) l := NewList(vs, Number(0), Number(1), Number(2), Number(3)) assertSubtype(listOfNumberType, l) assertAll(tt, listOfNumberType, l) assertSubtype(MakeListType(ValueType), l) } func TestAssertTypeMap(tt *testing.T) { vs := newTestValueStore() mapOfNumberToStringType := MakeMapType(NumberType, StringType) m := NewMap(vs, Number(0), String("a"), Number(2), String("b")) assertSubtype(mapOfNumberToStringType, m) assertAll(tt, mapOfNumberToStringType, m) 
assertSubtype(MakeMapType(ValueType, ValueType), m) } func TestAssertTypeSet(tt *testing.T) { vs := newTestValueStore() setOfNumberType := MakeSetType(NumberType) s := NewSet(vs, Number(0), Number(1), Number(2), Number(3)) assertSubtype(setOfNumberType, s) assertAll(tt, setOfNumberType, s) assertSubtype(MakeSetType(ValueType), s) } func TestAssertTypeType(tt *testing.T) { t := MakeSetType(NumberType) assertSubtype(TypeType, t) assertAll(tt, TypeType, t) assertSubtype(ValueType, t) } func TestAssertTypeStruct(tt *testing.T) { t := MakeStructType("Struct", StructField{"x", BoolType, false}) v := NewStruct("Struct", StructData{"x": Bool(true)}) assertSubtype(t, v) assertAll(tt, t, v) assertSubtype(ValueType, v) } func TestAssertTypeUnion(tt *testing.T) { vs := newTestValueStore() assertSubtype(MakeUnionType(NumberType), Number(42)) assertSubtype(MakeUnionType(NumberType, StringType), Number(42)) assertSubtype(MakeUnionType(NumberType, StringType), String("hi")) assertSubtype(MakeUnionType(NumberType, StringType, BoolType), Number(555)) assertSubtype(MakeUnionType(NumberType, StringType, BoolType), String("hi")) assertSubtype(MakeUnionType(NumberType, StringType, BoolType), Bool(true)) lt := MakeListType(MakeUnionType(NumberType, StringType)) assertSubtype(lt, NewList(vs, Number(1), String("hi"), Number(2), String("bye"))) st := MakeSetType(StringType) assertSubtype(MakeUnionType(st, NumberType), Number(42)) assertSubtype(MakeUnionType(st, NumberType), NewSet(vs, String("a"), String("b"))) assertInvalid(tt, MakeUnionType(), Number(42)) assertInvalid(tt, MakeUnionType(StringType), Number(42)) assertInvalid(tt, MakeUnionType(StringType, BoolType), Number(42)) assertInvalid(tt, MakeUnionType(st, StringType), Number(42)) assertInvalid(tt, MakeUnionType(st, NumberType), NewSet(vs, Number(1), Number(2))) } func TestAssertConcreteTypeIsUnion(tt *testing.T) { assert.True(tt, IsSubtype( MakeStructTypeFromFields("", FieldMap{}), MakeUnionType( MakeStructTypeFromFields("", 
FieldMap{"foo": StringType}), MakeStructTypeFromFields("", FieldMap{"bar": StringType})))) assert.False(tt, IsSubtype( MakeStructTypeFromFields("", FieldMap{}), MakeUnionType(MakeStructTypeFromFields("", FieldMap{"foo": StringType}), NumberType))) assert.True(tt, IsSubtype( MakeUnionType( MakeStructTypeFromFields("", FieldMap{"foo": StringType}), MakeStructTypeFromFields("", FieldMap{"bar": StringType})), MakeUnionType( MakeStructTypeFromFields("", FieldMap{"foo": StringType, "bar": StringType}), MakeStructTypeFromFields("", FieldMap{"bar": StringType})))) assert.False(tt, IsSubtype( MakeUnionType( MakeStructTypeFromFields("", FieldMap{"foo": StringType}), MakeStructTypeFromFields("", FieldMap{"bar": StringType})), MakeUnionType( MakeStructTypeFromFields("", FieldMap{"foo": StringType, "bar": StringType}), NumberType))) } func TestAssertTypeEmptyListUnion(tt *testing.T) { vs := newTestValueStore() lt := MakeListType(MakeUnionType()) assertSubtype(lt, NewList(vs)) } func TestAssertTypeEmptyList(tt *testing.T) { vs := newTestValueStore() lt := MakeListType(NumberType) assertSubtype(lt, NewList(vs)) // List<> not a subtype of List assertInvalid(tt, MakeListType(MakeUnionType()), NewList(vs, Number(1))) } func TestAssertTypeEmptySet(tt *testing.T) { vs := newTestValueStore() st := MakeSetType(NumberType) assertSubtype(st, NewSet(vs)) // Set<> not a subtype of Set assertInvalid(tt, MakeSetType(MakeUnionType()), NewSet(vs, Number(1))) } func TestAssertTypeEmptyMap(tt *testing.T) { vs := newTestValueStore() mt := MakeMapType(NumberType, StringType) assertSubtype(mt, NewMap(vs)) // Map<> not a subtype of Map assertInvalid(tt, MakeMapType(MakeUnionType(), MakeUnionType()), NewMap(vs, Number(1), Number(2))) } func TestAssertTypeStructSubtypeByName(tt *testing.T) { namedT := MakeStructType("Name", StructField{"x", NumberType, false}) anonT := MakeStructType("", StructField{"x", NumberType, false}) namedV := NewStruct("Name", StructData{"x": Number(42)}) name2V := 
NewStruct("foo", StructData{"x": Number(42)}) anonV := NewStruct("", StructData{"x": Number(42)}) assertSubtype(namedT, namedV) assertInvalid(tt, namedT, name2V) assertInvalid(tt, namedT, anonV) assertSubtype(anonT, namedV) assertSubtype(anonT, name2V) assertSubtype(anonT, anonV) } func TestAssertTypeStructSubtypeExtraFields(tt *testing.T) { at := MakeStructType("") bt := MakeStructType("", StructField{"x", NumberType, false}) ct := MakeStructType("", StructField{"s", StringType, false}, StructField{"x", NumberType, false}) av := NewStruct("", StructData{}) bv := NewStruct("", StructData{"x": Number(1)}) cv := NewStruct("", StructData{"x": Number(2), "s": String("hi")}) assertSubtype(at, av) assertInvalid(tt, bt, av) assertInvalid(tt, ct, av) assertSubtype(at, bv) assertSubtype(bt, bv) assertInvalid(tt, ct, bv) assertSubtype(at, cv) assertSubtype(bt, cv) assertSubtype(ct, cv) } func TestAssertTypeStructSubtype(tt *testing.T) { vs := newTestValueStore() c1 := NewStruct("Commit", StructData{ "value": Number(1), "parents": NewSet(vs), }) t1 := MakeStructType("Commit", StructField{"parents", MakeSetType(MakeUnionType()), false}, StructField{"value", NumberType, false}, ) assertSubtype(t1, c1) t11 := MakeStructType("Commit", StructField{"parents", MakeSetType(MakeRefType(MakeCycleType("Commit"))), false}, StructField{"value", NumberType, false}, ) assertSubtype(t11, c1) c2 := NewStruct("Commit", StructData{ "value": Number(2), "parents": NewSet(vs, NewRef(c1)), }) assertSubtype(t11, c2) } func TestAssertTypeCycleUnion(tt *testing.T) { // struct S { // x: Cycle, // y: Number, // } t1 := MakeStructType("S", StructField{"x", MakeCycleType("S"), false}, StructField{"y", NumberType, false}, ) // struct S { // x: Cycle, // y: Number | String, // } t2 := MakeStructType("S", StructField{"x", MakeCycleType("S"), false}, StructField{"y", MakeUnionType(NumberType, StringType), false}, ) assert.True(tt, IsSubtype(t2, t1)) assert.False(tt, IsSubtype(t1, t2)) // struct S { // x: 
Cycle | Number, // y: Number | String, // } t3 := MakeStructType("S", StructField{"x", MakeUnionType(MakeCycleType("S"), NumberType), false}, StructField{"y", MakeUnionType(NumberType, StringType), false}, ) assert.True(tt, IsSubtype(t3, t1)) assert.False(tt, IsSubtype(t1, t3)) assert.True(tt, IsSubtype(t3, t2)) assert.False(tt, IsSubtype(t2, t3)) // struct S { // x: Cycle | Number, // y: Number, // } t4 := MakeStructType("S", StructField{"x", MakeUnionType(MakeCycleType("S"), NumberType), false}, StructField{"y", NumberType, false}, ) assert.True(tt, IsSubtype(t4, t1)) assert.False(tt, IsSubtype(t1, t4)) assert.False(tt, IsSubtype(t4, t2)) assert.False(tt, IsSubtype(t2, t4)) assert.True(tt, IsSubtype(t3, t4)) assert.False(tt, IsSubtype(t4, t3)) // struct B { // b: struct C { // c: Cycle, // }, // } // struct C { // c: struct B { // b: Cycle, // }, // } tb := MakeStructType("A", StructField{ "b", MakeStructType("B", StructField{"c", MakeCycleType("A"), false}), false, }, ) tc := MakeStructType("A", StructField{ "c", MakeStructType("B", StructField{"b", MakeCycleType("A"), false}), false, }, ) assert.False(tt, IsSubtype(tb, tc)) assert.False(tt, IsSubtype(tc, tb)) } func TestIsSubtypeEmptySruct(tt *testing.T) { // struct { // a: Number, // b: struct {}, // } t1 := MakeStructType("X", StructField{"a", NumberType, false}, StructField{"b", EmptyStructType, false}, ) // struct { // a: Number, // } t2 := MakeStructType("X", StructField{"a", NumberType, false}) assert.False(tt, IsSubtype(t1, t2)) assert.True(tt, IsSubtype(t2, t1)) } func TestIsSubtypeCompoundUnion(tt *testing.T) { rt := MakeListType(EmptyStructType) st1 := MakeStructType("One", StructField{"a", NumberType, false}) st2 := MakeStructType("Two", StructField{"b", StringType, false}) ct := MakeListType(MakeUnionType(st1, st2)) assert.True(tt, IsSubtype(rt, ct)) assert.False(tt, IsSubtype(ct, rt)) ct2 := MakeListType(MakeUnionType(st1, st2, NumberType)) assert.False(tt, IsSubtype(rt, ct2)) assert.False(tt, 
IsSubtype(ct2, rt)) } func TestIsSubtypeOptionalFields(tt *testing.T) { assert := assert.New(tt) s1 := MakeStructType("", StructField{"a", NumberType, true}) s2 := MakeStructType("", StructField{"a", NumberType, false}) assert.True(IsSubtype(s1, s2)) assert.False(IsSubtype(s2, s1)) s3 := MakeStructType("", StructField{"a", StringType, false}) assert.False(IsSubtype(s1, s3)) assert.False(IsSubtype(s3, s1)) s4 := MakeStructType("", StructField{"a", StringType, true}) assert.False(IsSubtype(s1, s4)) assert.False(IsSubtype(s4, s1)) test := func(t1s, t2s string, exp1, exp2 bool) { t1 := makeTestStructTypeFromFieldNames(t1s) t2 := makeTestStructTypeFromFieldNames(t2s) assert.Equal(exp1, IsSubtype(t1, t2)) assert.Equal(exp2, IsSubtype(t2, t1)) assert.False(t1.Equals(t2)) } test("n?", "n", true, false) test("", "n", true, false) test("", "n?", true, true) test("a b?", "a", true, true) test("a b?", "a b", true, false) test("a b? c", "a b c", true, false) test("b? c", "a b c", true, false) test("b? c", "b c", true, false) test("a c e", "a b c d e", true, false) test("a c e?", "a b c d e", true, false) test("a c? e", "a b c d e", true, false) test("a c? e?", "a b c d e", true, false) test("a? c e", "a b c d e", true, false) test("a? c e?", "a b c d e", true, false) test("a? c? e", "a b c d e", true, false) test("a? c? e?", "a b c d e", true, false) test("a c e?", "a b c d", true, false) test("a c? e", "a b d e", true, false) test("a c? e?", "a b d", true, false) test("a? c e", "b c d e", true, false) test("a? c e?", "b c d", true, false) test("a? c? e", "b d e", true, false) test("a? c? 
e?", "b d", true, false) t1 := MakeStructType("", StructField{"a", BoolType, true}) t2 := MakeStructType("", StructField{"a", NumberType, true}) assert.False(IsSubtype(t1, t2)) assert.False(IsSubtype(t2, t1)) } func makeTestStructTypeFromFieldNames(s string) *Type { if s == "" { return MakeStructType("") } fs := strings.Split(s, " ") fields := make([]StructField, len(fs)) for i, f := range fs { optional := false if f[len(f)-1:] == "?" { f = f[:len(f)-1] optional = true } fields[i] = StructField{f, BoolType, optional} } return MakeStructType("", fields...) } func makeTestStructFromFieldNames(s string) Struct { t := makeTestStructTypeFromFieldNames(s) fields := t.Desc.(StructDesc).fields d.Chk.NotEmpty(fields) fieldNames := make([]string, len(fields)) for i, field := range fields { fieldNames[i] = field.Name } vals := make([]Value, len(fields)) for i := range fields { vals[i] = Bool(true) } return newStruct("", fieldNames, vals) } func TestIsSubtypeDisallowExtraStructFields(tt *testing.T) { assert := assert.New(tt) test := func(t1s, t2s string, exp1, exp2 bool) { t1 := makeTestStructTypeFromFieldNames(t1s) t2 := makeTestStructTypeFromFieldNames(t2s) assert.Equal(exp1, IsSubtypeDisallowExtraStructFields(t1, t2)) assert.Equal(exp2, IsSubtypeDisallowExtraStructFields(t2, t1)) assert.False(t1.Equals(t2)) } test("n?", "n", true, false) test("", "n", false, false) test("", "n?", false, true) test("a b?", "a", true, false) test("a b?", "a b", true, false) test("a b? c", "a b c", true, false) test("b? c", "a b c", false, false) test("b? c", "b c", true, false) test("a c e", "a b c d e", false, false) test("a c e?", "a b c d e", false, false) test("a c? e", "a b c d e", false, false) test("a c? e?", "a b c d e", false, false) test("a? c e", "a b c d e", false, false) test("a? c e?", "a b c d e", false, false) test("a? c? e", "a b c d e", false, false) test("a? c? e?", "a b c d e", false, false) test("a c e?", "a b c d", false, false) test("a c? 
e", "a b d e", false, false) test("a c? e?", "a b d", false, false) test("a? c e", "b c d e", false, false) test("a? c e?", "b c d", false, false) test("a? c? e", "b d e", false, false) test("a? c? e?", "b d", false, false) } func TestIsValueSubtypeOf(tt *testing.T) { assert := assert.New(tt) vs := newTestValueStore() assertTrue := func(v Value, t *Type) { assert.True(IsValueSubtypeOf(v, t)) } assertFalse := func(v Value, t *Type) { assert.False(IsValueSubtypeOf(v, t)) } allTypes := []struct { v Value t *Type }{ {Bool(true), BoolType}, {Number(42), NumberType}, {String("s"), StringType}, {NewEmptyBlob(vs), BlobType}, {BoolType, TypeType}, {NewList(vs, Number(42)), MakeListType(NumberType)}, {NewSet(vs, Number(42)), MakeSetType(NumberType)}, {NewRef(Number(42)), MakeRefType(NumberType)}, {NewMap(vs, Number(42), String("a")), MakeMapType(NumberType, StringType)}, {NewStruct("A", StructData{}), MakeStructType("A")}, // Not including CycleType or Union here } for i, rec := range allTypes { for j, rec2 := range allTypes { if i == j { assertTrue(rec.v, rec.t) } else { assertFalse(rec.v, rec2.t) assertFalse(rec2.v, rec.t) } } } for _, rec := range allTypes { assertTrue(rec.v, ValueType) } assertTrue(Bool(true), MakeUnionType(BoolType, NumberType)) assertTrue(Number(123), MakeUnionType(BoolType, NumberType)) assertFalse(String("abc"), MakeUnionType(BoolType, NumberType)) assertFalse(String("abc"), MakeUnionType()) assertTrue(NewList(vs), MakeListType(NumberType)) assertTrue(NewList(vs, Number(0), Number(1), Number(2), Number(3)), MakeListType(NumberType)) assertFalse(NewList(vs, Number(0), Number(1), Number(2), Number(3)), MakeListType(BoolType)) assertTrue(NewList(vs, Number(0), Number(1), Number(2), Number(3)), MakeListType(MakeUnionType(NumberType, BoolType))) assertTrue(NewList(vs, Number(0), Bool(true)), MakeListType(MakeUnionType(NumberType, BoolType))) assertFalse(NewList(vs, Number(0)), MakeListType(MakeUnionType())) assertTrue(NewList(vs), 
MakeListType(MakeUnionType())) { newChunkedList := func(vals ...Value) List { newSequenceMetaTuple := func(v Value) metaTuple { seq := newListLeafSequence(vs, v) list := newList(seq) return newMetaTuple(vs.WriteValue(list), newOrderedKey(v), 1) } tuples := make([]metaTuple, len(vals)) for i, v := range vals { tuples[i] = newSequenceMetaTuple(v) } return newList(newListMetaSequence(1, tuples, vs)) } assertTrue(newChunkedList(Number(0), Number(1), Number(2), Number(3)), MakeListType(NumberType)) assertFalse(newChunkedList(Number(0), Number(1), Number(2), Number(3)), MakeListType(BoolType)) assertTrue(newChunkedList(Number(0), Number(1), Number(2), Number(3)), MakeListType(MakeUnionType(NumberType, BoolType))) assertTrue(newChunkedList(Number(0), Bool(true)), MakeListType(MakeUnionType(NumberType, BoolType))) assertFalse(newChunkedList(Number(0)), MakeListType(MakeUnionType())) } assertTrue(NewSet(vs), MakeSetType(NumberType)) assertTrue(NewSet(vs, Number(0), Number(1), Number(2), Number(3)), MakeSetType(NumberType)) assertFalse(NewSet(vs, Number(0), Number(1), Number(2), Number(3)), MakeSetType(BoolType)) assertTrue(NewSet(vs, Number(0), Number(1), Number(2), Number(3)), MakeSetType(MakeUnionType(NumberType, BoolType))) assertTrue(NewSet(vs, Number(0), Bool(true)), MakeSetType(MakeUnionType(NumberType, BoolType))) assertFalse(NewSet(vs, Number(0)), MakeSetType(MakeUnionType())) assertTrue(NewSet(vs), MakeSetType(MakeUnionType())) { newChunkedSet := func(vals ...Value) Set { newSequenceMetaTuple := func(v Value) metaTuple { seq := newSetLeafSequence(vs, v) set := newSet(seq) return newMetaTuple(vs.WriteValue(set), newOrderedKey(v), 1) } tuples := make([]metaTuple, len(vals)) for i, v := range vals { tuples[i] = newSequenceMetaTuple(v) } return newSet(newSetMetaSequence(1, tuples, vs)) } assertTrue(newChunkedSet(Number(0), Number(1), Number(2), Number(3)), MakeSetType(NumberType)) assertFalse(newChunkedSet(Number(0), Number(1), Number(2), Number(3)), 
MakeSetType(BoolType)) assertTrue(newChunkedSet(Number(0), Number(1), Number(2), Number(3)), MakeSetType(MakeUnionType(NumberType, BoolType))) assertTrue(newChunkedSet(Number(0), Bool(true)), MakeSetType(MakeUnionType(NumberType, BoolType))) assertFalse(newChunkedSet(Number(0)), MakeSetType(MakeUnionType())) } assertTrue(NewMap(vs), MakeMapType(NumberType, StringType)) assertTrue(NewMap(vs, Number(0), String("a"), Number(1), String("b")), MakeMapType(NumberType, StringType)) assertFalse(NewMap(vs, Number(0), String("a"), Number(1), String("b")), MakeMapType(BoolType, StringType)) assertFalse(NewMap(vs, Number(0), String("a"), Number(1), String("b")), MakeMapType(NumberType, BoolType)) assertTrue(NewMap(vs, Number(0), String("a"), Number(1), String("b")), MakeMapType(MakeUnionType(NumberType, BoolType), StringType)) assertTrue(NewMap(vs, Number(0), String("a"), Number(1), String("b")), MakeMapType(NumberType, MakeUnionType(BoolType, StringType))) assertTrue(NewMap(vs, Number(0), String("a"), Bool(true), String("b")), MakeMapType(MakeUnionType(NumberType, BoolType), StringType)) assertTrue(NewMap(vs, Number(0), String("a"), Number(1), Bool(true)), MakeMapType(NumberType, MakeUnionType(BoolType, StringType))) assertFalse(NewMap(vs, Number(0), String("a")), MakeMapType(MakeUnionType(), StringType)) assertFalse(NewMap(vs, Number(0), String("a")), MakeMapType(NumberType, MakeUnionType())) assertTrue(NewMap(vs), MakeMapType(MakeUnionType(), MakeUnionType())) { newChunkedMap := func(vals ...Value) Map { newSequenceMetaTuple := func(e mapEntry) metaTuple { seq := newMapLeafSequence(vs, e) m := newMap(seq) return newMetaTuple(vs.WriteValue(m), newOrderedKey(e.key), 1) } tuples := make([]metaTuple, len(vals)/2) for i := 0; i < len(vals); i += 2 { tuples[i/2] = newSequenceMetaTuple(mapEntry{vals[i], vals[i+1]}) } return newMap(newMapMetaSequence(1, tuples, vs)) } assertTrue(newChunkedMap(Number(0), String("a"), Number(1), String("b")), MakeMapType(NumberType, StringType)) 
assertFalse(newChunkedMap(Number(0), String("a"), Number(1), String("b")), MakeMapType(BoolType, StringType)) assertFalse(newChunkedMap(Number(0), String("a"), Number(1), String("b")), MakeMapType(NumberType, BoolType)) assertTrue(newChunkedMap(Number(0), String("a"), Number(1), String("b")), MakeMapType(MakeUnionType(NumberType, BoolType), StringType)) assertTrue(newChunkedMap(Number(0), String("a"), Number(1), String("b")), MakeMapType(NumberType, MakeUnionType(BoolType, StringType))) assertTrue(newChunkedMap(Number(0), String("a"), Bool(true), String("b")), MakeMapType(MakeUnionType(NumberType, BoolType), StringType)) assertTrue(newChunkedMap(Number(0), String("a"), Number(1), Bool(true)), MakeMapType(NumberType, MakeUnionType(BoolType, StringType))) assertFalse(newChunkedMap(Number(0), String("a")), MakeMapType(MakeUnionType(), StringType)) assertFalse(newChunkedMap(Number(0), String("a")), MakeMapType(NumberType, MakeUnionType())) } assertTrue(NewRef(Number(1)), MakeRefType(NumberType)) assertFalse(NewRef(Number(1)), MakeRefType(BoolType)) assertTrue(NewRef(Number(1)), MakeRefType(MakeUnionType(NumberType, BoolType))) assertFalse(NewRef(Number(1)), MakeRefType(MakeUnionType())) assertTrue( NewStruct("Struct", StructData{"x": Bool(true)}), MakeStructType("Struct", StructField{"x", BoolType, false}), ) assertTrue( NewStruct("Struct", StructData{"x": Bool(true)}), MakeStructType("Struct", StructField{"x", BoolType, true}), ) assertTrue( NewStruct("Struct", StructData{"x": Bool(true)}), MakeStructType("Struct"), ) assertTrue( NewStruct("Struct", StructData{}), MakeStructType("Struct"), ) assertFalse( NewStruct("", StructData{"x": Bool(true)}), MakeStructType("Struct"), ) assertFalse( NewStruct("struct", StructData{"x": Bool(true)}), // lower case name MakeStructType("Struct"), ) assertTrue( NewStruct("Struct", StructData{"x": Bool(true)}), MakeStructType("Struct", StructField{"x", MakeUnionType(BoolType, NumberType), true}), ) assertTrue( NewStruct("Struct", 
StructData{"x": Bool(true)}), MakeStructType("Struct", StructField{"y", BoolType, true}), ) assertFalse( NewStruct("Struct", StructData{"x": Bool(true)}), MakeStructType("Struct", StructField{"x", StringType, true}), ) assertTrue( NewStruct("Node", StructData{ "value": Number(1), "children": NewList(vs, NewStruct("Node", StructData{ "value": Number(2), "children": NewList(vs), }), ), }), MakeStructType("Node", StructField{"value", NumberType, false}, StructField{"children", MakeListType(MakeCycleType("Node")), false}, ), ) assertFalse( // inner Node has wrong type. NewStruct("Node", StructData{ "value": Number(1), "children": NewList(vs, NewStruct("Node", StructData{ "value": Bool(true), "children": NewList(vs), }), ), }), MakeStructType("Node", StructField{"value", NumberType, false}, StructField{"children", MakeListType(MakeCycleType("Node")), false}, ), ) { node := func(value Value, children ...Value) Value { childrenAsRefs := make(ValueSlice, len(children)) for i, c := range children { childrenAsRefs[i] = NewRef(c) } rv := NewStruct("Node", StructData{ "value": value, "children": NewList(vs, childrenAsRefs...), }) return rv } requiredType := MakeStructType("Node", StructField{"value", NumberType, false}, StructField{"children", MakeListType(MakeRefType(MakeCycleType("Node"))), false}, ) assertTrue( node(Number(0), node(Number(1)), node(Number(2), node(Number(3)))), requiredType, ) assertFalse( node(Number(0), node(Number(1)), node(Number(2), node(String("no"))), ), requiredType, ) } { t1 := MakeStructType("A", StructField{"a", NumberType, false}, StructField{"b", MakeCycleType("A"), false}, ) t2 := MakeStructType("A", StructField{"a", NumberType, false}, StructField{"b", MakeCycleType("A"), true}, ) v := NewStruct("A", StructData{ "a": Number(1), "b": NewStruct("A", StructData{ "a": Number(2), }), }) assertFalse(v, t1) assertTrue(v, t2) } { t := MakeStructType("A", StructField{"aa", NumberType, true}, StructField{"bb", BoolType, false}, ) v := NewStruct("A", 
StructData{ "a": Number(1), "b": Bool(true), }) assertFalse(v, t) } } func TestIsValueSubtypeOfDetails(tt *testing.T) { a := assert.New(tt) test := func(vString, tString string, exp1, exp2 bool) { v := makeTestStructFromFieldNames(vString) t := makeTestStructTypeFromFieldNames(tString) isSub, hasExtra := IsValueSubtypeOfDetails(v, t) a.Equal(exp1, isSub, "expected %t for IsSub, received: %t", exp1, isSub) if isSub { a.Equal(exp2, hasExtra, "expected %t for hasExtra, received: %t", exp2, hasExtra) } } test("x", "x", true, false) test("x", "", true, true) test("x", "x? y?", true, false) test("x z", "x? y?", true, true) test("x", "x y", false, false) } ================================================ FILE: go/types/type.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package types contains most of the data structures available to/from Noms. package types import ( "github.com/attic-labs/noms/go/hash" ) // Type defines and describes Noms types, both built-in and user-defined. // Desc provides the composition of the type. It may contain only a types.NomsKind, in the case of // primitives, or it may contain additional information -- e.g. element Types for compound type // specializations, field descriptions for structs, etc. Either way, checking Kind() allows code // to understand how to interpret the rest of the data. // If Kind() refers to a primitive, then Desc has no more info. // If Kind() refers to List, Map, Ref, Set, or Union, then Desc is a list of Types describing the element type(s). // If Kind() refers to Struct, then Desc contains a []field. type Type struct { Desc TypeDesc } func newType(desc TypeDesc) *Type { return &Type{desc} } // Describe generate text that should parse into the struct being described. 
func (t *Type) Describe() (out string) { return EncodedValue(t) } func (t *Type) TargetKind() NomsKind { return t.Desc.Kind() } // Value interface func (t *Type) Value() Value { return t } func (t *Type) Equals(other Value) (res bool) { // This is highly optimized to not having to encode a *Type unless we have too. if t == other { return true } if otherType, ok := other.(*Type); ok { return t.TargetKind() == otherType.TargetKind() && t.Hash() == other.Hash() } return false } func (t *Type) Less(other Value) (res bool) { return valueLess(t, other) } func (t *Type) Hash() hash.Hash { return getHash(t) } func (t *Type) writeTo(w nomsWriter) { TypeKind.writeTo(w) t.writeToAsType(w, map[string]*Type{}) } func (t *Type) writeToAsType(w nomsWriter, seensStructs map[string]*Type) { t.Desc.writeTo(w, t, seensStructs) } func (t *Type) WalkValues(cb ValueCallback) { t.Desc.walkValues(cb) } func (t *Type) WalkRefs(cb RefCallback) { return } func (t *Type) typeOf() *Type { return TypeType } func (t *Type) Kind() NomsKind { return TypeKind } func (t *Type) valueReadWriter() ValueReadWriter { return nil } // TypeOf returns the type describing the value. This is not an exact type but // often a simplification of the concrete type. func TypeOf(v Value) *Type { return simplifyType(v.typeOf(), false) } // HasStructCycles determines if the type contains any struct cycles. 
func HasStructCycles(t *Type) bool { return hasStructCycles(t, nil) } func hasStructCycles(t *Type, visited []*Type) bool { if _, found := indexOfType(t, visited); found { return true } switch desc := t.Desc.(type) { case CompoundDesc: for _, et := range desc.ElemTypes { b := hasStructCycles(et, visited) if b { return true } } case StructDesc: for _, f := range desc.fields { b := hasStructCycles(f.Type, append(visited, t)) if b { return true } } case CycleDesc: panic("unexpected unresolved cycle") } return false } func indexOfType(t *Type, tl []*Type) (uint32, bool) { for i, tt := range tl { if tt == t { return uint32(i), true } } return 0, false } ================================================ FILE: go/types/type_desc.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "sort" ) // TypeDesc describes a type of the kind returned by Kind(), e.g. Map, Number, or a custom type. type TypeDesc interface { Kind() NomsKind walkValues(cb ValueCallback) writeTo(w nomsWriter, t *Type, seenStructs map[string]*Type) // isSimplifiedForSure is used to determine if the type should be // simplified. It may contain false negatives. isSimplifiedForSure() bool isSimplifiedInner() bool } // PrimitiveDesc implements TypeDesc for all primitive Noms types: // Blob // Bool // Number // String // Type // Value type PrimitiveDesc NomsKind func (p PrimitiveDesc) Kind() NomsKind { return NomsKind(p) } func (p PrimitiveDesc) walkValues(cb ValueCallback) { } func (p PrimitiveDesc) writeTo(w nomsWriter, t *Type, seenStructs map[string]*Type) { NomsKind(p).writeTo(w) } func (p PrimitiveDesc) isSimplifiedForSure() bool { return true } func (p PrimitiveDesc) isSimplifiedInner() bool { return true } // CompoundDesc describes a List, Map, Set, Ref, or Union type. 
// ElemTypes indicates what type or types are in the container indicated by kind, e.g. Map key and value or Set element. type CompoundDesc struct { kind NomsKind ElemTypes typeSlice } func (c CompoundDesc) Kind() NomsKind { return c.kind } func (c CompoundDesc) walkValues(cb ValueCallback) { for _, t := range c.ElemTypes { cb(t) } } func (c CompoundDesc) writeTo(w nomsWriter, t *Type, seenStructs map[string]*Type) { c.kind.writeTo(w) if c.kind == UnionKind { w.writeCount(uint64(len(c.ElemTypes))) } for _, t := range c.ElemTypes { t.writeToAsType(w, seenStructs) } } func (c CompoundDesc) isSimplifiedForSure() bool { if c.kind == UnionKind { return len(c.ElemTypes) == 0 } for _, t := range c.ElemTypes { if !t.Desc.isSimplifiedInner() { return false } } return true } func (c CompoundDesc) isSimplifiedInner() bool { return c.isSimplifiedForSure() } // StructDesc describes a custom Noms Struct. type StructDesc struct { Name string fields structTypeFields } func (s StructDesc) Kind() NomsKind { return StructKind } func (s StructDesc) walkValues(cb ValueCallback) { for _, field := range s.fields { cb(field.Type) } } func (s StructDesc) writeTo(w nomsWriter, t *Type, seenStructs map[string]*Type) { name := s.Name if name != "" { if _, ok := seenStructs[name]; ok { CycleKind.writeTo(w) w.writeString(name) return } seenStructs[name] = t } StructKind.writeTo(w) w.writeString(name) w.writeCount(uint64(s.Len())) // Write all names, all types and finally all the optional flags. for _, field := range s.fields { w.writeString(field.Name) } for _, field := range s.fields { field.Type.writeToAsType(w, seenStructs) } for _, field := range s.fields { w.writeBool(field.Optional) } } func (s StructDesc) isSimplifiedForSure() bool { for _, f := range s.fields { if !f.Type.Desc.isSimplifiedInner() { return false } } return true } func (s StructDesc) isSimplifiedInner() bool { // We do not try to to determine if a type is simplified if it contains a struct. 
return false } func (s StructDesc) IterFields(cb func(name string, t *Type, optional bool)) { for _, field := range s.fields { cb(field.Name, field.Type, field.Optional) } } func (s StructDesc) Field(name string) (typ *Type, optional bool) { f, i := s.findField(name) if i == -1 { return nil, false } return f.Type, f.Optional } func (s StructDesc) findField(name string) (*StructField, int) { i := sort.Search(len(s.fields), func(i int) bool { return s.fields[i].Name >= name }) if i == len(s.fields) || s.fields[i].Name != name { return nil, -1 } return &s.fields[i], i } // Len returns the number of fields in the struct func (s StructDesc) Len() int { return len(s.fields) } type CycleDesc string func (c CycleDesc) Kind() NomsKind { return CycleKind } func (c CycleDesc) walkValues(cb ValueCallback) { } func (c CycleDesc) writeTo(w nomsWriter, t *Type, seenStruct map[string]*Type) { panic("Should not write cycle types") } func (c CycleDesc) isSimplifiedForSure() bool { return false } func (c CycleDesc) isSimplifiedInner() bool { return false } type typeSlice []*Type func (ts typeSlice) Len() int { return len(ts) } func (ts typeSlice) Less(i, j int) bool { return unionLess(ts[i], ts[j]) } func (ts typeSlice) Swap(i, j int) { ts[i], ts[j] = ts[j], ts[i] } // unionLess is used for sorting union types in a predictable order as well as // validating the order when reading union types from a chunk. func unionLess(ti, tj *Type) bool { if ti == tj { panic("unreachable") // unions must not contain the same type twice. } ki, kj := ti.TargetKind(), tj.TargetKind() if ki == kj { switch ki { case StructKind: // Due to type simplification, the only thing that matters is the name of the struct. return ti.Desc.(StructDesc).Name < tj.Desc.(StructDesc).Name case CycleKind: return ti.Desc.(CycleDesc) < tj.Desc.(CycleDesc) default: panic("unreachable") // We should have folded all other types into one. 
} } return ki < kj } ================================================ FILE: go/types/type_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "testing" "github.com/stretchr/testify/assert" ) func TestTypes(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() mapType := MakeMapType(StringType, NumberType) setType := MakeSetType(StringType) mahType := MakeStructType("MahStruct", StructField{"Field1", StringType, false}, StructField{"Field2", BoolType, false}, ) recType := MakeStructType("RecursiveStruct", StructField{"self", MakeCycleType("RecursiveStruct"), false}) mRef := vs.WriteValue(mapType).TargetHash() setRef := vs.WriteValue(setType).TargetHash() mahRef := vs.WriteValue(mahType).TargetHash() recRef := vs.WriteValue(recType).TargetHash() assert.True(mapType.Equals(vs.ReadValue(mRef))) assert.True(setType.Equals(vs.ReadValue(setRef))) assert.True(mahType.Equals(vs.ReadValue(mahRef))) assert.True(recType.Equals(vs.ReadValue(recRef))) } func TestTypeType(t *testing.T) { assert.True(t, TypeOf(BoolType).Equals(TypeType)) } func TestTypeRefDescribe(t *testing.T) { assert := assert.New(t) mapType := MakeMapType(StringType, NumberType) setType := MakeSetType(StringType) assert.Equal("Bool", BoolType.Describe()) assert.Equal("Number", NumberType.Describe()) assert.Equal("String", StringType.Describe()) assert.Equal("Map", mapType.Describe()) assert.Equal("Set", setType.Describe()) mahType := MakeStructType("MahStruct", StructField{"Field1", StringType, false}, StructField{"Field2", BoolType, false}, ) assert.Equal("Struct MahStruct {\n Field1: String,\n Field2: Bool,\n}", mahType.Describe()) } func TestTypeOrdered(t *testing.T) { assert := assert.New(t) assert.True(isKindOrderedByValue(BoolType.TargetKind())) assert.True(isKindOrderedByValue(NumberType.TargetKind())) 
assert.True(isKindOrderedByValue(StringType.TargetKind())) assert.False(isKindOrderedByValue(BlobType.TargetKind())) assert.False(isKindOrderedByValue(ValueType.TargetKind())) assert.False(isKindOrderedByValue(MakeListType(StringType).TargetKind())) assert.False(isKindOrderedByValue(MakeSetType(StringType).TargetKind())) assert.False(isKindOrderedByValue(MakeMapType(StringType, ValueType).TargetKind())) assert.False(isKindOrderedByValue(MakeRefType(StringType).TargetKind())) } func TestFlattenUnionTypes(t *testing.T) { assert := assert.New(t) assert.Equal(BoolType, MakeUnionType(BoolType)) assert.Equal(MakeUnionType(), MakeUnionType()) assert.Equal(MakeUnionType(BoolType, StringType), MakeUnionType(BoolType, MakeUnionType(StringType))) assert.Equal(MakeUnionType(BoolType, StringType, NumberType), MakeUnionType(BoolType, MakeUnionType(StringType, NumberType))) assert.Equal(BoolType, MakeUnionType(BoolType, BoolType)) assert.Equal(BoolType, MakeUnionType(BoolType, MakeUnionType())) assert.Equal(BoolType, MakeUnionType(MakeUnionType(), BoolType)) assert.True(MakeUnionType(MakeUnionType(), MakeUnionType()).Equals(MakeUnionType())) assert.Equal(MakeUnionType(BoolType, NumberType), MakeUnionType(BoolType, NumberType)) assert.Equal(MakeUnionType(BoolType, NumberType), MakeUnionType(NumberType, BoolType)) assert.Equal(MakeUnionType(BoolType, NumberType), MakeUnionType(BoolType, NumberType, BoolType)) assert.Equal(MakeUnionType(BoolType, NumberType), MakeUnionType(MakeUnionType(BoolType, NumberType), NumberType, BoolType)) } func TestVerifyStructFieldName(t *testing.T) { assert := assert.New(t) assertInvalid := func(n string) { assert.Panics(func() { MakeStructType("S", StructField{n, StringType, false}) }) } assertInvalid("") assertInvalid(" ") assertInvalid(" a") assertInvalid("a ") assertInvalid("0") assertInvalid("_") assertInvalid("0a") assertInvalid("_a") assertInvalid("💩") assertValid := func(n string) { MakeStructType("S", StructField{n, StringType, false}) } 
assertValid("a") assertValid("A") assertValid("a0") assertValid("a_") assertValid("a0_") } func TestVerifyStructName(t *testing.T) { assert := assert.New(t) assertInvalid := func(n string) { assert.Panics(func() { MakeStructType(n) }) } assertInvalid(" ") assertInvalid(" a") assertInvalid("a ") assertInvalid("0") assertInvalid("_") assertInvalid("0a") assertInvalid("_a") assertInvalid("💩") assertValid := func(n string) { MakeStructType(n) } assertValid("") assertValid("a") assertValid("A") assertValid("a0") assertValid("a_") assertValid("a0_") } func TestStructUnionWithCycles(tt *testing.T) { inodeType := MakeStructTypeFromFields("Inode", FieldMap{ "attr": MakeStructTypeFromFields("Attr", FieldMap{ "ctime": NumberType, "mode": NumberType, "mtime": NumberType, }), "contents": MakeUnionType( MakeStructTypeFromFields("Directory", FieldMap{ "entries": MakeMapType(StringType, MakeCycleType("Inode")), }), MakeStructTypeFromFields("File", FieldMap{ "data": BlobType, }), MakeStructTypeFromFields("Symlink", FieldMap{ "targetPath": StringType, }), ), }) t1, _ := inodeType.Desc.(StructDesc).Field("contents") t2 := DecodeValue(EncodeValue(t1), nil) assert.True(tt, t1.Equals(t2)) // Note that we cannot ensure pointer equality between t1 and t2 because the // types used to the construct the Unions, while eventually equivalent, are // not identical due to the potentially differing placement of the Cycle type. // We do not remake Union types after putting their component types into // their canonical ordering. 
} func TestHasStructCycles(tt *testing.T) { assert := assert.New(tt) assert.False(HasStructCycles(BoolType)) assert.False(HasStructCycles(BlobType)) assert.False(HasStructCycles(NumberType)) assert.False(HasStructCycles(StringType)) assert.False(HasStructCycles(TypeType)) assert.False(HasStructCycles(ValueType)) assert.Panics(func() { HasStructCycles(MakeCycleType("Abc")) }) assert.False(HasStructCycles(MakeStructType(""))) assert.False(HasStructCycles(MakeStructType("A"))) assert.True(HasStructCycles( MakeStructType("A", StructField{"a", MakeStructType("A"), false}))) assert.True(HasStructCycles( MakeStructType("A", StructField{"a", MakeCycleType("A"), false}))) assert.True(HasStructCycles( MakeSetType(MakeStructType("A", StructField{"a", MakeCycleType("A"), false})))) assert.True(HasStructCycles( MakeStructType("A", StructField{"a", MakeSetType(MakeCycleType("A")), false}))) assert.False(HasStructCycles( MakeMapType( MakeStructType("A"), MakeStructType("A"), ), )) assert.False(HasStructCycles( MakeMapType( MakeStructType("A"), MakeCycleType("A"), ), )) assert.False(HasStructCycles( MakeStructType("", StructField{"a", MakeStructType("", StructField{"b", BoolType, false}, ), false}, StructField{"b", MakeStructType("", StructField{"b", BoolType, false}, ), false}, )), ) } ================================================ FILE: go/types/util_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
)

// iterator yields values one at a time; a nil value marks the end.
type iterator interface {
	Next() Value
}

// iterToSlice drains iter into a ValueSlice, stopping at the first nil value.
func iterToSlice(iter iterator) ValueSlice {
	vs := ValueSlice{}
	for {
		v := iter.Next()
		if v == nil {
			break
		}
		vs = append(vs, v)
	}
	return vs
}

// intsToValueSlice converts each int to a Number.
func intsToValueSlice(ints ...int) ValueSlice {
	vs := ValueSlice{}
	for _, i := range ints {
		vs = append(vs, Number(i))
	}
	return vs
}

// generateNumbersAsValues returns the Numbers 0..n-1.
func generateNumbersAsValues(n int) []Value {
	return generateNumbersAsValuesFromToBy(0, n, 1)
}

// generateNumbersAsValueSlice returns the Numbers 0..n-1.
func generateNumbersAsValueSlice(n int) ValueSlice {
	return generateNumbersAsValuesFromToBy(0, n, 1)
}

// generateNumbersAsValuesFromToBy returns the Numbers from, from+by, ... that
// are strictly below to.
func generateNumbersAsValuesFromToBy(from, to, by int) ValueSlice {
	d.Chk.True(to >= from, "to must be greater than or equal to from")
	d.Chk.True(by > 0, "must be an integer greater than zero")
	nums := []Value{}
	for i := from; i < to; i += by {
		nums = append(nums, Number(i))
	}
	return nums
}

// generateNumbersAsStructs returns values for 0..n-1.
// NOTE(review): despite its name this calls generateNumbersAsValuesFromToBy,
// so it yields plain Numbers rather than structs. It looks like it was meant
// to call generateNumbersAsStructsFromToBy; confirm against the tests that
// use it before changing it.
func generateNumbersAsStructs(n int) ValueSlice {
	return generateNumbersAsValuesFromToBy(0, n, 1)
}

// generateNumbersAsStructsFromToBy returns one "num" struct with field n per
// number from, from+by, ... that is strictly below to.
func generateNumbersAsStructsFromToBy(from, to, by int) ValueSlice {
	d.Chk.True(to >= from, "to must be greater than or equal to from")
	d.Chk.True(by > 0, "must be an integer greater than zero")
	nums := []Value{}
	for i := from; i < to; i += by {
		nums = append(nums, NewStruct("num", StructData{"n": Number(i)}))
	}
	return nums
}

// generateNumbersAsRefOfStructs writes n "num" structs through vrw and returns
// what WriteValue returned for each of them.
func generateNumbersAsRefOfStructs(vrw ValueReadWriter, n int) []Value {
	nums := []Value{}
	for i := 0; i < n; i++ {
		r := vrw.WriteValue(NewStruct("num", StructData{"n": Number(i)}))
		nums = append(nums, r)
	}
	return nums
}

// leafCount returns the number of leaf nodes LoadLeafNodes reports for c.
func leafCount(c Collection) int {
	leaves, _ := LoadLeafNodes([]Collection{c}, 0, c.Len())
	return len(leaves)
}

// leafDiffCount returns the size of the symmetric multiset difference between
// the leaf hashes of c1 and c2.
func leafDiffCount(c1, c2 Collection) int {
	count := 0
	hashes := make(map[hash.Hash]int)

	leaves1, _ := LoadLeafNodes([]Collection{c1}, 0, c1.Len())
	leaves2, _ := LoadLeafNodes([]Collection{c2}, 0, c2.Len())

	// Count every leaf hash of c1.
	for _, l := range leaves1 {
		hashes[l.Hash()]++
	}

	// Cancel out the leaves c2 shares with c1; count the ones only c2 has.
	for _, l := range leaves2 {
		if c, ok := hashes[l.Hash()]; ok {
			if c == 1 {
				delete(hashes, l.Hash())
			} else {
				hashes[l.Hash()] = c - 1
			}
		} else {
			count++
		}
	}

	// Whatever is left are leaves only c1 has.
	for _, c := range hashes {
		count += c
	}

	return count
}

// reverseValues returns a new slice holding values in reverse order.
func reverseValues(values []Value) []Value {
	newValues := make([]Value, len(values))
	for i := 0; i < len(values); i++ {
		newValues[i] = values[len(values)-i-1]
	}
	return newValues
}

// spliceValues returns a new slice in which deleteCount items starting at
// start have been replaced by newItems. The input slice is not modified.
func spliceValues(values []Value, start int, deleteCount int, newItems ...Value) []Value {
	numCurrentItems := len(values)
	numNewItems := len(newItems)
	newArr := make([]Value, numCurrentItems-deleteCount+numNewItems)
	copy(newArr[0:], values[0:start])
	copy(newArr[start:], newItems[0:])
	copy(newArr[start+numNewItems:], values[start+deleteCount:])
	return newArr
}

================================================
FILE: go/types/validate_type.go
================================================
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

// validateType panics if t breaks one of the structural rules checked by
// validateTypeImpl.
func validateType(t *Type) {
	validateTypeImpl(t, map[string]struct{}{})
}

// validateTypeImpl recursively checks t. Unions must not have exactly one
// element and their elements must be strictly ordered by unionLess. Struct
// names and fields are checked with verifyStructName and verifyFields.
// seenStructs records the named structs already visited so that each is
// validated only once.
func validateTypeImpl(t *Type, seenStructs map[string]struct{}) {
	switch desc := t.Desc.(type) {
	case CompoundDesc:
		if desc.Kind() == UnionKind {
			if len(desc.ElemTypes) == 1 {
				panic("Invalid union type")
			}
			for i := 1; i < len(desc.ElemTypes); i++ {
				if !unionLess(desc.ElemTypes[i-1], desc.ElemTypes[i]) {
					panic("Invalid union order")
				}
			}
		}

		for _, et := range desc.ElemTypes {
			validateTypeImpl(et, seenStructs)
		}
	case StructDesc:
		if desc.Name != "" {
			if _, ok := seenStructs[desc.Name]; ok {
				return
			}
			seenStructs[desc.Name] = struct{}{}
		}
		verifyStructName(desc.Name)
		verifyFields(desc.fields)
		for _, f := range desc.fields {
			validateTypeImpl(f.Type, seenStructs)
		}
	}
}

================================================
FILE: go/types/validating_decoder.go
================================================
// Copyright 2016 Attic Labs, Inc.
All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" ) type ValidatingDecoder struct { vs *ValueStore } func NewValidatingDecoder(cs chunks.ChunkStore) *ValidatingDecoder { return &ValidatingDecoder{NewValueStore(cs)} } // DecodedChunk holds a pointer to a Chunk and the Value that results from // calling DecodeFromBytes(c.Data()). type DecodedChunk struct { Chunk *chunks.Chunk Value *Value } // Decode decodes c and checks that the hash of the resulting value // matches c.Hash(). It returns a DecodedChunk holding both c and a pointer to // the decoded Value. func (vbs *ValidatingDecoder) Decode(c *chunks.Chunk) DecodedChunk { h := c.Hash() v := decodeFromBytesWithValidation(c.Data(), vbs.vs) if getHash(v) != h { d.Panic("Invalid hash found") } return DecodedChunk{c, &v} } ================================================ FILE: go/types/validating_decoder_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/stretchr/testify/assert"
)

// TestValidatingBatchingSinkDecode checks that a valid chunk decodes to a
// value equal to the one that was encoded.
func TestValidatingBatchingSinkDecode(t *testing.T) {
	v := Number(42)
	c := EncodeValue(v)
	storage := &chunks.TestStorage{}
	vdc := NewValidatingDecoder(storage.NewView())

	dc := vdc.Decode(&c)
	assert.True(t, v.Equals(*dc.Value))
}

// assertPanicsOnInvalidChunk reads data with a plain (non-validating) decoder,
// re-encodes the result into a chunk, and asserts that the validating decoder
// panics on that chunk.
func assertPanicsOnInvalidChunk(t *testing.T, data []interface{}) {
	storage := &chunks.TestStorage{}
	vs := NewValueStore(storage.NewView())
	dataAsByteSlice := toBinaryNomsReaderData(data)
	dec := newValueDecoder(dataAsByteSlice, vs)
	v := dec.readValue()

	c := EncodeValue(v)
	vdc := NewValidatingDecoder(storage.NewView())

	assert.Panics(t, func() {
		vdc.Decode(&c)
	})
}

// The union lists Number before Bool, which the validating decoder is
// expected to reject.
func TestValidatingBatchingSinkDecodeInvalidUnion(t *testing.T) {
	data := []interface{}{
		uint8(TypeKind),
		uint8(UnionKind), uint64(2) /* len */, uint8(NumberKind), uint8(BoolKind),
	}
	assertPanicsOnInvalidChunk(t, data)
}

// The struct fields are listed as "b", "a", i.e. not sorted by name.
func TestValidatingBatchingSinkDecodeInvalidStructFieldOrder(t *testing.T) {
	data := []interface{}{
		uint8(TypeKind),
		uint8(StructKind), "S", uint64(2), /* len */
		"b", "a",
		uint8(NumberKind), uint8(NumberKind),
		false, false,
	}
	assertPanicsOnInvalidChunk(t, data)
}

// The struct name has a trailing space.
func TestValidatingBatchingSinkDecodeInvalidStructName(t *testing.T) {
	data := []interface{}{
		uint8(TypeKind),
		uint8(StructKind), "S ", uint64(0), /* len */
	}
	assertPanicsOnInvalidChunk(t, data)
}

// The field name has a trailing space.
func TestValidatingBatchingSinkDecodeInvalidStructFieldName(t *testing.T) {
	data := []interface{}{
		uint8(TypeKind),
		uint8(StructKind), "S", uint64(1), /* len */
		"b ", uint8(NumberKind), false,
	}
	assertPanicsOnInvalidChunk(t, data)
}

================================================
FILE: go/types/value.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "github.com/attic-labs/noms/go/hash" ) type ValueCallback func(v Value) type RefCallback func(ref Ref) // Valuable is an interface from which a Value can be retrieved. type Valuable interface { // Kind is the NomsKind describing the kind of value this is. Kind() NomsKind Value() Value } // Value is the interface all Noms values implement. type Value interface { Valuable // Equals determines if two different Noms values represents the same underlying value. Equals(other Value) bool // Less determines if this Noms value is less than another Noms value. // When comparing two Noms values and both are comparable and the same type (Bool, Number or // String) then the natural ordering is used. For other Noms values the Hash of the value is // used. When comparing Noms values of different type the following ordering is used: // Bool < Number < String < everything else. Less(other Value) bool // Hash is the hash of the value. All Noms values have a unique hash and if two values have the // same hash they must be equal. Hash() hash.Hash // WalkValues iterates over the immediate children of this value in the DAG, if any, not including // Type() WalkValues(ValueCallback) // WalkRefs iterates over the refs to the underlying chunks. If this value is a collection that has been // chunked then this will return the refs of th sub trees of the prolly-tree. WalkRefs(RefCallback) // typeOf is the internal implementation of types.TypeOf. It is not normalized // and unions might have a single element, duplicates and be in the wrong // order. typeOf() *Type // writeTo writes the encoded version of the value to a nomsWriter. 
writeTo(nomsWriter) } type ValueSlice []Value func (vs ValueSlice) Len() int { return len(vs) } func (vs ValueSlice) Swap(i, j int) { vs[i], vs[j] = vs[j], vs[i] } func (vs ValueSlice) Less(i, j int) bool { return vs[i].Less(vs[j]) } func (vs ValueSlice) Equals(other ValueSlice) bool { if vs.Len() != other.Len() { return false } for i, v := range vs { if !v.Equals(other[i]) { return false } } return true } func (vs ValueSlice) Contains(v Value) bool { for _, v := range vs { if v.Equals(v) { return true } } return false } type valueReadWriter interface { valueReadWriter() ValueReadWriter } type valueImpl struct { vrw ValueReadWriter buff []byte offsets []uint32 } func (v valueImpl) valueReadWriter() ValueReadWriter { return v.vrw } func (v valueImpl) writeTo(enc nomsWriter) { enc.writeRaw(v.buff) } func (v valueImpl) valueBytes() []byte { return v.buff } // IsZeroValue can be used to test if a Value is the same as T{}. func (v valueImpl) IsZeroValue() bool { return v.buff == nil } func (v valueImpl) Hash() hash.Hash { return hash.Of(v.buff) } func (v valueImpl) decoder() valueDecoder { return newValueDecoder(v.buff, v.vrw) } func (v valueImpl) decoderAtOffset(offset int) valueDecoder { return newValueDecoder(v.buff[offset:], v.vrw) } func (v valueImpl) asValueImpl() valueImpl { return v } func (v valueImpl) Equals(other Value) bool { if otherValueImpl, ok := other.(asValueImpl); ok { return bytes.Equal(v.buff, otherValueImpl.asValueImpl().buff) } return false } func (v valueImpl) Less(other Value) bool { return valueLess(v, other) } func (v valueImpl) WalkRefs(cb RefCallback) { walkRefs(v.valueBytes(), cb) } type asValueImpl interface { asValueImpl() valueImpl } func (v valueImpl) Kind() NomsKind { return NomsKind(v.buff[0]) } ================================================ FILE: go/types/value_decoder.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import "github.com/attic-labs/noms/go/d"

// valueDecoder reads Noms values from an encoded byte buffer.
type valueDecoder struct {
	typedBinaryNomsReader
	vrw ValueReadWriter
}

// typedBinaryNomsReader provides some functionality for reading and skipping types that is shared by both valueDecoder and refWalker.
type typedBinaryNomsReader struct {
	binaryNomsReader
	validating bool
}

// newValueDecoder returns a non-validating decoder positioned at the start of
// buff.
func newValueDecoder(buff []byte, vrw ValueReadWriter) valueDecoder {
	nr := binaryNomsReader{buff, 0}
	return valueDecoder{typedBinaryNomsReader{nr, false}, vrw}
}

// newValueDecoderWithValidation returns a decoder over nr that validates the
// types it reads.
func newValueDecoderWithValidation(nr binaryNomsReader, vrw ValueReadWriter) valueDecoder {
	return valueDecoder{typedBinaryNomsReader{nr, true}, vrw}
}

// copyString copies the raw bytes of the next string to w.
func (r *valueDecoder) copyString(w nomsWriter) {
	start := r.pos()
	r.skipString()
	end := r.pos()
	w.writeRaw(r.byteSlice(start, end))
}

func (r *valueDecoder) readRef() Ref {
	return readRef(&(r.typedBinaryNomsReader))
}

func (r *valueDecoder) skipRef() {
	skipRef(&(r.typedBinaryNomsReader))
}

// skipBlobLeafSequence skips the blob bytes and returns the positions of their
// start and end together with the byte count.
func (r *valueDecoder) skipBlobLeafSequence() ([]uint32, uint64) {
	size := r.readCount()
	valuesPos := r.pos()
	r.offset += uint32(size)
	return []uint32{valuesPos, r.pos()}, size
}

// skipValueSequence skips count items of elementsPerIndex values each. The
// returned slice has count+1 entries: the start position followed by the
// position just after each item.
func (r *valueDecoder) skipValueSequence(elementsPerIndex int) ([]uint32, uint64) {
	count := r.readCount()
	offsets := make([]uint32, count+1)
	offsets[0] = r.pos()
	for i := uint64(0); i < count; i++ {
		for j := 0; j < elementsPerIndex; j++ {
			r.skipValue()
		}
		offsets[i+1] = r.pos()
	}
	return offsets, count
}

func (r *valueDecoder) skipListLeafSequence() ([]uint32, uint64) {
	return r.skipValueSequence(getValuesPerIdx(ListKind))
}

func (r *valueDecoder) skipSetLeafSequence() ([]uint32, uint64) {
	return r.skipValueSequence(getValuesPerIdx(SetKind))
}

func (r *valueDecoder) skipMapLeafSequence() ([]uint32, uint64) {
	return r.skipValueSequence(getValuesPerIdx(MapKind))
}

// readSequence reads one sequence without materializing its items. It records
// the positions of the kind, the level and the items, and returns a meta
// sequence when the level is above zero or a leaf sequence otherwise.
// leafSkipper skips the leaf payload for the collection kind being read.
func (r *valueDecoder) readSequence(kind NomsKind, leafSkipper func() ([]uint32, uint64)) sequence {
	start := r.pos()
	offsets := []uint32{start}
	r.skipKind()
	offsets = append(offsets, r.pos())
	level := r.readCount()
	offsets = append(offsets, r.pos())
	var seqOffsets []uint32
	var length uint64
	if level > 0 {
		seqOffsets, length = r.skipMetaSequence(kind, level)
	} else {
		seqOffsets, length = leafSkipper()
	}
	offsets = append(offsets, seqOffsets...)
	end := r.pos()

	if level > 0 {
		return newMetaSequence(r.vrw, r.byteSlice(start, end), offsets, length)
	}

	return newLeafSequence(r.vrw, r.byteSlice(start, end), offsets, length)
}

func (r *valueDecoder) readBlobSequence() sequence {
	seq := r.readSequence(BlobKind, r.skipBlobLeafSequence)
	if seq.isLeaf() {
		return blobLeafSequence{seq.(leafSequence)}
	}
	return seq
}

func (r *valueDecoder) readListSequence() sequence {
	seq := r.readSequence(ListKind, r.skipListLeafSequence)
	if seq.isLeaf() {
		return listLeafSequence{seq.(leafSequence)}
	}
	return seq
}

func (r *valueDecoder) readSetSequence() orderedSequence {
	seq := r.readSequence(SetKind, r.skipSetLeafSequence)
	if seq.isLeaf() {
		return setLeafSequence{seq.(leafSequence)}
	}
	return seq.(orderedSequence)
}

func (r *valueDecoder) readMapSequence() orderedSequence {
	seq := r.readSequence(MapKind, r.skipMapLeafSequence)
	if seq.isLeaf() {
		return mapLeafSequence{seq.(leafSequence)}
	}
	return seq.(orderedSequence)
}

func (r *valueDecoder) skipList() {
	r.skipSequence(ListKind, r.skipListLeafSequence)
}

func (r *valueDecoder) skipSet() {
	r.skipSequence(SetKind, r.skipSetLeafSequence)
}

func (r *valueDecoder) skipMap() {
	r.skipSequence(MapKind, r.skipMapLeafSequence)
}

func (r *valueDecoder) skipBlob() {
	r.skipSequence(BlobKind, r.skipBlobLeafSequence)
}

// skipSequence advances past a sequence, discarding the offsets that the
// skippers compute.
func (r *valueDecoder) skipSequence(kind NomsKind, leafSkipper func() ([]uint32, uint64)) {
	r.skipKind()
	level := r.readCount()
	if level > 0 {
		r.skipMetaSequence(kind, level)
	} else {
		leafSkipper()
	}
}

// skipOrderedKey skips a key that is stored either as a hash (marked by
// hashKind) or as a regular value.
func (r *valueDecoder) skipOrderedKey() {
	switch r.peekKind() {
	case hashKind:
		r.skipKind()
		r.skipHash()
	default:
		r.skipValue()
	}
}

// skipMetaSequence skips count meta tuples, each made of a ref, an ordered key
// and a count. It returns the start position followed by the position after
// each tuple, and the sum of the tuple counts. k and level are not used.
func (r *valueDecoder) skipMetaSequence(k NomsKind, level uint64) ([]uint32, uint64) {
	count := r.readCount()
	offsets := make([]uint32, count+1)
	offsets[0] = r.pos()
	length := uint64(0)
	for i := uint64(0); i < count; i++ {
		r.skipRef()
		r.skipOrderedKey()
		length += r.readCount()
		offsets[i+1] = r.pos()
	}
	return offsets, length
}

// readValue decodes the next value, dispatching on its kind. It panics for
// kinds that can never be the kind of a value instance.
func (r *valueDecoder) readValue() Value {
	k := r.peekKind()
	switch k {
	case BlobKind:
		return newBlob(r.readBlobSequence())
	case BoolKind:
		r.skipKind()
		return Bool(r.readBool())
	case NumberKind:
		r.skipKind()
		return r.readNumber()
	case StringKind:
		r.skipKind()
		return String(r.readString())
	case ListKind:
		return newList(r.readListSequence())
	case MapKind:
		return newMap(r.readMapSequence())
	case RefKind:
		return r.readRef()
	case SetKind:
		return newSet(r.readSetSequence())
	case StructKind:
		return r.readStruct()
	case TypeKind:
		r.skipKind()
		return r.readType()
	case CycleKind, UnionKind, ValueKind:
		d.Panic("A value instance can never have type %s", k)
	}

	panic("not reachable")
}

// skipValue advances past the next value without building it. It panics for
// the same kinds as readValue.
func (r *valueDecoder) skipValue() {
	k := r.peekKind()
	switch k {
	case BlobKind:
		r.skipBlob()
	case BoolKind:
		r.skipKind()
		r.skipBool()
	case NumberKind:
		r.skipKind()
		r.skipNumber()
	case StringKind:
		r.skipKind()
		r.skipString()
	case ListKind:
		r.skipList()
	case MapKind:
		r.skipMap()
	case RefKind:
		r.skipRef()
	case SetKind:
		r.skipSet()
	case StructKind:
		r.skipStruct()
	case TypeKind:
		r.skipKind()
		r.skipType()
	case CycleKind, UnionKind, ValueKind:
		d.Panic("A value instance can never have type %s", k)
	default:
		panic("not reachable")
	}
}

// readTypeOfValue is basically readValue().typeOf() but it ensures that we do
// not allocate values where we do not need to.
func (r *valueDecoder) readTypeOfValue() *Type { k := r.peekKind() switch k { case BlobKind: r.skipBlob() return BlobType case BoolKind: r.skipKind() r.skipBool() return BoolType case NumberKind: r.skipKind() r.skipNumber() return NumberType case StringKind: r.skipKind() r.skipString() return StringType case ListKind, MapKind, RefKind, SetKind: // These do not decode the actual values anyway. return r.readValue().typeOf() case StructKind: return readStructTypeOfValue(r) case TypeKind: r.skipKind() r.skipType() return TypeType case CycleKind, UnionKind, ValueKind: d.Panic("A value instance can never have type %s", k) } panic("not reachable") } // isValueSameTypeForSure may return false even though the type of the value is // equal. We do that in cases wherer it would be too expensive to compute the // type. // If this returns false the decoder might not have visited the whole value and // its offset is no longer valid. func (r *valueDecoder) isValueSameTypeForSure(t *Type) bool { k := r.peekKind() if k != t.TargetKind() { return false } switch k { case BlobKind, BoolKind, NumberKind, StringKind: r.skipValue() return true case ListKind, MapKind, RefKind, SetKind: // TODO: Maybe do some simple cases here too. Performance metrics should determine // what is going to be worth doing. // https://github.com/attic-labs/noms/issues/3776 return false case StructKind: return isStructSameTypeForSure(r, t) case TypeKind: return false case CycleKind, UnionKind, ValueKind: d.Panic("A value instance can never have type %s", k) } panic("not reachable") } // isStringSame checks if the next string in the decoder matches string. It // moves the decoder to after the string in all cases. 
func (r *valueDecoder) isStringSame(s string) bool {
	count := r.readCount()
	start := uint64(r.offset)
	// Advance first so the decoder ends up after the string whatever the outcome.
	r.offset += uint32(count)
	if uint64(len(s)) != count {
		return false
	}

	for i := uint64(0); i < count; i++ {
		if s[i] != r.buff[start+i] {
			return false
		}
	}
	return true
}

// copyValue copies the raw bytes of the next value to w.
func (r *valueDecoder) copyValue(w nomsWriter) {
	start := r.pos()
	r.skipValue()
	end := r.pos()
	w.writeRaw(r.byteSlice(start, end))
}

func (r *valueDecoder) readStruct() Value {
	return readStruct(r)
}

func (r *valueDecoder) skipStruct() {
	skipStruct(r)
}

// boolToUint32 maps true to 1 and false to 0.
func boolToUint32(b bool) uint32 {
	if b {
		return 1
	}
	return 0
}

// readOrderedKey reads a key that is stored either as a hash (marked by
// hashKind) or as a regular value.
func (r *valueDecoder) readOrderedKey() orderedKey {
	switch r.peekKind() {
	case hashKind:
		r.skipKind()
		h := r.readHash()
		return orderedKeyFromHash(h)
	default:
		v := r.readValue()
		return newOrderedKey(v)
	}
}

// readType reads a type and, when the reader is validating, validates it.
func (r *typedBinaryNomsReader) readType() *Type {
	t := r.readTypeInner(map[string]*Type{})
	if r.validating {
		validateType(t)
	}
	return t
}

// skipType advances past a type. A validating reader reads the type in full
// instead, so that it still gets validated.
func (r *typedBinaryNomsReader) skipType() {
	if r.validating {
		r.readType()
		return
	}
	r.skipTypeInner()
}

// readTypeInner reads one type description. seenStructs maps the names of the
// structs read so far to their types and is used to resolve cycle references;
// it panics on a cycle that has an empty name or names an unseen struct.
func (r *typedBinaryNomsReader) readTypeInner(seenStructs map[string]*Type) *Type {
	k := r.readKind()
	switch k {
	case ListKind:
		return makeCompoundType(ListKind, r.readTypeInner(seenStructs))
	case MapKind:
		// The key type is read before the value type.
		return makeCompoundType(MapKind, r.readTypeInner(seenStructs), r.readTypeInner(seenStructs))
	case RefKind:
		return makeCompoundType(RefKind, r.readTypeInner(seenStructs))
	case SetKind:
		return makeCompoundType(SetKind, r.readTypeInner(seenStructs))
	case StructKind:
		return r.readStructType(seenStructs)
	case UnionKind:
		return r.readUnionType(seenStructs)
	case CycleKind:
		name := r.readString()
		d.PanicIfTrue(name == "") // cycles to anonymous structs are disallowed
		t, ok := seenStructs[name]
		d.PanicIfFalse(ok)
		return t
	}

	d.PanicIfFalse(IsPrimitiveKind(k))
	return MakePrimitiveType(k)
}

// skipTypeInner advances past one type description without building it.
func (r *typedBinaryNomsReader) skipTypeInner() {
	k := r.readKind()
	switch k {
	case ListKind, RefKind, SetKind:
		r.skipTypeInner()
	case MapKind:
		r.skipTypeInner()
		r.skipTypeInner()
	case StructKind:
		r.skipStructType()
	case UnionKind:
		r.skipUnionType()
	case CycleKind:
		r.skipString()
	default:
		d.PanicIfFalse(IsPrimitiveKind(k))
	}
}

// readStructType reads a struct type: the name and field count, then all field
// names, all field types and finally all optional flags.
func (r *typedBinaryNomsReader) readStructType(seenStructs map[string]*Type) *Type {
	name := r.readString()
	count := r.readCount()
	fields := make(structTypeFields, count)

	t := newType(StructDesc{name, fields})
	// Register the struct before reading its field types so that cycle
	// references inside the fields can resolve to it.
	seenStructs[name] = t

	for i := uint64(0); i < count; i++ {
		t.Desc.(StructDesc).fields[i] = StructField{
			Name: r.readString(),
		}
	}
	for i := uint64(0); i < count; i++ {
		t.Desc.(StructDesc).fields[i].Type = r.readTypeInner(seenStructs)
	}
	for i := uint64(0); i < count; i++ {
		t.Desc.(StructDesc).fields[i].Optional = r.readBool()
	}

	return t
}

// skipStructType advances past a struct type, mirroring the layout read by
// readStructType.
func (r *typedBinaryNomsReader) skipStructType() {
	r.skipString() // name
	count := r.readCount()

	for i := uint64(0); i < count; i++ {
		r.skipString() // name
	}
	for i := uint64(0); i < count; i++ {
		r.skipTypeInner()
	}
	for i := uint64(0); i < count; i++ {
		r.skipBool() // optional
	}
}

// readUnionType reads the element count followed by that many element types.
func (r *typedBinaryNomsReader) readUnionType(seenStructs map[string]*Type) *Type {
	l := r.readCount()
	ts := make(typeSlice, l)
	for i := uint64(0); i < l; i++ {
		ts[i] = r.readTypeInner(seenStructs)
	}
	return makeUnionType(ts...)
}

// skipUnionType advances past a union type.
func (r *typedBinaryNomsReader) skipUnionType() {
	l := r.readCount()
	for i := uint64(0); i < l; i++ {
		r.skipTypeInner()
	}
}

================================================
FILE: go/types/value_stats.go
================================================
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "fmt" "io" "github.com/attic-labs/noms/go/hash" humanize "github.com/dustin/go-humanize" "github.com/golang/snappy" ) type ValueStats interface { String() string } func WriteValueStats(w io.Writer, v Value, vr ValueReader) { switch v.Kind() { case BoolKind, NumberKind, StringKind, RefKind, StructKind, TypeKind: writeUnchunkedValueStats(w, v, vr) case BlobKind, ListKind, MapKind, SetKind: writePtreeStats(w, v, vr) } } func writeUnchunkedValueStats(w io.Writer, v Value, vr ValueReader) { fmt.Fprintf(w, "Kind: %s\nCompressedSize: %s\n", v.Kind().String(), humanize.Bytes(compressedSize(v))) } const treeRowFormat = "%5s%20s%20s%20s\n" var treeLevelHeader = fmt.Sprintf(treeRowFormat, "Level", "Nodes", "Values/Node", "Size/Node") func writePtreeStats(w io.Writer, v Value, vr ValueReader) { totalCompressedSize := uint64(0) totalChunks := uint64(0) fmt.Fprintf(w, "Kind: %s\n", v.Kind().String()) fmt.Fprintf(w, treeLevelHeader) level := int64(v.(Collection).asSequence().treeLevel()) nodes := ValueSlice{v} // TODO: For level 0, use NBS to fetch leaf sizes without actually reading leaf data. for level >= 0 { children := RefSlice{} visited := hash.HashSet{} chunkCount, valueCount, byteSize := uint64(0), uint64(0), uint64(0) for _, n := range nodes { chunkCount++ if level > 0 { n.WalkRefs(func(r Ref) { children = append(children, r) }) } s := n.(Collection).asSequence() valueCount += uint64(s.seqLen()) h := n.Hash() if !visited.Has(h) { // Indexed Ptrees can share nodes within the same tree level. 
Only count each unique value once byteSize += compressedSize(n) visited.Insert(h) } } printTreeLevel(w, uint64(level), valueCount, chunkCount, byteSize) nodes = loadNextLevel(children, vr) level-- totalCompressedSize += byteSize totalChunks += chunkCount } } func printTreeLevel(w io.Writer, level, values, chunks, byteSize uint64) { avgItems := float64(values) / float64(chunks) avgSize := byteSize / chunks fmt.Fprintf(w, treeRowFormat, fmt.Sprintf("%d", level), humanize.Comma(int64(chunks)), fmt.Sprintf("%.1f", avgItems), humanize.Bytes(avgSize)) } func compressedSize(v Value) uint64 { chunk := EncodeValue(v) compressed := snappy.Encode(nil, chunk.Data()) return uint64(len(compressed)) } func loadNextLevel(refs RefSlice, vr ValueReader) ValueSlice { hs := make(hash.HashSlice, len(refs)) for i, r := range refs { hs[i] = r.TargetHash() } // Fetch committed child sequences in a single batch return vr.ReadManyValues(hs) } ================================================ FILE: go/types/value_store.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "sync" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/constants" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/attic-labs/noms/go/util/sizecache" ) // ValueReader is an interface that knows how to read Noms Values, e.g. // datas/Database. Required to avoid import cycle between this package and the // package that implements Value reading. type ValueReader interface { ReadValue(h hash.Hash) Value ReadManyValues(hashes hash.HashSlice) ValueSlice } // ValueWriter is an interface that knows how to write Noms Values, e.g. // datas/Database. Required to avoid import cycle between this package and the // package that implements Value writing. 
type ValueWriter interface {
	WriteValue(v Value) Ref
}

// ValueReadWriter combines ValueReader and ValueWriter. Like those
// interfaces, it exists so that this package need not import the package that
// implements Value reading and writing (e.g. datas/Database).
type ValueReadWriter interface {
	ValueReader
	ValueWriter
}

// ValueStore reads and writes Noms Values on top of a ChunkStore. Written
// Values are only minimally validated and are buffered in memory; they are
// not guaranteed to have reached the ChunkStore until a subsequent Flush.
// Currently, WriteValue validates the following properties of a Value v:
// - v can be correctly serialized and its Ref taken
type ValueStore struct {
	cs chunks.ChunkStore

	// bufferMu guards the write buffer below.
	bufferMu             sync.RWMutex
	bufferedChunks       map[hash.Hash]chunks.Chunk
	bufferedChunksMax    uint64
	bufferedChunkSize    uint64
	withBufferedChildren map[hash.Hash]uint64 // chunk Hash -> ref height
	unresolvedRefs       hash.HashSet
	enforceCompleteness  bool

	// decodedChunks caches already-decoded Values by hash.
	decodedChunks *sizecache.SizeCache

	// versOnce runs the data-version check on first use.
	versOnce sync.Once
}

// PanicIfDangling panics if cs reports any of the hashes in unresolved as
// absent.
func PanicIfDangling(unresolved hash.HashSet, cs chunks.ChunkStore) {
	if missing := cs.HasMany(unresolved); len(missing) > 0 {
		d.Panic("Found dangling references to %v", missing)
	}
}

// Default sizing used by NewValueStore.
const (
	defaultDecodedChunksSize = 1 << 25 // 32MB
	defaultPendingPutMax     = 1 << 28 // 256MB
)

// newTestValueStore builds a ValueStore over a fresh view of an in-memory
// chunks.TestStorage, for use in tests.
func newTestValueStore() *ValueStore {
	return NewValueStore((&chunks.TestStorage{}).NewView())
}

// NewValueStore returns a ValueStore instance that owns the provided
// ChunkStore and manages its lifetime. Calling Close on the returned
// ValueStore will Close() cs.
func NewValueStore(cs chunks.ChunkStore) *ValueStore { return newValueStoreWithCacheAndPending(cs, defaultDecodedChunksSize, defaultPendingPutMax) } func newValueStoreWithCacheAndPending(cs chunks.ChunkStore, cacheSize, pendingMax uint64) *ValueStore { return &ValueStore{ cs: cs, bufferMu: sync.RWMutex{}, bufferedChunks: map[hash.Hash]chunks.Chunk{}, bufferedChunksMax: pendingMax, withBufferedChildren: map[hash.Hash]uint64{}, decodedChunks: sizecache.New(cacheSize), unresolvedRefs: hash.HashSet{}, enforceCompleteness: true, versOnce: sync.Once{}, } } func (lvs *ValueStore) expectVersion() { dataVersion := lvs.cs.Version() if constants.NomsVersion != dataVersion { d.Panic("SDK version %s incompatible with data of version %s", constants.NomsVersion, dataVersion) } } func (lvs *ValueStore) SetEnforceCompleteness(enforce bool) { lvs.enforceCompleteness = enforce } func (lvs *ValueStore) ChunkStore() chunks.ChunkStore { return lvs.cs } // ReadValue reads and decodes a value from lvs. It is not considered an error // for the requested chunk to be empty; in this case, the function simply // returns nil. func (lvs *ValueStore) ReadValue(h hash.Hash) Value { lvs.versOnce.Do(lvs.expectVersion) if v, ok := lvs.decodedChunks.Get(h); ok { d.PanicIfTrue(v == nil) return v.(Value) } chunk := func() chunks.Chunk { lvs.bufferMu.RLock() defer lvs.bufferMu.RUnlock() if pending, ok := lvs.bufferedChunks[h]; ok { return pending } return chunks.EmptyChunk }() if chunk.IsEmpty() { chunk = lvs.cs.Get(h) } if chunk.IsEmpty() { return nil } v := DecodeValue(chunk, lvs) d.PanicIfTrue(v == nil) lvs.decodedChunks.Add(h, uint64(len(chunk.Data())), v) return v } // ReadManyValues reads and decodes Values indicated by |hashes| from lvs and // returns the found Values in the same order. Any non-present Values will be // represented by nil. 
func (lvs *ValueStore) ReadManyValues(hashes hash.HashSlice) ValueSlice { lvs.versOnce.Do(lvs.expectVersion) decode := func(h hash.Hash, chunk *chunks.Chunk) Value { v := DecodeValue(*chunk, lvs) d.PanicIfTrue(v == nil) lvs.decodedChunks.Add(h, uint64(len(chunk.Data())), v) return v } foundValues := make(map[hash.Hash]Value, len(hashes)) // First, see which hashes can be found in either the Value cache or bufferedChunks. // Put the rest into a new HashSet to be requested en masse from the ChunkStore. remaining := hash.HashSet{} for _, h := range hashes { if v, ok := lvs.decodedChunks.Get(h); ok { d.PanicIfTrue(v == nil) foundValues[h] = v.(Value) continue } chunk := func() chunks.Chunk { lvs.bufferMu.RLock() defer lvs.bufferMu.RUnlock() if pending, ok := lvs.bufferedChunks[h]; ok { return pending } return chunks.EmptyChunk }() if !chunk.IsEmpty() { foundValues[h] = decode(h, &chunk) continue } remaining.Insert(h) } if len(remaining) != 0 { // Request remaining hashes from ChunkStore, processing the found chunks as they come in. foundChunks := make(chan *chunks.Chunk, 16) go func() { lvs.cs.GetMany(remaining, foundChunks); close(foundChunks) }() for c := range foundChunks { h := c.Hash() foundValues[h] = decode(h, c) } } rv := make(ValueSlice, len(hashes)) for i, h := range hashes { rv[i] = foundValues[h] } return rv } // WriteValue takes a Value, schedules it to be written it to lvs, and returns // an appropriately-typed types.Ref. v is not guaranteed to be actually // written until after Flush(). func (lvs *ValueStore) WriteValue(v Value) Ref { lvs.versOnce.Do(lvs.expectVersion) d.PanicIfFalse(v != nil) c := EncodeValue(v) d.PanicIfTrue(c.IsEmpty()) h := c.Hash() height := maxChunkHeight(v) + 1 r := constructRef(h, TypeOf(v), height) lvs.bufferChunk(v, c, height) return r } // bufferChunk enqueues c (which is the serialization of v) within this // ValueStore. 
Buffered chunks are flushed progressively to the underlying // ChunkStore in a way which attempts to locate children and grandchildren // sequentially together. The following invariants are retained: // // 1. For any given chunk currently in the buffer, only direct children of the // chunk may also be presently buffered (any grandchildren will have been // flushed). // 2. The total data occupied by buffered chunks does not exceed // lvs.bufferedChunksMax func (lvs *ValueStore) bufferChunk(v Value, c chunks.Chunk, height uint64) { lvs.bufferMu.Lock() defer lvs.bufferMu.Unlock() d.PanicIfTrue(height == 0) h := c.Hash() if _, present := lvs.bufferedChunks[h]; !present { lvs.bufferedChunks[h] = c lvs.bufferedChunkSize += uint64(len(c.Data())) } put := func(h hash.Hash, c chunks.Chunk) { lvs.cs.Put(c) lvs.bufferedChunkSize -= uint64(len(c.Data())) delete(lvs.bufferedChunks, h) } putChildren := func(parent hash.Hash) { pending, isBuffered := lvs.bufferedChunks[parent] if !isBuffered { return } WalkRefs(pending, func(grandchildRef Ref) { gch := grandchildRef.TargetHash() if pending, present := lvs.bufferedChunks[gch]; present { put(gch, pending) } }) delete(lvs.withBufferedChildren, parent) return } // Enforce invariant (1) if height > 1 { v.WalkRefs(func(childRef Ref) { childHash := childRef.TargetHash() if _, isBuffered := lvs.bufferedChunks[childHash]; isBuffered { lvs.withBufferedChildren[h] = height } else if lvs.enforceCompleteness { // If the childRef isn't presently buffered, we must consider it an // unresolved ref. 
lvs.unresolvedRefs.Insert(childHash) } if _, hasBufferedChildren := lvs.withBufferedChildren[childHash]; hasBufferedChildren { putChildren(childHash) } }) } // Enforce invariant (2) for lvs.bufferedChunkSize > lvs.bufferedChunksMax { var tallest hash.Hash var height uint64 = 0 for parent, ht := range lvs.withBufferedChildren { if ht > height { tallest = parent height = ht } } if height == 0 { // This can happen if there are no pending parents var chunk chunks.Chunk for tallest, chunk = range lvs.bufferedChunks { // Any pendingPut is as good as another in this case, so take the first one break } put(tallest, chunk) continue } putChildren(tallest) } } func (lvs *ValueStore) Root() hash.Hash { return lvs.cs.Root() } func (lvs *ValueStore) Rebase() { lvs.cs.Rebase() } // Commit() flushes all bufferedChunks into the ChunkStore, with best-effort // locality, and attempts to Commit, updating the root to |current| (or keeping // it the same as Root()). If the root has moved since this ValueStore was // opened, or last Rebased(), it will return false and will have internally // rebased. Until Commit() succeeds, no work of the ValueStore will be visible // to other readers of the underlying ChunkStore. func (lvs *ValueStore) Commit(current, last hash.Hash) bool { return func() bool { lvs.bufferMu.Lock() defer lvs.bufferMu.Unlock() put := func(h hash.Hash, chunk chunks.Chunk) { lvs.cs.Put(chunk) delete(lvs.bufferedChunks, h) lvs.bufferedChunkSize -= uint64(len(chunk.Data())) } for parent := range lvs.withBufferedChildren { if pending, present := lvs.bufferedChunks[parent]; present { WalkRefs(pending, func(reachable Ref) { if pending, present := lvs.bufferedChunks[reachable.TargetHash()]; present { put(reachable.TargetHash(), pending) } }) put(parent, pending) } } for _, c := range lvs.bufferedChunks { // Can't use put() because it's wrong to delete from a lvs.bufferedChunks while iterating it. 
lvs.cs.Put(c) lvs.bufferedChunkSize -= uint64(len(c.Data())) } d.PanicIfFalse(lvs.bufferedChunkSize == 0) lvs.withBufferedChildren = map[hash.Hash]uint64{} lvs.bufferedChunks = map[hash.Hash]chunks.Chunk{} if lvs.enforceCompleteness { if (current != hash.Hash{} && current != lvs.Root()) { if _, ok := lvs.bufferedChunks[current]; !ok { // If the client is attempting to move the root and the referenced // value isn't still buffered, we need to ensure that it is contained // in the ChunkStore. lvs.unresolvedRefs.Insert(current) } } PanicIfDangling(lvs.unresolvedRefs, lvs.cs) } if !lvs.cs.Commit(current, last) { return false } if lvs.enforceCompleteness { lvs.unresolvedRefs = hash.HashSet{} } return true }() } // Close closes the underlying ChunkStore func (lvs *ValueStore) Close() error { return lvs.cs.Close() } func getTargetType(refBase Ref) *Type { refType := TypeOf(refBase) d.PanicIfFalse(RefKind == refType.TargetKind()) return refType.Desc.(CompoundDesc).ElemTypes[0] } ================================================ FILE: go/types/value_store_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"testing"

	"github.com/attic-labs/noms/go/chunks"
	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
)

// TestValueReadWriteRead checks that a value is absent before it is written
// and readable (and equal) after it is written and committed.
func TestValueReadWriteRead(t *testing.T) {
	assert := assert.New(t)

	s := String("hello")
	vs := newTestValueStore()
	assert.Nil(vs.ReadValue(s.Hash())) // nil
	h := vs.WriteValue(s).TargetHash()
	vs.Commit(vs.Root(), vs.Root())
	v := vs.ReadValue(h) // non-nil
	if assert.NotNil(v) {
		assert.True(s.Equals(v), "%s != %s", EncodedValue(s), EncodedValue(v))
	}
}

// TestReadWriteCache checks that a second read of the same hash does not
// increase the store's read count.
func TestReadWriteCache(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.TestStorage{}
	ts := storage.NewView()
	vs := NewValueStore(ts)

	var v Value = Bool(true)
	r := vs.WriteValue(v)
	assert.NotEqual(hash.Hash{}, r.TargetHash())
	vs.Commit(vs.Root(), vs.Root())
	assert.Equal(1, ts.Writes)

	v = vs.ReadValue(r.TargetHash())
	assert.True(v.Equals(Bool(true)))
	assert.Equal(1, ts.Reads)

	// Second read of the same hash: the read count must not change.
	v = vs.ReadValue(r.TargetHash())
	assert.True(v.Equals(Bool(true)))
	assert.Equal(1, ts.Reads)
}

// TestValueReadMany checks ReadManyValues against a mix of committed values,
// a value that was written but not yet committed, and a hash that was never
// written.
func TestValueReadMany(t *testing.T) {
	assert := assert.New(t)

	vals := ValueSlice{String("hello"), Bool(true), Number(42)}
	vs := newTestValueStore()
	hashes := hash.HashSlice{}
	for _, v := range vals {
		h := vs.WriteValue(v).TargetHash()
		hashes = append(hashes, h)
		vs.Commit(vs.Root(), vs.Root())
	}

	// Get one Value into vs's Value cache
	vs.ReadValue(vals[0].Hash())

	// Get one Value into vs's pendingPuts
	three := Number(3)
	vals = append(vals, three)
	vs.WriteValue(three)
	hashes = append(hashes, three.Hash())

	// Add one Value to request that's not in vs
	hashes = append(hashes, Bool(false).Hash())

	found := map[hash.Hash]Value{}
	readValues := vs.ReadManyValues(hashes)

	for i, v := range readValues {
		if v != nil {
			found[hashes[i]] = v
		}
	}

	assert.Len(found, len(vals))
	for _, v := range vals {
		assert.True(v.Equals(found[v.Hash()]))
	}
}

// TestValueWriteFlush checks that writes accumulate buffered bytes and that
// Commit brings the buffered size back to zero.
func TestValueWriteFlush(t *testing.T) {
	assert := assert.New(t)

	vals := ValueSlice{String("hello"), Bool(true), Number(42)}
	vs := newTestValueStore()
	hashes := hash.HashSet{}
	for _, v := range vals {
		hashes.Insert(vs.WriteValue(v).TargetHash())
	}
	assert.NotZero(vs.bufferedChunkSize)

	vs.Commit(vs.Root(), vs.Root())
	assert.Zero(vs.bufferedChunkSize)
}

// checkingChunkStore wraps a ChunkStore and asserts that chunks are Put in
// exactly the order registered through expect.
type checkingChunkStore struct {
	chunks.ChunkStore
	a             *assert.Assertions
	expectedOrder hash.HashSlice
}

// expect appends the target hashes of rs to the expected Put order.
func (cbs *checkingChunkStore) expect(rs ...Ref) {
	for _, r := range rs {
		cbs.expectedOrder = append(cbs.expectedOrder, r.TargetHash())
	}
}

// Put asserts that c is the next expected chunk, consumes that expectation,
// and forwards the Put to the wrapped store.
func (cbs *checkingChunkStore) Put(c chunks.Chunk) {
	if cbs.a.NotZero(len(cbs.expectedOrder), "Unexpected Put of %s", c.Hash()) {
		cbs.a.Equal(cbs.expectedOrder[0], c.Hash())
		cbs.expectedOrder = cbs.expectedOrder[1:]
	}
	cbs.ChunkStore.Put(c)
}

// Flush asserts that every expected Put has been observed.
func (cbs *checkingChunkStore) Flush() {
	cbs.a.Empty(cbs.expectedOrder)
}

// TestFlushOrder checks the order in which buffered chunks reach the
// ChunkStore for the graph sketched below.
func TestFlushOrder(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.TestStorage{}
	ccs := &checkingChunkStore{storage.NewView(), assert, nil}
	vs := NewValueStore(ccs)
	// Graph, which should be flushed grandchildren-first, bottom-up
	//         l
	//        / \
	//      ml1  ml2
	//     /  \    \
	//    b    ml   f
	//        /  \
	//       s    n
	//
	// Expected order: s, n, b, ml, f, ml1, ml2, l
	s := String("oy")
	n := Number(42)
	sr, nr := vs.WriteValue(s), vs.WriteValue(n)
	ccs.expect(sr, nr)
	ml := NewList(vs, sr, nr)

	b := NewEmptyBlob(vs)
	br, mlr := vs.WriteValue(b), vs.WriteValue(ml)
	ccs.expect(br, mlr)
	ml1 := NewList(vs, br, mlr)

	f := Bool(false)
	fr := vs.WriteValue(f)
	ccs.expect(fr)
	ml2 := NewList(vs, fr)

	ml1r, ml2r := vs.WriteValue(ml1), vs.WriteValue(ml2)
	ccs.expect(ml1r, ml2r)
	l := NewList(vs, ml1r, ml2r)

	r := vs.WriteValue(l)
	ccs.expect(r)
	vs.Commit(vs.Root(), vs.Root())
}

// TestFlushOverSize uses a 30-byte pending limit and expects the string
// chunk to be Put without any Commit.
func TestFlushOverSize(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.TestStorage{}
	ccs := &checkingChunkStore{storage.NewView(), assert, nil}
	vs := newValueStoreWithCacheAndPending(ccs, 0, 30)

	s := String("oy")
	sr := vs.WriteValue(s)
	ccs.expect(sr)
	NewList(vs, sr) // will write the root chunk
}

// TestTolerateTopDown checks that writing a parent before re-writing its
// already-committed descendants still flushes in a sensible order.
func TestTolerateTopDown(t *testing.T) {
	assert := assert.New(t)
	storage := &chunks.TestStorage{}
	ccs := &checkingChunkStore{storage.NewView(), assert, nil}
	vs := NewValueStore(ccs)
	// Once the L-ML-S portion of this graph is written once, it's legal to make a Struct ST that contains a ref directly to ML and write it. Then you can write S and ML and Flush ST, which constitutes top-down writing.
	//      L    ST
	//       \  /
	//        ML
	//        /
	//       S
	S := String("oy")
	sr := vs.WriteValue(S)
	ccs.expect(sr)

	ML := NewList(vs, sr)
	mlr := vs.WriteValue(ML)
	ccs.expect(mlr)

	L := NewList(vs, mlr)
	lr := vs.WriteValue(L)
	ccs.expect(lr)

	vs.Commit(vs.Root(), vs.Root())

	assert.Zero(len(vs.bufferedChunks))

	ST := NewStruct("", StructData{"r": mlr})
	str := vs.WriteValue(ST) // ST into bufferedChunks
	vs.WriteValue(S)         // S into bufferedChunks
	vs.WriteValue(ML)        // ML into bufferedChunks AND withBufferedChildren

	// At this point, ValueStore believes ST is a standalone chunk, and that ML -> S
	// So, it'll look at ML, the one parent it knows about, first and write its child (S). Then, it'll write ML, and then it'll flush the remaining buffered chunks, which is just ST.
	ccs.expect(sr, mlr, str)
	vs.Commit(vs.Root(), vs.Root())
}

// TestPanicOnBadVersion checks that both reading and writing panic when the
// ChunkStore reports a data version other than the SDK's.
func TestPanicOnBadVersion(t *testing.T) {
	storage := &chunks.MemoryStorage{}
	t.Run("Read", func(t *testing.T) {
		cvs := NewValueStore(&badVersionStore{ChunkStore: storage.NewView()})
		assert.Panics(t, func() { cvs.ReadValue(hash.Hash{}) })
	})
	t.Run("Write", func(t *testing.T) {
		cvs := NewValueStore(&badVersionStore{ChunkStore: storage.NewView()})
		assert.Panics(t, func() {
			cvs.WriteValue(NewEmptyBlob(cvs))
			cvs.Commit(cvs.Root(), cvs.Root())
		})
	})
}

// TestPanicIfDangling checks that Commit panics when a written list refers
// to a value that was never written.
func TestPanicIfDangling(t *testing.T) {
	assert := assert.New(t)
	vs := newTestValueStore()

	r := NewRef(Bool(true))
	l := NewList(vs, r)
	vs.WriteValue(l)

	assert.Panics(func() {
		vs.Commit(vs.Root(), vs.Root())
	})
}

// TestSkipEnforceCompleteness runs the same dangling-ref scenario with
// completeness enforcement disabled; it passes as long as Commit does not
// panic.
func TestSkipEnforceCompleteness(t *testing.T) {
	vs := newTestValueStore()
	vs.SetEnforceCompleteness(false)

	r := NewRef(Bool(true))
	l := NewList(vs, r)
	vs.WriteValue(l)

	vs.Commit(vs.Root(), vs.Root())
}

// badVersionStore is a ChunkStore that always reports a bogus data version.
type badVersionStore struct {
	chunks.ChunkStore
}

// Version returns a version string that never matches the SDK's.
func (b *badVersionStore) Version() string {
	return "BAD"
}

================================================
FILE: go/types/walk.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import "github.com/attic-labs/noms/go/hash"

// SkipValueCallback is invoked by WalkValues for each visited value. Returning
// true tells the walk not to descend into that value.
type SkipValueCallback func(v Value) bool

// valueRec is a value queued for traversal together with a flag saying
// whether the callback should be invoked for it. WalkValues loads prolly trees
// progressively by walking down the tree. We don't want to invoke the value
// callback on internal sub-trees (which are valid values) because they are not
// logical values in the graph.
type valueRec struct {
	v  Value
	cb bool
}

// maxRefCount caps how many refs WalkValues resolves in a single batch read.
const maxRefCount = 1 << 12 // ~16MB of data

// WalkValues recursively walks over all types.Values reachable from target and calls cb on them.
func WalkValues(target Value, vr ValueReader, cb SkipValueCallback) { visited := hash.HashSet{} refs := map[hash.Hash]bool{} values := []valueRec{{target, true}} for len(values) > 0 || len(refs) > 0 { for len(values) > 0 { rec := values[len(values)-1] values = values[:len(values)-1] v := rec.v if rec.cb && cb(v) { continue } if _, ok := v.(Blob); ok { continue // don't traverse into blob ptrees } if r, ok := v.(Ref); ok { refs[r.TargetHash()] = true continue } if col, ok := v.(Collection); ok && !col.asSequence().isLeaf() { col.WalkRefs(func(r Ref) { refs[r.TargetHash()] = false }) continue } v.WalkValues(func(sv Value) { values = append(values, valueRec{sv, true}) }) } if len(refs) == 0 { continue } hs := make(hash.HashSlice, 0, len(refs)) oldRefs := refs refs = map[hash.Hash]bool{} for h := range oldRefs { if _, ok := visited[h]; ok { continue } if len(hs) >= maxRefCount { refs[h] = oldRefs[h] continue } hs = append(hs, h) visited.Insert(h) } if len(hs) > 0 { readValues := vr.ReadManyValues(hs) for i, sv := range readValues { values = append(values, valueRec{sv, oldRefs[hs[i]]}) } } } } func mightContainStructs(t *Type) (mightHaveStructs bool) { if t.TargetKind() == StructKind || t.TargetKind() == ValueKind { mightHaveStructs = true return } t.WalkValues(func(v Value) { mightHaveStructs = mightHaveStructs || mightContainStructs(v.(*Type)) }) return } ================================================ FILE: go/types/walk_refs.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" ) // WalkRefs calls cb() on each Ref that can be decoded from |c|. The results // are precisely equal to DecodeValue(c).WalkRefs(cb), but this should be much // faster. 
func WalkRefs(c chunks.Chunk, cb RefCallback) {
	walkRefs(c.Data(), cb)
}

// walkRefs walks the single encoded value held in data, calling cb for each
// Ref it encounters.
func walkRefs(data []byte, cb RefCallback) {
	rw := newRefWalker(data)
	rw.walkValue(cb)
}

// refWalker scans an encoded value for Refs, skipping over everything else.
type refWalker struct {
	typedBinaryNomsReader
}

// newRefWalker returns a refWalker positioned at the start of buff.
func newRefWalker(buff []byte) refWalker {
	nr := binaryNomsReader{buff, 0}
	// NOTE(review): the second field is presumably the reader's validating
	// flag, i.e. types are skipped rather than validated -- confirm against
	// the typedBinaryNomsReader declaration.
	return refWalker{typedBinaryNomsReader{nr, false}}
}

// walkRef decodes the next value as a Ref and passes it to cb.
func (r *refWalker) walkRef(cb RefCallback) {
	cb(readRef(&(r.typedBinaryNomsReader)))
}

// walkBlobLeafSequence skips a blob leaf: a count followed by that many bytes.
func (r *refWalker) walkBlobLeafSequence() {
	size := r.readCount()
	r.offset += uint32(size)
}

// walkValueSequence walks a count-prefixed run of values.
func (r *refWalker) walkValueSequence(cb RefCallback) {
	count := int(r.readCount())
	for i := 0; i < count; i++ {
		r.walkValue(cb)
	}
}

// walkList walks an encoded List.
func (r *refWalker) walkList(cb RefCallback) {
	r.walkListOrSet(ListKind, cb)
}

// walkSet walks an encoded Set.
func (r *refWalker) walkSet(cb RefCallback) {
	r.walkListOrSet(SetKind, cb)
}

// walkListOrSet walks a List or Set: a level above zero means a meta
// sequence, level zero a leaf sequence of values.
func (r *refWalker) walkListOrSet(kind NomsKind, cb RefCallback) {
	r.skipKind()
	level := r.readCount()
	if level > 0 {
		r.walkMetaSequence(kind, level, cb)
	} else {
		r.walkValueSequence(cb)
	}
}

// walkMap walks an encoded Map, either as a meta sequence or as a leaf
// sequence of key/value pairs.
func (r *refWalker) walkMap(cb RefCallback) {
	r.skipKind()
	level := r.readCount()
	if level > 0 {
		r.walkMetaSequence(MapKind, level, cb)
	} else {
		r.walkMapLeafSequence(cb)
	}
}

// walkBlob walks an encoded Blob. Leaf blobs hold raw bytes and therefore
// contribute no Refs.
func (r *refWalker) walkBlob(cb RefCallback) {
	r.skipKind()
	level := r.readCount()
	if level > 0 {
		r.walkMetaSequence(BlobKind, level, cb)
	} else {
		r.walkBlobLeafSequence()
	}
}

// walkMapLeafSequence walks a count-prefixed run of key/value pairs.
func (r *refWalker) walkMapLeafSequence(cb RefCallback) {
	count := r.readCount()
	for i := uint64(0); i < count; i++ {
		r.walkValue(cb) // k
		r.walkValue(cb) // v
	}
}

// walkMetaSequence walks a count-prefixed run of meta tuples, reporting each
// tuple's child ref to cb. The k and level parameters are not used by the body.
func (r *refWalker) walkMetaSequence(k NomsKind, level uint64, cb RefCallback) {
	count := r.readCount()
	for i := uint64(0); i < count; i++ {
		r.walkRef(cb) // ref to child sequence
		r.skipOrderedKey()
		r.skipCount() // numLeaves
	}
}

// skipOrderedKey advances past an ordered key without reporting any Refs it
// may contain.
func (r *refWalker) skipOrderedKey() {
	switch r.peekKind() {
	case hashKind:
		r.skipKind()
		r.skipHash()
	default:
		// The key is a full value; walk it with a no-op callback purely to
		// advance past it.
		r.walkValue(func(r Ref) {}) // max Value in subtree reachable from here
	}
}

// walkValue dispatches on the kind of the next encoded value, skipping
// primitives and recursing into anything that may contain Refs.
func (r *refWalker) walkValue(cb RefCallback) {
	k := r.peekKind()
	switch k {
	case BlobKind:
		r.walkBlob(cb)
	case BoolKind:
		r.skipKind()
		r.skipBool()
	case NumberKind:
		r.skipKind()
		r.skipNumber()
	case StringKind:
		r.skipKind()
		r.skipString()
	case ListKind:
		r.walkList(cb)
	case MapKind:
		r.walkMap(cb)
	case RefKind:
		r.walkRef(cb)
	case SetKind:
		r.walkSet(cb)
	case StructKind:
		r.walkStruct(cb)
	case TypeKind:
		r.skipKind()
		r.skipType()
	case CycleKind, UnionKind, ValueKind:
		d.Panic("A value instance can never have type %s", k)
	default:
		panic("not reachable")
	}
}

// walkStruct walks an encoded struct.
func (r *refWalker) walkStruct(cb RefCallback) {
	walkStruct(r, cb)
}

================================================
FILE: go/types/walk_refs_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package types

import (
	"bytes"
	"io"
	"math/rand"
	"testing"

	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
)

// TestWalkRefs checks, for a range of value shapes, that the chunk-level
// WalkRefs reports the same Refs in the same order as Value.WalkRefs.
func TestWalkRefs(t *testing.T) {
	// runTest records the refs reported by v.WalkRefs and then asserts that
	// walking v's encoding yields exactly that sequence.
	runTest := func(v Value, t *testing.T) {
		assert := assert.New(t)
		expected := hash.HashSlice{}
		v.WalkRefs(func(r Ref) {
			expected = append(expected, r.TargetHash())
		})
		WalkRefs(EncodeValue(v), func(r Ref) {
			if assert.True(len(expected) > 0) {
				assert.Equal(expected[0], r.TargetHash())
				expected = expected[1:]
			}
		})
		assert.Len(expected, 0)
	}

	t.Run("SingleRef", func(t *testing.T) {
		t.Parallel()
		t.Run("Typed", func(t *testing.T) {
			vrw := newTestValueStore()
			s := NewStruct("", StructData{"n": Number(1)})
			runTest(NewRef(NewMap(vrw, s, Number(2))), t)
		})
		t.Run("OfValue", func(t *testing.T) {
			runTest(ToRefOfValue(NewRef(Bool(false))), t)
		})
	})

	t.Run("Struct", func(t *testing.T) {
		t.Parallel()
		data := StructData{
			"ref": NewRef(Bool(false)),
			"num": Number(42),
		}
		runTest(NewStruct("nom", data), t)
	})

	// must return a slice with an even number of elements
	newValueSlice := func(r *rand.Rand) ValueSlice {
		vs := make(ValueSlice, 256)
		for i := range vs {
			vs[i] = NewStruct("", StructData{"n": Number(r.Uint64())})
		}
		return vs
	}

	t.Run("List", func(t *testing.T) {
		t.Parallel()
		vrw := newTestValueStore()
		r := rand.New(rand.NewSource(0))

		t.Run("OfRefs", func(t *testing.T) {
			l := NewList(vrw, vrw.WriteValue(Number(42)), vrw.WriteValue(Number(0)))
			runTest(l, t)
		})

		t.Run("Chunked", func(t *testing.T) {
			l := NewList(vrw, newValueSlice(r)...)
			// Grow the list until it is no longer a single leaf.
			for l.sequence.isLeaf() {
				l = l.Concat(NewList(vrw, newValueSlice(r)...))
			}
			runTest(l, t)
		})
	})

	t.Run("Set", func(t *testing.T) {
		t.Parallel()
		vrw := newTestValueStore()
		r := rand.New(rand.NewSource(0))

		t.Run("OfRefs", func(t *testing.T) {
			s := NewSet(vrw, vrw.WriteValue(Number(42)), vrw.WriteValue(Number(0)))
			runTest(s, t)
		})

		t.Run("Chunked", func(t *testing.T) {
			s := NewSet(vrw, newValueSlice(r)...)
			// Grow the set until it is no longer a single leaf.
			for s.isLeaf() {
				e := s.Edit()
				e = e.Insert(newValueSlice(r)...)
				s = e.Set()
			}
			runTest(s, t)
		})
	})

	t.Run("Map", func(t *testing.T) {
		t.Parallel()
		vrw := newTestValueStore()
		r := rand.New(rand.NewSource(0))

		t.Run("OfRefs", func(t *testing.T) {
			m := NewMap(vrw, vrw.WriteValue(Number(42)), vrw.WriteValue(Number(0)))
			runTest(m, t)
		})

		t.Run("Chunked", func(t *testing.T) {
			m := NewMap(vrw, newValueSlice(r)...)
			// Grow the map until it is no longer a single leaf; values are
			// consumed pairwise as key, value.
			for m.isLeaf() {
				e := m.Edit()
				vs := newValueSlice(r)
				for i := 0; i < len(vs); i += 2 {
					e = e.Set(vs[i], vs[i+1])
				}
				m = e.Map()
			}
			runTest(m, t)
		})
	})

	t.Run("Blob", func(t *testing.T) {
		t.Parallel()
		vrw := newTestValueStore()
		r := rand.New(rand.NewSource(0))

		scratch := make([]byte, 1024)
		// freshRandomBytes refills scratch with random data and returns a
		// reader over it.
		freshRandomBytes := func() io.Reader {
			r.Read(scratch)
			return bytes.NewReader(scratch)
		}
		b := NewBlob(vrw, freshRandomBytes())
		// Grow the blob until it is no longer a single leaf.
		for b.sequence.isLeaf() {
			b = b.Concat(NewBlob(vrw, freshRandomBytes()))
		}
		runTest(b, t)
	})
}

================================================
FILE: go/types/walk_test.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package types import ( "bytes" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/stretchr/testify/suite" ) func TestWalkTestSuite(t *testing.T) { suite.Run(t, &WalkTestSuite{}) } func TestWalkAllTestSuite(t *testing.T) { suite.Run(t, &WalkAllTestSuite{}) } type WalkAllTestSuite struct { suite.Suite vs *ValueStore ts *chunks.TestStoreView } func (suite *WalkAllTestSuite) SetupTest() { storage := &chunks.TestStorage{} suite.ts = storage.NewView() suite.vs = NewValueStore(suite.ts) } func (suite *WalkAllTestSuite) assertCallbackCount(v Value, expected int) { actual := 0 WalkValues(v, suite.vs, func(c Value) (stop bool) { actual++ return }) suite.Equal(expected, actual) } func (suite *WalkAllTestSuite) assertVisitedOnce(root, v Value) { actual := 0 WalkValues(v, suite.vs, func(c Value) bool { if c == v { actual++ } return false }) suite.Equal(1, actual) } func (suite *WalkAllTestSuite) TestWalkValuesDuplicates() { dup := suite.NewList(Number(9), Number(10), Number(11), Number(12), Number(13)) l := suite.NewList(Number(8), dup, dup) suite.assertCallbackCount(l, 11) } func (suite *WalkAllTestSuite) TestWalkAvoidBlobChunks() { buff := randomBuff(16) blob := NewBlob(suite.vs, bytes.NewReader(buff)) r := suite.vs.WriteValue(blob) suite.True(r.Height() > 1) outBlob := suite.vs.ReadValue(r.TargetHash()).(Blob) suite.Equal(suite.ts.Reads, 0) suite.assertCallbackCount(outBlob, 1) suite.Equal(suite.ts.Reads, 0) } func (suite *WalkAllTestSuite) TestWalkPrimitives() { suite.assertCallbackCount(suite.vs.WriteValue(Number(0.0)), 2) suite.assertCallbackCount(suite.vs.WriteValue(String("hello")), 2) } func (suite *WalkAllTestSuite) TestWalkComposites() { suite.assertCallbackCount(suite.NewList(), 2) suite.assertCallbackCount(suite.NewList(Bool(false), Number(8)), 4) suite.assertCallbackCount(suite.NewSet(), 2) suite.assertCallbackCount(suite.NewSet(Bool(false), Number(8)), 4) 
suite.assertCallbackCount(suite.NewMap(), 2) suite.assertCallbackCount(suite.NewMap(Number(8), Bool(true), Number(0), Bool(false)), 6) } func (suite *WalkAllTestSuite) TestWalkMultilevelList() { count := 1 << 12 nums := make([]Value, count) for i := 0; i < count; i++ { nums[i] = Number(i) } l := NewList(suite.vs, nums...) suite.True(NewRef(l).Height() > 1) suite.assertCallbackCount(l, count+1) r := suite.vs.WriteValue(l) outList := suite.vs.ReadValue(r.TargetHash()) suite.assertCallbackCount(outList, count+1) } func (suite *WalkAllTestSuite) TestWalkType() { t := MakeStructTypeFromFields("TestStruct", FieldMap{ "s": StringType, "b": BoolType, "n": NumberType, "bl": BlobType, "t": TypeType, "v": ValueType, }) suite.assertVisitedOnce(t, t) suite.assertVisitedOnce(t, BoolType) suite.assertVisitedOnce(t, NumberType) suite.assertVisitedOnce(t, StringType) suite.assertVisitedOnce(t, BlobType) suite.assertVisitedOnce(t, TypeType) suite.assertVisitedOnce(t, ValueType) { t2 := MakeListType(BoolType) suite.assertVisitedOnce(t2, t2) suite.assertVisitedOnce(t2, BoolType) } { t2 := MakeSetType(BoolType) suite.assertVisitedOnce(t2, t2) suite.assertVisitedOnce(t2, BoolType) } { t2 := MakeRefType(BoolType) suite.assertVisitedOnce(t2, t2) suite.assertVisitedOnce(t2, BoolType) } t2 := MakeMapType(NumberType, StringType) suite.assertVisitedOnce(t2, t2) suite.assertVisitedOnce(t2, NumberType) suite.assertVisitedOnce(t2, StringType) t3 := MakeUnionType(NumberType, StringType, BoolType) suite.assertVisitedOnce(t3, t3) suite.assertVisitedOnce(t3, BoolType) suite.assertVisitedOnce(t3, NumberType) suite.assertVisitedOnce(t3, StringType) t4 := MakeCycleType("ABC") suite.assertVisitedOnce(t4, t4) } func (suite *WalkTestSuite) skipWorker(composite Value) (reached ValueSlice) { WalkValues(composite, suite.vs, func(v Value) bool { suite.False(v.Equals(suite.deadValue), "Should never have reached %+v", suite.deadValue) reached = append(reached, v) return v.Equals(suite.mustSkip) }) return } // 
Skipping a sub-tree must allow other items in the list to be processed. func (suite *WalkTestSuite) TestSkipListElement() { wholeList := NewList(suite.vs, suite.mustSkip, suite.shouldSee, suite.shouldSee) reached := suite.skipWorker(wholeList) for _, v := range []Value{wholeList, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { suite.True(reached.Contains(v), "Doesn't contain %+v", v) } suite.Len(reached, 6) } func (suite *WalkTestSuite) TestSkipSetElement() { wholeSet := NewSet(suite.vs, suite.mustSkip, suite.shouldSee).Edit().Insert(suite.shouldSee).Set() reached := suite.skipWorker(wholeSet) for _, v := range []Value{wholeSet, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { suite.True(reached.Contains(v), "Doesn't contain %+v", v) } suite.Len(reached, 4) } func (suite *WalkTestSuite) TestSkipMapValue() { shouldAlsoSeeItem := String("Also good") shouldAlsoSee := NewSet(suite.vs, shouldAlsoSeeItem) wholeMap := NewMap(suite.vs, suite.shouldSee, suite.mustSkip, shouldAlsoSee, suite.shouldSee) reached := suite.skipWorker(wholeMap) for _, v := range []Value{wholeMap, suite.shouldSee, suite.shouldSeeItem, suite.mustSkip, shouldAlsoSee, shouldAlsoSeeItem} { suite.True(reached.Contains(v), "Doesn't contain %+v", v) } suite.Len(reached, 8) } func (suite *WalkTestSuite) TestSkipMapKey() { wholeMap := NewMap(suite.vs, suite.mustSkip, suite.shouldSee, suite.shouldSee, suite.shouldSee) reached := suite.skipWorker(wholeMap) for _, v := range []Value{wholeMap, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { suite.True(reached.Contains(v), "Doesn't contain %+v", v) } suite.Len(reached, 8) } func (suite *WalkAllTestSuite) NewList(vs ...Value) Ref { v := NewList(suite.vs, vs...) return suite.vs.WriteValue(v) } func (suite *WalkAllTestSuite) NewMap(vs ...Value) Ref { v := NewMap(suite.vs, vs...) return suite.vs.WriteValue(v) } func (suite *WalkAllTestSuite) NewSet(vs ...Value) Ref { v := NewSet(suite.vs, vs...) 
return suite.vs.WriteValue(v) } func (suite *WalkAllTestSuite) TestWalkNestedComposites() { suite.assertCallbackCount(suite.NewList(suite.NewSet(), Number(8)), 5) suite.assertCallbackCount(suite.NewSet(suite.NewList(), suite.NewSet()), 6) // {"string": "string", // "list": [false true], // "map": {"nested": "string"} // "mtlist": [] // "set": [5 7 8] // []: "wow" // } nested := suite.NewMap( String("string"), String("string"), String("list"), suite.NewList(Bool(false), Bool(true)), String("map"), suite.NewMap(String("nested"), String("string")), String("mtlist"), suite.NewList(), String("set"), suite.NewSet(Number(5), Number(7), Number(8)), suite.NewList(), String("wow"), // note that the dupe list chunk is skipped ) suite.assertCallbackCount(nested, 25) } type WalkTestSuite struct { WalkAllTestSuite shouldSeeItem Value shouldSee Value mustSkip Value deadValue Value } func (suite *WalkTestSuite) SetupTest() { storage := &chunks.TestStorage{} suite.ts = storage.NewView() suite.vs = NewValueStore(suite.ts) suite.shouldSeeItem = String("zzz") suite.shouldSee = NewList(suite.vs, suite.shouldSeeItem) suite.deadValue = Number(0xDEADBEEF) suite.mustSkip = NewList(suite.vs, suite.deadValue) } ================================================ FILE: go/util/clienttest/client_test_suite.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package clienttest import ( "fmt" "io/ioutil" "os" "path" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/exit" "github.com/stretchr/testify/suite" ) const DefaultMemTableSize = 8 * (1 << 20) // 8MB type ClientTestSuite struct { suite.Suite TempDir string DBDir string DBDir2 string ExitStatus int out *os.File err *os.File } type ExitError struct { Code int } func (e ExitError) Error() string { return fmt.Sprintf("main exited with code: %d", e.Code) } func (suite *ClientTestSuite) SetupSuite() { dir, err := ioutil.TempDir(os.TempDir(), "nomstest") d.Chk.NoError(err) stdOutput, err := ioutil.TempFile(dir, "out") d.Chk.NoError(err) errOutput, err := ioutil.TempFile(dir, "err") d.Chk.NoError(err) suite.TempDir = dir suite.DBDir = path.Join(dir, "db") suite.DBDir2 = path.Join(suite.TempDir, "db2") suite.out = stdOutput suite.err = errOutput exit.Exit = MockExit os.Mkdir(suite.DBDir, 0777) os.Mkdir(suite.DBDir2, 0777) } func (suite *ClientTestSuite) TearDownSuite() { suite.out.Close() suite.err.Close() defer d.Chk.NoError(os.RemoveAll(suite.TempDir)) } // MustRun is a wrapper around Run that will panic on Exit or Panic func (suite *ClientTestSuite) MustRun(m func(), args []string) (stdout string, stderr string) { var err interface{} if stdout, stderr, err = suite.Run(m, args); err != nil { panic(err) } return } // Run will execute a function passing to it commandline args, and captures stdout,stderr. // If m() panics the panic is caught, and returned with recoveredError // If m() calls exit.Exit() m() will panic and return ExitError with recoveredError func (suite *ClientTestSuite) Run(m func(), args []string) (stdout string, stderr string, recoveredErr interface{}) { fmt.Println(args) origArgs := os.Args origOut := os.Stdout origErr := os.Stderr os.Args = append([]string{"cmd"}, args...) 
os.Stdout = suite.out os.Stderr = suite.err defer func() { recoveredErr = recover() // Reset everything right away so that error-checking below goes to terminal. os.Args = origArgs os.Stdout = origOut os.Stderr = origErr _, err := suite.out.Seek(0, 0) d.Chk.NoError(err) capturedOut, err := ioutil.ReadAll(suite.out) d.Chk.NoError(err) _, err = suite.out.Seek(0, 0) d.Chk.NoError(err) err = suite.out.Truncate(0) d.Chk.NoError(err) _, err = suite.err.Seek(0, 0) d.Chk.NoError(err) capturedErr, err := ioutil.ReadAll(suite.err) d.Chk.NoError(err) _, err = suite.err.Seek(0, 0) d.Chk.NoError(err) err = suite.err.Truncate(0) d.Chk.NoError(err) stdout, stderr = string(capturedOut), string(capturedErr) }() suite.ExitStatus = 0 m() return } // Mock exit.Exit() implementation for use during testing. func MockExit(status int) { panic(ExitError{status}) } ================================================ FILE: go/util/datetime/date_time.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package datetime implements marshalling of Go DateTime values into Noms structs // with type DateTimeType. package datetime import ( "math" "time" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/types" ) const ( datetypename = "DateTime" hrsEncodingName = "noms-datetime" ) // DateTime implements marshaling of time.Time to and from Noms. type DateTime struct { time.Time } // DateTimeType is the Noms type used to represent date time objects in Noms. // The field secSinceEpoch may contain fractions in cases where seconds are // not sufficient. var DateTimeType = types.MakeStructTypeFromFields(datetypename, types.FieldMap{ "secSinceEpoch": types.NumberType, }) var dateTimeTemplate = types.MakeStructTemplate(datetypename, []string{"secSinceEpoch"}) // Epoch is the unix Epoch. 
This time is very consistent, // which makes it useful for testing or checking for uninitialized values var Epoch = DateTime{time.Unix(0, 0)} func init() { RegisterHRSCommenter(time.Local) } // Now is an alias for a DateTime initialized with time.Now() func Now() DateTime { return DateTime{time.Now()} } // MarshalNoms makes DateTime implement marshal.Marshaler and it makes // DateTime marshal into a Noms struct with type DateTimeType. func (dt DateTime) MarshalNoms(vrw types.ValueReadWriter) (types.Value, error) { return dateTimeTemplate.NewStruct([]types.Value{types.Number(float64(dt.Unix()) + float64(dt.Nanosecond())*1e-9)}), nil } // MarshalNomsType makes DateTime implement marshal.TypeMarshaler and it // allows marshal.MarshalType to work with DateTime. func (dt DateTime) MarshalNomsType() (*types.Type, error) { return DateTimeType, nil } // UnmarshalNoms makes DateTime implement marshal.Unmarshaler and it allows // Noms struct with type DateTimeType able to be unmarshaled onto a DateTime // Go struct func (dt *DateTime) UnmarshalNoms(v types.Value) error { strct := struct { SecSinceEpoch float64 }{} err := marshal.Unmarshal(v, &strct) if err != nil { return err } s, frac := math.Modf(strct.SecSinceEpoch) *dt = DateTime{time.Unix(int64(s), int64(frac*1e9))} return nil } type DateTimeCommenter struct { tz *time.Location } func (c DateTimeCommenter) Comment(v types.Value) string { if !types.IsValueSubtypeOf(v, DateTimeType) { return "" } var dt DateTime marshal.MustUnmarshal(v, &dt) return dt.In(c.tz).Format(time.RFC3339) } func RegisterHRSCommenter(tz *time.Location) { hrsCommenter := DateTimeCommenter{tz: tz} types.RegisterHRSCommenter(datetypename, hrsEncodingName, hrsCommenter) } ================================================ FILE: go/util/datetime/date_time_test.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package datetime import ( "strings" "testing" "time" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestBasics(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() // Since we are using float64 in noms we cannot represent all possible times. dt := DateTime{time.Unix(1234567, 1234567)} nomsValue, err := marshal.Marshal(vs, dt) assert.NoError(err) var dt2 DateTime err = marshal.Unmarshal(nomsValue, &dt2) assert.NoError(err) assert.True(dt.Equal(dt2.Time)) } func TestUnmarshal(t *testing.T) { assert := assert.New(t) test := func(v types.Struct, t time.Time) { var dt DateTime err := marshal.Unmarshal(v, &dt) assert.NoError(err) assert.True(dt.Equal(t)) } for _, name := range []string{"DateTime", "Date", "xxx", ""} { test(types.NewStruct(name, types.StructData{ "secSinceEpoch": types.Number(42), }), time.Unix(42, 0)) } test(types.NewStruct("", types.StructData{ "secSinceEpoch": types.Number(42), "extra": types.String("field"), }), time.Unix(42, 0)) } func TestUnmarshalInvalid(t *testing.T) { assert := assert.New(t) test := func(v types.Value) { var dt DateTime err := marshal.Unmarshal(v, &dt) assert.Error(err) } test(types.Number(42)) test(types.NewStruct("DateTime", types.StructData{})) test(types.NewStruct("DateTime", types.StructData{ "secSinceEpoch": types.String(42), })) test(types.NewStruct("DateTime", types.StructData{ "SecSinceEpoch": types.Number(42), })) test(types.NewStruct("DateTime", types.StructData{ "msSinceEpoch": types.Number(42), })) } func TestMarshal(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() test := func(dt DateTime, expected float64) { v, err := marshal.Marshal(vs, dt) assert.NoError(err) assert.True(types.NewStruct("DateTime", types.StructData{ "secSinceEpoch": 
types.Number(expected), }).Equals(v)) } test(DateTime{time.Unix(0, 0)}, 0) test(DateTime{time.Unix(42, 0)}, 42) test(DateTime{time.Unix(42, 123456789)}, 42.123456789) test(DateTime{time.Unix(123456789, 123456789)}, 123456789.123456789) test(DateTime{time.Unix(-42, 0)}, -42) test(DateTime{time.Unix(-42, -123456789)}, -42.123456789) test(DateTime{time.Unix(-123456789, -123456789)}, -123456789.123456789) } func TestMarshalType(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() dt := DateTime{time.Unix(0, 0)} typ := marshal.MustMarshalType(dt) assert.Equal(DateTimeType, typ) v := marshal.MustMarshal(vs, dt) assert.Equal(typ, types.TypeOf(v)) } func newTestValueStore() *types.ValueStore { st := &chunks.TestStorage{} return types.NewValueStore(st.NewView()) } func TestZeroValues(t *testing.T) { assert := assert.New(t) vs := newTestValueStore() defer vs.Close() dt1 := DateTime{} assert.True(dt1.IsZero()) nomsDate, _ := dt1.MarshalNoms(vs) dt2 := DateTime{} marshal.Unmarshal(nomsDate, &dt2) assert.True(dt2.IsZero()) dt3 := DateTime{} dt3.UnmarshalNoms(nomsDate) assert.True(dt3.IsZero()) } func TestString(t *testing.T) { assert := assert.New(t) dt := DateTime{time.Unix(1234567, 1234567)} // Don't test the actual output since that assert.IsType(dt.String(), "s") } func TestEpoch(t *testing.T) { assert := assert.New(t) assert.Equal(Epoch, DateTime{time.Unix(0, 0)}) } func TestHRSComment(t *testing.T) { a := assert.New(t) vs := newTestValueStore() dt := Now() mdt := marshal.MustMarshal(vs, dt) exp := dt.Format(time.RFC3339) s1 := types.EncodedValue(mdt) a.True(strings.Contains(s1, "{ // "+exp)) RegisterHRSCommenter(time.UTC) exp = dt.In(time.UTC).Format((time.RFC3339)) s1 = types.EncodedValue(mdt) a.True(strings.Contains(s1, "{ // "+exp)) types.UnregisterHRSCommenter(datetypename, hrsEncodingName) s1 = types.EncodedValue(mdt) a.False(strings.Contains(s1, "{ // 20")) } ================================================ FILE: go/util/exit/exit.go 
================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package exit provides a mockable implementation of os.Exit. // That's all! package exit import ( "os" ) var def = func(code int) { os.Exit(code) } var Exit = def // Reset sets the implementation of Exit() to the default. func Reset() { Exit = def } // Fail exits with a failure status. func Fail() { Exit(1) } // Success exits with a success status. func Success() { Exit(0) } ================================================ FILE: go/util/functions/all.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package functions import "sync" // All runs all functions in |fs| in parallel, and returns when all functions have returned. func All(fs ...func()) { wg := &sync.WaitGroup{} wg.Add(len(fs)) for _, f_ := range fs { f := f_ go func() { f() wg.Done() }() } wg.Wait() } ================================================ FILE: go/util/functions/all_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package functions import ( "testing" "github.com/stretchr/testify/assert" ) func TestAll(t *testing.T) { assert := assert.New(t) // Set |res| via |ch| to test it's running in parallel - if not, they'll deadlock. var res int ch := make(chan int) All(func() { ch <- 42 }, func() { res = <-ch }) assert.Equal(42, res) } ================================================ FILE: go/util/json/from_json.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package json import ( "encoding/json" "io" "reflect" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) func nomsValueFromDecodedJSONBase(vrw types.ValueReadWriter, o interface{}, useStruct bool) types.Value { switch o := o.(type) { case string: return types.String(o) case bool: return types.Bool(o) case float64: return types.Number(o) case nil: return nil case []interface{}: items := make([]types.Value, 0, len(o)) for _, v := range o { nv := nomsValueFromDecodedJSONBase(vrw, v, useStruct) if nv != nil { items = append(items, nv) } } return types.NewList(vrw, items...) case map[string]interface{}: var v types.Value if useStruct { structName := "" fields := make(types.StructData, len(o)) for k, v := range o { nv := nomsValueFromDecodedJSONBase(vrw, v, useStruct) if nv != nil { k := types.EscapeStructField(k) fields[k] = nv } } v = types.NewStruct(structName, fields) } else { kv := make([]types.Value, 0, len(o)*2) for k, v := range o { nv := nomsValueFromDecodedJSONBase(vrw, v, useStruct) if nv != nil { kv = append(kv, types.String(k), nv) } } v = types.NewMap(vrw, kv...) } return v default: d.Chk.Fail("Nomsification failed.", "I don't understand %+v, which is of type %s!\n", o, reflect.TypeOf(o).String()) } return nil } // NomsValueFromDecodedJSON takes a generic Go interface{} and recursively // tries to resolve the types within so that it can build up and return // a Noms Value with the same structure. 
// // Currently, the only types supported are the Go versions of legal JSON types: // Primitives: // - float64 // - bool // - string // - nil // // Composites: // - []interface{} // - map[string]interface{} func NomsValueFromDecodedJSON(vrw types.ValueReadWriter, o interface{}, useStruct bool) types.Value { return nomsValueFromDecodedJSONBase(vrw, o, useStruct) } func FromJSON(r io.Reader, vrw types.ValueReadWriter, opts FromOptions) (types.Value, error) { dec := json.NewDecoder(r) // TODO: This is pretty inefficient. It would be better to parse the JSON directly into Noms values, // rather than going through a pile of Go interfaces. var pile interface{} err := dec.Decode(&pile) if err != nil { return nil, err } return NomsValueFromDecodedJSON(vrw, pile, opts.Structs), nil } // FromOptions controls how FromJSON works. type FromOptions struct { // If true, JSON objects are decoded into Noms Structs. Otherwise, they are decoded into Maps. Structs bool } ================================================ FILE: go/util/json/from_json_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package json import ( "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/suite" ) func TestLibTestSuite(t *testing.T) { suite.Run(t, &LibTestSuite{}) } type LibTestSuite struct { suite.Suite vs *types.ValueStore } func (suite *LibTestSuite) SetupTest() { st := &chunks.TestStorage{} suite.vs = types.NewValueStore(st.NewView()) } func (suite *LibTestSuite) TearDownTest() { suite.vs.Close() } func (suite *LibTestSuite) TestPrimitiveTypes() { vs := suite.vs suite.EqualValues(types.String("expected"), NomsValueFromDecodedJSON(vs, "expected", false)) suite.EqualValues(types.Bool(false), NomsValueFromDecodedJSON(vs, false, false)) suite.EqualValues(types.Number(1.7), NomsValueFromDecodedJSON(vs, 1.7, false)) suite.False(NomsValueFromDecodedJSON(vs, 1.7, false).Equals(types.Bool(true))) } func (suite *LibTestSuite) TestCompositeTypes() { vs := suite.vs // [false true] suite.EqualValues( types.NewList(vs).Edit().Append(types.Bool(false)).Append(types.Bool(true)).List(), NomsValueFromDecodedJSON(vs, []interface{}{false, true}, false)) // [[false true]] suite.EqualValues( types.NewList(vs).Edit().Append( types.NewList(vs).Edit().Append(types.Bool(false)).Append(types.Bool(true)).List()).List(), NomsValueFromDecodedJSON(vs, []interface{}{[]interface{}{false, true}}, false)) // {"string": "string", // "list": [false true], // "map": {"nested": "string"} // } m := types.NewMap( vs, types.String("string"), types.String("string"), types.String("list"), types.NewList(vs).Edit().Append(types.Bool(false)).Append(types.Bool(true)).List(), types.String("map"), types.NewMap( vs, types.String("nested"), types.String("string"))) o := NomsValueFromDecodedJSON(vs, map[string]interface{}{ "string": "string", "list": []interface{}{false, true}, "map": map[string]interface{}{"nested": "string"}, }, false) suite.True(m.Equals(o)) } 
func (suite *LibTestSuite) TestCompositeTypeWithStruct() { vs := suite.vs // {"string": "string", // "list": [false true], // "struct": {"nested": "string"} // } tstruct := types.NewStruct("", types.StructData{ "string": types.String("string"), "list": types.NewList(vs).Edit().Append(types.Bool(false)).Append(types.Bool(true)).List(), "struct": types.NewStruct("", types.StructData{ "nested": types.String("string"), }), }) o := NomsValueFromDecodedJSON(vs, map[string]interface{}{ "string": "string", "list": []interface{}{false, true}, "struct": map[string]interface{}{"nested": "string"}, }, true) suite.True(tstruct.Equals(o)) } func (suite *LibTestSuite) TestPanicOnUnsupportedType() { vs := suite.vs suite.Panics(func() { NomsValueFromDecodedJSON(vs, map[int]string{1: "one"}, false) }, "Should panic on map[int]string!") } ================================================ FILE: go/util/json/to_json.go ================================================ // Copyright 2019 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package json import ( "encoding/json" "errors" "fmt" "io" "github.com/attic-labs/noms/go/types" ) // ToJSON encodes a Noms value as JSON. func ToJSON(v types.Value, w io.Writer, opts ToOptions) error { // TODO: This is a quick hack that is expedient. We should marshal directly to the writer without // allocating a bunch of Go values. p, err := toPile(v, opts) if err != nil { return err } enc := json.NewEncoder(w) enc.SetIndent("", opts.Indent) return enc.Encode(p) } // ToOptions controls how ToJSON works. type ToOptions struct { // Enable support for encoding Noms Lists. Lists are encoded as JSON arrays. Lists bool // Enable support for encoding Noms Maps. Maps are encoded as JSON objects. Maps bool // Enable support for encoding Noms Sets. Sets are encoded as JSON arrays. Sets bool // Enable support for encoding Noms Structs. Structs are encoded as JSON objects. 
Structs bool // String to use for indent when pretty-printing Indent string } func toPile(v types.Value, opts ToOptions) (ret interface{}, err error) { switch v := v.(type) { case types.Bool: return bool(v), nil case types.Number: return float64(v), nil case types.String: return string(v), nil case types.Struct: if !opts.Structs { return nil, errors.New("Struct marshaling not enabled") } r := map[string]interface{}{} if v.Name() != "" { return nil, errors.New("Named struct marshaling not supported") } v.IterFields(func(k string, cv types.Value) (stop bool) { var cp interface{} cp, err = toPile(cv, opts) if err != nil { return true } r[k] = cp return false }) return r, err case types.Map: if !opts.Maps { return nil, errors.New("Map marshaling not enabled") } r := make(map[string]interface{}, v.Len()) v.Iter(func(k, cv types.Value) (stop bool) { sk, ok := k.(types.String) if !ok { err = fmt.Errorf("Map key kind %s not supported", types.KindToString[k.Kind()]) return true } var cp interface{} cp, err = toPile(cv, opts) if err != nil { return true } r[string(sk)] = cp return false }) return r, err case types.List: if !opts.Lists { return nil, errors.New("List marshaling not enabled") } r := make([]interface{}, v.Len()) v.Iter(func(cv types.Value, i uint64) (stop bool) { var cp interface{} cp, err = toPile(cv, opts) if err != nil { return true } r[i] = cp return false }) return r, err case types.Set: if !opts.Sets { return nil, errors.New("Set marshaling not enabled") } r := make([]interface{}, 0, v.Len()) v.Iter(func(cv types.Value) (stop bool) { var cp interface{} cp, err = toPile(cv, opts) if err != nil { return true } r = append(r, cp) return false }) return r, err } return nil, fmt.Errorf("Unsupported kind: %s", types.KindToString[v.Kind()]) } ================================================ FILE: go/util/json/to_json_test.go ================================================ // Copyright 2019 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package json import ( "bytes" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/suite" ) func TestToJSONSuite(t *testing.T) { suite.Run(t, &ToJSONSuite{}) } type ToJSONSuite struct { suite.Suite vs *types.ValueStore } func (suite *ToJSONSuite) SetupTest() { st := &chunks.TestStorage{} suite.vs = types.NewValueStore(st.NewView()) } func (suite *ToJSONSuite) TearDownTest() { suite.vs.Close() } func (suite *ToJSONSuite) TestToJSON() { tc := []struct { desc string in types.Value opts ToOptions exp string expError string }{ {"true", types.Bool(true), ToOptions{}, "true", ""}, {"false", types.Bool(false), ToOptions{}, "false", ""}, {"42", types.Number(42), ToOptions{}, "42", ""}, {"88.8", types.Number(88.8), ToOptions{}, "88.8", ""}, {"empty string", types.String(""), ToOptions{}, `""`, ""}, {"foobar", types.String("foobar"), ToOptions{}, `"foobar"`, ""}, {"strings with newlines", types.String(`"\nmonkey`), ToOptions{}, `"\"\\nmonkey"`, ""}, {"structs when not enabled", types.NewStruct("", types.StructData{}), ToOptions{}, "", "Struct marshaling not enabled"}, {"named struct", types.NewStruct("Person", types.StructData{}), ToOptions{Structs: true}, "", "Named struct marshaling not supported"}, {"struct nested errors", types.NewStruct("", types.StructData{"foo": types.NewList(suite.vs)}), ToOptions{Structs: true}, "", "List marshaling not enabled"}, {"empty struct", types.NewStruct("", types.StructData{}), ToOptions{Structs: true}, "{}", ""}, {"non-empty struct", types.NewStruct("", types.StructData{"str": types.String("bar"), "num": types.Number(42)}), ToOptions{Structs: true}, `{"num":42,"str":"bar"}`, ""}, {"list when not enabled", types.NewList(suite.vs), ToOptions{}, "", "List marshaling not enabled"}, {"list nested errors", types.NewList(suite.vs, types.NewSet(suite.vs)), ToOptions{Lists: true}, "", "Set 
marshaling not enabled"}, {"empty list", types.NewList(suite.vs), ToOptions{Lists: true}, "[]", ""}, {"non-empty list", types.NewList(suite.vs, types.Number(42), types.String("foo")), ToOptions{Lists: true}, `[42,"foo"]`, ""}, {"sets when not enabled", types.NewSet(suite.vs), ToOptions{}, "", "Set marshaling not enabled"}, {"set nested errors", types.NewSet(suite.vs, types.NewList(suite.vs)), ToOptions{Sets: true}, "", "List marshaling not enabled"}, {"empty set", types.NewSet(suite.vs), ToOptions{Sets: true}, "[]", ""}, {"non-empty set", types.NewSet(suite.vs, types.Number(42), types.String("foo")), ToOptions{Sets: true}, `[42,"foo"]`, ""}, {"maps when not enabled", types.NewMap(suite.vs), ToOptions{}, "", "Map marshaling not enabled"}, {"map nested errors", types.NewMap(suite.vs, types.String("foo"), types.NewSet(suite.vs)), ToOptions{Maps: true}, "", "Set marshaling not enabled"}, {"map non-string key", types.NewMap(suite.vs, types.Number(42), types.Number(42)), ToOptions{Maps: true}, "", "Map key kind Number not supported"}, {"empty map", types.NewMap(suite.vs), ToOptions{Maps: true}, "{}", ""}, {"non-empty map", types.NewMap(suite.vs, types.String("foo"), types.String("bar"), types.String("baz"), types.Number(42)), ToOptions{Maps: true}, `{"baz":42,"foo":"bar"}`, ""}, {"complex value", types.NewStruct("", types.StructData{ "list": types.NewList(suite.vs, types.NewSet(suite.vs, types.NewMap(suite.vs, types.String("foo"), types.String("bar"), types.String("hot"), types.Number(42))))}), ToOptions{Structs: true, Lists: true, Sets: true, Maps: true}, `{"list":[[{"foo":"bar","hot":42}]]}`, ""}, } for _, t := range tc { buf := &bytes.Buffer{} err := ToJSON(t.in, buf, t.opts) if t.expError != "" { suite.EqualError(err, t.expError, t.desc) suite.Equal("", string(buf.Bytes()), t.desc) } else { suite.NoError(err) suite.Equal(t.exp+"\n", string(buf.Bytes()), t.desc) } } } ================================================ FILE: go/util/math/minmax.go 
================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package math // MaxInt returns the larger of x or y. func MaxInt(x, y int) int { if x > y { return x } return y } // MinInt returns the smaller of x or y. func MinInt(x, y int) int { if x < y { return x } return y } ================================================ FILE: go/util/outputpager/page_output.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package outputpager import ( "io" "os" "os/exec" "sync" "github.com/attic-labs/kingpin" goisatty "github.com/mattn/go-isatty" "github.com/attic-labs/noms/go/d" ) var ( noPager bool ) type Pager struct { Writer io.Writer stdin, stdout *os.File mtx *sync.Mutex doneCh chan struct{} } func Start() *Pager { if noPager || !IsStdoutTty() { return &Pager{os.Stdout, nil, nil, nil, nil} } lessPath, err := exec.LookPath("less") d.Chk.NoError(err) // -F ... Quit if entire file fits on first screen. // -S ... Chop (truncate) long lines rather than wrapping. // -R ... Output "raw" control characters. // -X ... Don't use termcap init/deinit strings. cmd := exec.Command(lessPath, "-FSRX") stdin, stdout, err := os.Pipe() d.Chk.NoError(err) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.Stdin = stdin cmd.Start() p := &Pager{stdout, stdin, stdout, &sync.Mutex{}, make(chan struct{})} go func() { err := cmd.Wait() d.Chk.NoError(err) p.closePipe() p.doneCh <- struct{}{} }() return p } func (p *Pager) Stop() { if p.Writer != os.Stdout { p.closePipe() // Wait until less has fully exited, otherwise it might not have printed the terminal restore characters. 
<-p.doneCh } } func (p *Pager) closePipe() { p.mtx.Lock() defer p.mtx.Unlock() if p.stdin != nil { // Closing the pipe will cause any outstanding writes to stdout fail, and fail from now on. p.stdin.Close() p.stdout.Close() p.stdin, p.stdout = nil, nil } } func RegisterOutputpagerFlags(cmd *kingpin.CmdClause) { cmd.Flag("no-pager", "suppress paging functionality").BoolVar(&noPager) } func IsStdoutTty() bool { return goisatty.IsTerminal(os.Stdout.Fd()) } ================================================ FILE: go/util/profile/profile.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package profile import ( "io" "os" "runtime" "runtime/pprof" "github.com/attic-labs/kingpin" "github.com/attic-labs/noms/go/d" ) var ( cpuProfile string memProfile string blockProfile string ) func RegisterProfileFlags(app *kingpin.Application) { // Must reset globals because under test this can get called multiple times. cpuProfile = "" memProfile = "" blockProfile = "" app.Flag("cpuprofile", "write cpu profile to file").StringVar(&cpuProfile) app.Flag("memprofile", "write memory profile to file").StringVar(&memProfile) app.Flag("blockprofile", "write block profile to file").StringVar(&blockProfile) } // MaybeStartProfile checks the -blockProfile, -cpuProfile, and -memProfile flag and, for each that is set, attempts to start gathering profiling data into the appropriate files. It returns an object with one method, Stop(), that must be called in order to flush profile data to disk before the process terminates. 
func MaybeStartProfile() interface { Stop() } { p := &prof{} if blockProfile != "" { f, err := os.Create(blockProfile) d.PanicIfError(err) runtime.SetBlockProfileRate(1) p.bp = f } if cpuProfile != "" { f, err := os.Create(cpuProfile) d.PanicIfError(err) pprof.StartCPUProfile(f) p.cpu = f } if memProfile != "" { f, err := os.Create(memProfile) d.PanicIfError(err) p.mem = f } return p } type prof struct { bp io.WriteCloser cpu io.Closer mem io.WriteCloser } func (p *prof) Stop() { if p.bp != nil { pprof.Lookup("block").WriteTo(p.bp, 0) p.bp.Close() runtime.SetBlockProfileRate(0) } if p.cpu != nil { pprof.StopCPUProfile() p.cpu.Close() } if p.mem != nil { pprof.WriteHeapProfile(p.mem) p.mem.Close() } } ================================================ FILE: go/util/progressreader/reader.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package progressreader provides an io.Reader that reports progress to a callback package progressreader import ( "io" "time" "github.com/attic-labs/noms/go/util/status" ) type Callback func(seen uint64) func New(inner io.Reader, cb Callback) io.Reader { return &reader{inner, uint64(0), time.Time{}, cb} } type reader struct { inner io.Reader seen uint64 lastTime time.Time cb Callback } func (r *reader) Read(p []byte) (n int, err error) { n, err = r.inner.Read(p) r.seen += uint64(n) if now := time.Now(); now.Sub(r.lastTime) >= status.Rate || err == io.EOF { r.cb(r.seen) r.lastTime = now } return } ================================================ FILE: go/util/random/id.go ================================================ package random import ( "crypto/rand" "encoding/hex" "github.com/attic-labs/noms/go/d" ) var ( reader = rand.Reader ) // Id creates a unique ID which is a random 16 byte hex string func Id() string { data := make([]byte, 16) _, err := reader.Read(data) 
d.Chk.NoError(err) return hex.EncodeToString(data) } ================================================ FILE: go/util/random/id_test.go ================================================ package random import ( "testing" "github.com/stretchr/testify/assert" ) type testReader byte func (r *testReader) Read(dest []byte) (int, error) { for i := 0; i < len(dest); i++ { dest[i] = byte(*r) } return len(dest), nil } func TestBasic(t *testing.T) { assert := assert.New(t) func() { var r testReader oldReader := reader reader = &r defer func() { reader = oldReader }() r = testReader(byte(0x00)) assert.Equal("00000000000000000000000000000000", Id()) r = testReader(byte(0x01)) assert.Equal("01010101010101010101010101010101", Id()) r = testReader(byte(0xFF)) assert.Equal("ffffffffffffffffffffffffffffffff", Id()) }() one := Id() two := Id() assert.NotEqual(one, two) } ================================================ FILE: go/util/sizecache/size_cache.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package sizecache // SizeCache implements a simple LRU cache of interface{}-typed key-value pairs. // When items are added, the "size" of the item must be provided. LRU items will // be expired until the total of all items is below the specified size for the // SizeCache import ( "container/list" "sync" "github.com/attic-labs/noms/go/d" ) type sizeCacheEntry struct { size uint64 lruEntry *list.Element value interface{} } type SizeCache struct { totalSize uint64 maxSize uint64 mu sync.Mutex lru list.List cache map[interface{}]sizeCacheEntry expireCb func(elm interface{}) } type ExpireCallback func(key interface{}) // New creates a SizeCache that will hold up to |maxSize| item data. 
func New(maxSize uint64) *SizeCache {
	var noCallback ExpireCallback
	return NewWithExpireCallback(maxSize, noCallback)
}

// NewWithExpireCallback creates a SizeCache that will hold up to |maxSize|
// item data, and will call cb(key) when the item corresponding with that key
// expires.
func NewWithExpireCallback(maxSize uint64, cb ExpireCallback) *SizeCache {
	sc := &SizeCache{
		cache: map[interface{}]sizeCacheEntry{},
	}
	sc.maxSize = maxSize
	sc.expireCb = cb
	return sc
}

// entry() looks |key| up in the cache. On a miss it returns an empty
// sizeCacheEntry and false. On a hit it moves the key to the back of the lru
// list (marking it most recently used) and returns the entry and true.
// Callers must already hold c.mu before calling entry().
func (c *SizeCache) entry(key interface{}) (sizeCacheEntry, bool) {
	if ce, found := c.cache[key]; found {
		c.lru.MoveToBack(ce.lruEntry)
		return ce, true
	}
	return sizeCacheEntry{}, false
}

// Get searches the cache for |key|. If it is present, its lru entry is moved
// to the back of the queue and (value, true) is returned. Otherwise Get
// returns (nil, false).
func (c *SizeCache) Get(key interface{}) (interface{}, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()

	ce, found := c.entry(key)
	if !found {
		return nil, false
	}
	return ce.value, true
}

// Add will add this element to the cache at the back of the queue as long as
// its size does not exceed maxSize. If the addition of this entry causes the
// size of the cache to exceed maxSize, the necessary entries at the front of
// the queue will be deleted in order to keep the total cache size at or below
// maxSize.
func (c *SizeCache) Add(key interface{}, size uint64, value interface{}) { if size <= c.maxSize { c.mu.Lock() defer c.mu.Unlock() if _, ok := c.entry(key); ok { // this value is already in the cache; just return return } newEl := c.lru.PushBack(key) ce := sizeCacheEntry{size: size, lruEntry: newEl, value: value} c.cache[key] = ce c.totalSize += ce.size for el := c.lru.Front(); el != nil && c.totalSize > c.maxSize; { key1 := el.Value ce, ok := c.cache[key1] if !ok { d.Panic("SizeCache is missing expected value") } next := el.Next() delete(c.cache, key1) c.totalSize -= ce.size c.lru.Remove(el) if c.expireCb != nil { c.expireCb(key1) } el = next } } } // Drop will remove the element associated with the given key from the cache. func (c *SizeCache) Drop(key interface{}) { c.mu.Lock() defer c.mu.Unlock() if entry, ok := c.entry(key); ok { c.totalSize -= entry.size c.lru.Remove(entry.lruEntry) delete(c.cache, key) } } ================================================ FILE: go/util/sizecache/size_cache_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package sizecache

import (
	"fmt"
	"sort"
	"sync"
	"testing"

	"github.com/attic-labs/noms/go/hash"
	"github.com/stretchr/testify/assert"
)

// hashFromString derives a cache key by hashing the bytes of s.
func hashFromString(s string) hash.Hash {
	return hash.Of([]byte(s))
}

// TestSizeCache exercises Add/Get/Drop and checks the LRU ordering and the
// running total size after each step.
func TestSizeCache(t *testing.T) {
	assert := assert.New(t)
	defSize := uint64(200)

	// With a 1024 limit and 200-sized items, at most 5 items fit at once.
	c := New(1024)
	for i, v := range []string{"data-1", "data-2", "data-3", "data-4", "data-5", "data-6", "data-7", "data-8", "data-9"} {
		c.Add(hashFromString(v), defSize, v)
		maxElements := uint64(i + 1)
		if maxElements >= uint64(5) {
			maxElements = uint64(5)
		}
		assert.Equal(maxElements*defSize, c.totalSize)
	}

	// The oldest entries were evicted; "data-5" is now the least recently used.
	_, ok := c.Get(hashFromString("data-1"))
	assert.False(ok)
	assert.Equal(hashFromString("data-5"), c.lru.Front().Value)

	// A successful Get moves the entry to the back of the lru list.
	v, ok := c.Get(hashFromString("data-5"))
	assert.True(ok)
	assert.Equal("data-5", v.(string))
	assert.Equal(hashFromString("data-5"), c.lru.Back().Value)
	assert.Equal(hashFromString("data-6"), c.lru.Front().Value)

	// Re-adding an existing key only refreshes its position.
	c.Add(hashFromString("data-7"), defSize, "data-7")
	assert.Equal(hashFromString("data-7"), c.lru.Back().Value)
	assert.Equal(uint64(1000), c.totalSize)

	// A zero-sized entry with a nil value is stored without evicting anything.
	c.Add(hashFromString("no-data"), 0, nil)
	v, ok = c.Get(hashFromString("no-data"))
	assert.True(ok)
	assert.Nil(v)
	assert.Equal(hashFromString("no-data"), c.lru.Back().Value)
	assert.Equal(uint64(1000), c.totalSize)
	assert.Equal(6, c.lru.Len())
	assert.Equal(6, len(c.cache))

	// Touch every other entry so that "no-data" becomes the oldest.
	for _, v := range []string{"data-5", "data-6", "data-7", "data-8", "data-9"} {
		c.Get(hashFromString(v))
		assert.Equal(hashFromString(v), c.lru.Back().Value)
	}
	assert.Equal(hashFromString("no-data"), c.lru.Front().Value)

	// Adding one more 200-sized item evicts "no-data" and then "data-5".
	c.Add(hashFromString("data-10"), 200, "data-10")
	assert.Equal(uint64(1000), c.totalSize)
	assert.Equal(5, c.lru.Len())
	assert.Equal(5, len(c.cache))

	_, ok = c.Get(hashFromString("no-data"))
	assert.False(ok)
	_, ok = c.Get(hashFromString("data-5"))
	assert.False(ok)

	c.Drop(hashFromString("data-10"))
	assert.Equal(uint64(800), c.totalSize)
	assert.Equal(4, c.lru.Len())
	assert.Equal(4, len(c.cache))
}

// TestSizeCacheWithExpiry checks that the expire callback fires for every
// entry pushed out when one item as large as the whole cache is added.
func TestSizeCacheWithExpiry(t *testing.T) {
	expired := []string{}
	expire := func(key interface{}) {
		expired = append(expired, key.(string))
	}

	c := NewWithExpireCallback(5, expire)
	data := []string{"a", "b", "c", "d", "e"}
	for i, k := range data {
		c.Add(k, 1, i)
	}

	c.Add("big", 5, "thing")
	sort.Sort(sort.StringSlice(expired))
	assert.Equal(t, data, expired)
}

// concurrencySizeCacheTest feeds data through a channel to three goroutines
// that all Add to the same small cache, and waits for them to finish.
func concurrencySizeCacheTest(data []string) {
	dchan := make(chan string, 128)
	go func() {
		for _, d := range data {
			dchan <- d
		}
		close(dchan)
	}()

	cache := New(25)
	wg := sync.WaitGroup{}

	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			for d := range dchan {
				cache.Add(d, uint64(len(d)), d)
			}
			wg.Done()
		}()
	}
	wg.Wait()
}

// I can't guarantee this will fail if the code isn't correct, but in the
// previous version of SizeCache, this was able to reliably repro bug #2663.
func TestConcurrency(t *testing.T) {
	assert := assert.New(t)
	// generateDataStrings returns numStrings strings in which each distinct
	// "data-<i>" value appears up to numValues times in a row.
	generateDataStrings := func(numStrings, numValues int) []string {
		l := []string{}
		for i := 0; len(l) < numStrings; i++ {
			for j := 0; j < numValues && len(l) < numStrings; j++ {
				l = append(l, fmt.Sprintf("data-%d", i))
			}
		}
		return l
	}

	data := generateDataStrings(50, 3)
	for i := 0; i < 100; i++ {
		assert.NotPanics(func() {
			concurrencySizeCacheTest(data)
		})
	}
}

// TestTooLargeValue checks that an item bigger than the cache is not stored.
func TestTooLargeValue(t *testing.T) {
	assert := assert.New(t)

	c := New(1024)
	c.Add(hashFromString("big-data"), 2048, "big-data")
	_, ok := c.Get(hashFromString("big-data"))
	assert.False(ok)
}

// TestZeroSizeCache checks that a cache with maxSize 0 rejects a sized item.
func TestZeroSizeCache(t *testing.T) {
	assert := assert.New(t)

	c := New(0)
	c.Add(hashFromString("data1"), 200, "data1")
	_, ok := c.Get(hashFromString("data1"))
	assert.False(ok)
}

================================================
FILE: go/util/status/status.go
================================================
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 // Package status prints status messages to a console, overwriting previous values. package status import ( "fmt" "time" ) const ( clearLine = "\x1b[2K\r" Rate = 100 * time.Millisecond ) var ( lastTime time.Time lastFormat string lastArgs []interface{} ) func Clear() { fmt.Print(clearLine) reset(time.Time{}) } func WillPrint() bool { return time.Now().Sub(lastTime) >= Rate } func Printf(format string, args ...interface{}) { now := time.Now() if now.Sub(lastTime) < Rate { lastFormat, lastArgs = format, args } else { fmt.Printf(clearLine+format, args...) reset(now) } } func Done() { if lastArgs != nil { fmt.Printf(clearLine+lastFormat, lastArgs...) } fmt.Println() reset(time.Time{}) } func reset(time time.Time) { lastTime = time lastFormat, lastArgs = "", nil } ================================================ FILE: go/util/test/equals_ignore_hashes.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package test import ( "regexp" "strconv" "strings" "testing" "github.com/attic-labs/noms/go/hash" "github.com/stretchr/testify/assert" ) var pattern = regexp.MustCompile("([0-9a-v]{" + strconv.Itoa(hash.StringLen) + "})") // EqualsIgnoreHashes compares two strings, ignoring hashes in them. func EqualsIgnoreHashes(tt *testing.T, expected, actual string) { if RemoveHashes(expected) != RemoveHashes(actual) { assert.Equal(tt, expected, actual) } } func RemoveHashes(str string) string { return pattern.ReplaceAllString(str, strings.Repeat("*", hash.StringLen)) } ================================================ FILE: go/util/verbose/verbose.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package verbose import ( "log" "github.com/attic-labs/kingpin" ) var ( verbose bool quiet bool ) // RegisterVerboseFlags registers -v|--verbose flags for general usage func RegisterVerboseFlags(app *kingpin.Application) { // Must reset globals because under test this can get called multiple times. verbose = false quiet = false app.Flag("verbose", "show more").Short('v').BoolVar(&verbose) app.Flag("quite", "show less").Short('q').BoolVar(&quiet) } // Verbose returns True if the verbose flag was set func Verbose() bool { return verbose } func SetVerbose(v bool) { verbose = v } // Quiet returns True if the verbose flag was set func Quiet() bool { return quiet } func SetQuiet(q bool) { quiet = q } // Log calls Printf(format, args...) iff Verbose() returns true. func Log(format string, args ...interface{}) { if Verbose() { if len(args) > 0 { log.Printf(format+"\n", args...) } else { log.Println(format) } } } ================================================ FILE: go/util/writers/max_line_writer.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package writers import "io" var ( // MaxLinesErr is an instance of MaxLinesError that gets returned by // Write() whenever the number of lines written has exceeded the number // in |MaxLineWriter.MaxLines|. MaxLinesErr = MaxLinesError{"Maximum number of lines written"} ) // MaxLinesError is the type of error returned by Write() whenever the number // of lines written has exceeded the number in |MaxLineWriter.MaxLines|. type MaxLinesError struct { msg string } func (e MaxLinesError) Error() string { return e.msg } // MaxLineWriter provides an io.Writer interface that counts the number of lines // that have been written. 
It will stop writing and returns an error if the // number of lines written exceeds the number specified in MaxLineWriter.NumLines. type MaxLineWriter struct { Dest io.Writer MaxLines uint32 NumLines uint32 } // Write() stops writing and returns an error if an attempt is made to write // any byte after |MaxLines| newLines have been written. For example, if MaxLines // is 1, all bytes will be written up to and including the 1st newline. If there // are any bytes in |data| after the 1st newline, an error will be returned. // // Callers can change the value of |w.MaxLines| before any call to Write(). // Setting MaxLines to 0 will allow any number of newLines. func (w *MaxLineWriter) Write(data []byte) (int, error) { if len(data) == 0 { return 0, nil } checkMax := w.MaxLines > 0 if checkMax && w.NumLines >= w.MaxLines { return 0, MaxLinesErr } var err error byteCnt := 0 for i, b := range data { if b == byte('\n') { w.NumLines++ if checkMax && w.NumLines > w.MaxLines { err = MaxLinesErr break } } else if checkMax && w.NumLines >= w.MaxLines { err = MaxLinesErr break } byteCnt = i } cnt, err1 := w.Dest.Write(data[:byteCnt+1]) if err1 != nil { return cnt, err1 } return cnt, err } ================================================ FILE: go/util/writers/prefix_writer.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package writers import "io" // PrefixWriter makes it easy to prefix lines with a custom prefix. Each time // it writes a byte after a newline('\n') character it calls PrefixFunc() to get // the byte slice that should be written. |NeedsPrefix| can be set to true to // cause a prefix to be written immediately. This is useful for causing a prefix // to get written on the first line. 
type PrefixWriter struct { Dest io.Writer PrefixFunc func(w *PrefixWriter) []byte NeedsPrefix bool NumLines uint32 } // Write() will add a prefix to the beginning of each line. It obtains the // prefix by call |PrefixFunc(w *PrefixWriter)| before printing out any character // following a newLine. Callers can force a prefix to be printed out before the // first character in |data| by setting NeedsPrefix to true. Conversely, callers // can suppress prefixes from being printed by setting NeedsPrefix to false. func (w *PrefixWriter) Write(data []byte) (int, error) { writtenCnt := 0 for i, b := range data { if w.NeedsPrefix { w.NeedsPrefix = false d1 := w.PrefixFunc(w) cnt, err := w.Dest.Write(d1) writtenCnt += cnt if err != nil { return writtenCnt, err } } if b == byte('\n') { w.NumLines++ w.NeedsPrefix = true } cnt, err := w.Dest.Write(data[i : i+1]) writtenCnt += cnt if err != nil { return writtenCnt, err } } return writtenCnt, nil } ================================================ FILE: go/util/writers/writers_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package writers

import (
	"bytes"
	"io"
	"testing"

	"github.com/stretchr/testify/assert"
)

// maxLineTestCase describes one MaxLineWriter scenario: the input, the line
// limit, the output expected in the destination and whether Write must fail.
type maxLineTestCase struct {
	data          string
	maxLines      uint32
	expected      string
	errorExpected bool
}

// TestMaxLineWriter checks the byte count, error and output of a single Write
// for several line limits, including 0 (unlimited).
func TestMaxLineWriter(t *testing.T) {
	assert := assert.New(t)

	tcs := []maxLineTestCase{
		{"hey there\nthis text contains\n3 lines\n", 1, "hey there\n", true},
		{"hey there\nthis text contains\n3 lines\n", 2, "hey there\nthis text contains\n", true},
		{"hey there\nthis text contains\n3 lines\n", 3, "hey there\nthis text contains\n3 lines\n", false},
		{"hey there\nthis text contains\n3 lines\nand more\n", 3, "hey there\nthis text contains\n3 lines\n", true},
		{"hey there\nthis text contains\n3 lines\n", 4, "hey there\nthis text contains\n3 lines\n", false},
		{"hey there\nthis text contains\n3 lines\n", 0, "hey there\nthis text contains\n3 lines\n", false},
		{"\n\n\n\n", 2, "\n\n", true},
	}

	for i, tc := range tcs {
		buf := bytes.NewBuffer(nil)
		mlw := MaxLineWriter{Dest: buf, MaxLines: tc.maxLines}
		l, err := mlw.Write([]byte(tc.data))
		assert.Equal(len(tc.expected), l, "test #%d case failed", i)
		if tc.errorExpected {
			assert.Error(err, "test #%d case failed", i)
			assert.IsType(MaxLinesError{}, err, "test #%d case failed", i)
		} else {
			assert.NoError(err, "test #%d case failed", i)
		}
		assert.Equal(tc.expected, buf.String(), "test #%d case failed", i)
	}
}

// prefixTestCase describes one PrefixWriter scenario: the input, the prefix to
// insert, the expected output and the initial value of NeedsPrefix.
type prefixTestCase struct {
	data        string
	prefix      string
	expected    string
	needsPrefix bool
}

// TestPrefixWriter checks prefix insertion with and without an initial prefix,
// and with an empty prefix.
func TestPrefixWriter(t *testing.T) {
	assert := assert.New(t)

	tcs := []prefixTestCase{
		{"\n", "yo:", "yo:\n", true},
		{"\n", "yo:", "\n", false},
		{"\n\n", "yo:", "yo:\nyo:\n", true},
		{"\n\n", "yo:", "\nyo:\n", false},
		{"hey there\nthis text contains\n3 lines\n", "yo:", "yo:hey there\nyo:this text contains\nyo:3 lines\n", true},
		{"hey there\nthis text contains\n3 lines\n", "yo:", "hey there\nyo:this text contains\nyo:3 lines\n", false},
		{"hey there\nthis text contains\n3 lines\n", "", "hey there\nthis text contains\n3 lines\n", true},
		{"hey there\nthis text contains\n3 lines\n", "", "hey there\nthis text contains\n3 lines\n", false},
	}

	for _, tc := range tcs {
		getPrefix := func(w *PrefixWriter) []byte {
			return []byte(tc.prefix)
		}
		buf := bytes.NewBuffer(nil)
		pw := PrefixWriter{Dest: buf, PrefixFunc: getPrefix, NeedsPrefix: tc.needsPrefix}
		l, err := pw.Write([]byte(tc.data))
		assert.NoError(err)
		// The returned count includes the prefix bytes.
		assert.Equal(len(tc.expected), l)
		assert.Equal(tc.expected, buf.String())
	}
}

// prefixMaxLineTestCase describes a scenario in which a PrefixWriter and a
// MaxLineWriter are chained together.
type prefixMaxLineTestCase struct {
	data          string
	prefix        string
	expected      string
	needsPrefix   bool
	maxLines      uint32
	errorExpected bool
}

// TestPrefixMaxLineWriter runs every case through both chaining orders
// (PrefixWriter -> MaxLineWriter and MaxLineWriter -> PrefixWriter) and
// expects the same result from each.
func TestPrefixMaxLineWriter(t *testing.T) {
	assert := assert.New(t)

	tcs := []prefixMaxLineTestCase{
		{"hey there\nthis text contains\n3 lines\n", "yo:", "yo:hey there\nyo:this text contains\nyo:3 lines\n", true, 0, false},
		{"hey there\nthis text contains\n3 lines\n", "yo:", "yo:hey there\n", true, 1, true},
		{"hey there\nthis text contains\n3 lines\n", "yo:", "hey there\nyo:this text contains\nyo:3 lines\n", false, 0, false},
		{"hey there\nthis text contains\n3 lines\n", "yo:", "hey there\nyo:this text contains\n", false, 2, true},
		{"hey there\nthis text contains\n3 lines\n", "", "hey there\nthis text contains\n3 lines\n", true, 0, false},
		{"hey there\nthis text contains\n3 lines\n", "", "hey there\nthis text contains\n", false, 2, true},
	}

	// doTest writes tc.data to tw and checks the error, the returned count and
	// what ended up in buf.
	doTest := func(tc prefixMaxLineTestCase, tcNum int, buf *bytes.Buffer, tw io.Writer) {
		l, err := tw.Write([]byte(tc.data))
		if tc.errorExpected {
			assert.Error(err, "test #%d case failed", tcNum)
			assert.IsType(MaxLinesError{}, err, "test #%d case failed", tcNum)
		} else {
			assert.NoError(err, "test #%d case failed", tcNum)
		}
		assert.Equal(len(tc.expected), l, "test #%d case failed", tcNum)
		assert.Equal(tc.expected, buf.String(), "test #%d case failed", tcNum)
	}

	for i, tc := range tcs {
		getPrefix := func(w *PrefixWriter) []byte {
			return []byte(tc.prefix)
		}

		// PrefixWriter feeding a MaxLineWriter.
		buf := &bytes.Buffer{}
		mlw := &MaxLineWriter{Dest: buf, MaxLines: tc.maxLines}
		pw := &PrefixWriter{Dest: mlw, PrefixFunc: getPrefix, NeedsPrefix: tc.needsPrefix}
		doTest(tc, i, buf, pw)

		// MaxLineWriter feeding a PrefixWriter.
		buf = &bytes.Buffer{}
		pw = &PrefixWriter{Dest: buf, PrefixFunc: getPrefix, NeedsPrefix: tc.needsPrefix}
		mlw = &MaxLineWriter{Dest: pw, MaxLines: tc.maxLines}
		doTest(tc, i, buf, mlw)
	}
}

================================================
FILE: go.mod
================================================
module github.com/attic-labs/noms

go 1.12

require (
	github.com/BurntSushi/toml v0.3.1
	github.com/aboodman/noms-gx v0.0.0-20180714061401-d6cb97cb040b
	github.com/alecthomas/kingpin v2.2.6+incompatible // indirect
	github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect
	github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
	github.com/attic-labs/graphql v0.0.0-20190507195614-b6552d20145f
	github.com/attic-labs/kingpin v2.2.7-0.20180312050558-442efcfac769+incompatible
	github.com/aws/aws-sdk-go v1.19.26
	github.com/clbanning/mxj v1.8.4
	github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf
	github.com/dustin/go-humanize v1.0.0
	github.com/golang/snappy v0.0.1
	github.com/hanwen/go-fuse v1.0.0
	github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7
	github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d
	github.com/julienschmidt/httprouter v1.2.0
	github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
	github.com/mattn/go-colorable v0.1.1 // indirect
	github.com/mattn/go-isatty v0.0.7
	github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b
	github.com/shirou/gopsutil v2.18.12+incompatible
	github.com/skratchdot/open-golang v0.0.0-20190402232053-79abb63cd66e
	github.com/stretchr/testify v1.3.0
	github.com/syndtr/goleveldb v1.0.0
	golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c
	golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a
	golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223
	gopkg.in/alecthomas/kingpin.v2 v2.2.6
)

================================================
FILE: go.sum
================================================ cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/aboodman/noms-gx v0.0.0-20180714061401-d6cb97cb040b h1:6pe29GhapzKB4egv5gr8JHHFiExf2m81JJw4BKDBC6g= github.com/aboodman/noms-gx v0.0.0-20180714061401-d6cb97cb040b/go.mod h1:ni7quUEZfdz5Q36a9VJgeUlTaYfwY3fS3j/v5WIz8zs= github.com/alecthomas/kingpin v2.2.6+incompatible h1:5svnBTFgJjZvGKyYBtMB0+m5wvrbUHiqye8wRJMlnYI= github.com/alecthomas/kingpin v2.2.6+incompatible/go.mod h1:59OFYbFVLKQKq+mqrL6Rw5bR0c3ACQaawgXx0QYndlE= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5VpdgMhJosfJnn5/FoN2SRZ4p7fJNX58YPaU= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/attic-labs/graphql v0.0.0-20190507195614-b6552d20145f h1:WMEteRGdJItAZfxaCPyL6SEfyh4+bE+LsN50UKz46EA= github.com/attic-labs/graphql v0.0.0-20190507195614-b6552d20145f/go.mod h1:1U3eDKPYQXn3o4jpC2rAlH9THIo+ZOKWSI0FyeG1SEI= github.com/attic-labs/kingpin v2.2.6+incompatible h1:gzq18qMaCcbpq2ysBO51P6D8bficBtmYk6ZjiSOK/OQ= github.com/attic-labs/kingpin v2.2.6+incompatible/go.mod h1:Cp18FeDCvsK+cD2QAGkqerGjrgSXLiJWnjHeY2mneBc= github.com/attic-labs/kingpin v2.2.7-0.20180312050558-442efcfac769+incompatible h1:wd5mq8xSfwCYd1JpQ309s+3tTlP/gifcG2awOA3x5Vk= github.com/attic-labs/kingpin v2.2.7-0.20180312050558-442efcfac769+incompatible/go.mod h1:Cp18FeDCvsK+cD2QAGkqerGjrgSXLiJWnjHeY2mneBc= github.com/aws/aws-sdk-go v1.19.26 h1:GavKlzJDfYQGoS4jn2F+KYYZlR8QEhrLPfpf8+oJhS4= 
github.com/aws/aws-sdk-go v1.19.26/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/clbanning/mxj v1.8.4 h1:HuhwZtbyvyOw+3Z1AowPkU87JkJUSv751ELWaiTpj8I= github.com/clbanning/mxj v1.8.4/go.mod h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng= github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf h1:5ZeQB3mThuz5C2MSER6T5GdtXTF9CMMk42F9BOyRsEQ= github.com/codahale/blake2 v0.0.0-20150924215134-8d10d0420cbf/go.mod h1:BO2rLUAZMrpgh6GBVKi0Gjdqw2MgCtJrtmUdDeZRKjY= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/hanwen/go-fuse v1.0.0 h1:GxS9Zrn6c35/BnfiVsZVWmsG803xwE7eVRDvcf/BEVc= github.com/hanwen/go-fuse v1.0.0/go.mod h1:unqXarDXqzAk0rt98O2tVndEPIpUgLD9+rwFisZH3Ok= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7 h1:K//n/AqR5HjG3qxbrBCL4vJPW0MVFSs9CPK1OOJdRME= github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod 
h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d h1:c93kUJDtVAXFEhsCh5jSxyOJmFHuzcihnslQiX8Urwo= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= github.com/julienschmidt/httprouter v1.2.0 h1:TDTW5Yz1mjftljbcKqRcrYhd4XeOoI98t+9HbQbYf7g= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 h1:l6Y3mFnF46A+CeZsTrT8kVIuhayq1266oxWpDKE7hnQ= github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6/go.mod h1:UtDV9qK925GVmbdjR+e1unqoo+wGWNHHC6XB1Eu6wpE= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/mattn/go-colorable v0.1.1 h1:G1f5SKeVxmagw/IyvzvtZE4Gybcc4Tr1tf7I8z0XgOg= github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ= github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.7 h1:UvyT9uN+3r7yLEYSlJsbQGdsaB/a0DlgWP3pql6iwOc= github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b h1:j7+1HpAFS1zy5+Q4qx1fWh90gTKwiN4QCGoY9TWyyO4= github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/shirou/gopsutil v2.18.12+incompatible h1:1eaJvGomDnH74/5cF4CTmTbLHAriGFsTZppLXDX93OM= github.com/shirou/gopsutil v2.18.12+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/skratchdot/open-golang v0.0.0-20190402232053-79abb63cd66e h1:VAzdS5Nw68fbf5RZ8RDVlUvPXNU6Z3jtPCK/qvm4FoQ= github.com/skratchdot/open-golang v0.0.0-20190402232053-79abb63cd66e/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c h1:uOCk1iQW6Vc18bnC13MfzScl+wdKBmM9Y9kU7Z83/lw= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a h1:tImsplftrFpALCYumobsd0K86vlAs/eXGFms2txfJfA= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 h1:DH4skfRX4EBpamg7iV4ZlCpblAHI6s6TDM39bFZumv8= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= ================================================ FILE: release.sh ================================================ #!/bin/sh git tag -f latest HEAD git push -f origin latest ================================================ FILE: samples/cli/nomsconfig/README.md ================================================ # nomsconfig The noms cli now provides experimental support for configuring a convenient default database and database aliases. You can enable this support by placing a *.nomsconfig* config file (like the [one](.nomsconfig) in this sample) in the directory where you'd like to use the configuration. 
Like git, any noms command issued from that directory or below will use it. # Features - *Database Aliases* - Define simple names to be used in place of database URLs - *Default Database* - Define one database to be used by default when no database is mentioned - *Dot (`.`) Shorthand* - Use `.` instead of repeating dataset/object name in destination # Example This example defines a simple [.nomsconfig](.nomsconfig) to try: ```shell # Default database URL to be used whenever a database is not explicitly provided [db.default] url = "ldb:.noms/tour" # DB alias named `origin` that refers to the remote cli-tour db [db.origin] url = "http://demo.noms.io/cli-tour" # DB alias named `temp` that refers to a noms db stored under /tmp [db.temp] url = "ldb:/tmp/noms/shared" ``` The *[db.default]* section: - Defines a default database - It will be used implicitly whenever a database url is omitted in a command The *[db.origin]* and *[db.temp]* sections: - Define aliases that can be used wherever a db url is required - You can define additional aliases by adding *[db.**alias**]* sections using any **alias** you prefer Dot (`.`) shorthand: - When issuing a command that requires a source and destination (like `noms sync`), you can use `.` in place of the dataset/object in the destination. This is shorthand that repeats whatever was used in the source (see below). You can kick the tires by running noms commands from this directory. Here are some examples and what to expect: ```shell noms ds # -> noms ds ldb:.noms/tour noms ds default # -> noms ds ldb:.noms/tour noms ds origin # -> noms ds http://demo.noms.io/cli-tour noms sync origin::sf-film-locations sf-films # sync ds from origin to default noms log sf-films # -> noms log ldb:.noms/tour::sf-films noms log origin::sf-film-locations # -> noms log http://demo.noms.io/cli-tour::sf-film-locations noms show '#1a2aj8svslsu7g8hplsva6oq6iq3ib6c' # -> noms show ldb:.noms/tour::'#1a2a...' 
noms show origin::'#1a2aj8svslsu7g8hplsva6oq6iq3ib6c' # -> noms show http://demo.noms.io/cli-tour::'#1a2a...' noms diff '#1a2aj8svslsu7g8hplsva6oq6iq3ib6c' origin::. # diff default::object with origin::object noms sync origin::sf-bike-parking . # sync origin::sf-bike-parking to default::sf-bike-parking ``` A few more things to note: - Relative paths will be expanded relative to the directory where the *.nomsconfig* is defined - Use `noms config` to see the current alias definitions with expanded paths - Use `-v` or `--verbose` on any command to see how the command arguments are being resolved - Explicit DB urls are still fully supported ================================================ FILE: samples/go/csv/README.md ================================================ # CSV Importer Imports a CSV file as `List<T>` where `T` is a struct with fields corresponding to the CSV's column headers. The struct spec can also be set manually with the `-header` flag. ## Usage ```shell $ cd csv-import $ go build $ ./csv-import http://localhost:8000::foo ``` ## Some places for CSV files - https://data.cityofnewyork.us/api/views/kku6-nxdu/rows.csv?accessType=DOWNLOAD - http://www.opendatacache.com/ # CSV Exporter Export a dataset in CSV format to stdout with column headers. ## Usage ```shell $ cd csv-export $ go build $ ./csv-export http://localhost:8000::foo ``` ================================================ FILE: samples/go/csv/common.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "fmt" "unicode/utf8" ) // StringToRune returns the rune contained in delimiter or an error.
var (
	rByte byte = 13 // the byte that corresponds to the '\r' rune.
	nByte byte = 10 // the byte that corresponds to the '\n' rune.
)

// reader wraps a buffered reader and rewrites bare CR line endings to LF as
// data passes through Read. CRLF pairs are left untouched.
type reader struct {
	r *bufio.Reader
}

// Read fills p from the underlying reader and replaces every CR that is not
// followed by a LF with a LF.
//
// Only the n bytes actually read are inspected. For a CR in the middle of the
// chunk the following byte in p decides; the underlying reader is peeked only
// when the CR is the last byte read, because that is the only case where the
// next byte is not already in p. A CR at the very end of the stream counts as
// "not followed by LF" and is converted.
//
// The error from the underlying Read is returned unchanged. If a look-ahead
// Peek was required and failed (typically io.EOF), that error is returned
// together with the data, which the io.Reader contract permits.
func (r reader) Read(p []byte) (n int, err error) {
	n, err = r.r.Read(p)
	data := p[:n]
	for i, b := range data {
		if b != rByte {
			continue
		}
		if i+1 < n {
			// The next byte is already in this chunk.
			if data[i+1] != nByte {
				data[i] = nByte
			}
			continue
		}
		// CR is the last byte of this chunk.
		if err != nil {
			// The stream has already failed or ended; nothing can follow.
			data[i] = nByte
			continue
		}
		next, peekErr := r.r.Peek(1)
		if len(next) == 0 || next[0] != nByte {
			data[i] = nByte
		}
		// Peek consumes the bufio.Reader's pending error, so surface it here
		// rather than dropping it.
		err = peekErr
	}
	return n, err
}
return r } ================================================ FILE: samples/go/csv/csv_reader_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "bytes" "strings" "testing" "github.com/stretchr/testify/assert" ) func TestCR(t *testing.T) { testFile := []byte("a,b,c\r1,2,3\r") delimiter, err := StringToRune(",") r := NewCSVReader(bytes.NewReader(testFile), delimiter) lines, err := r.ReadAll() assert.NoError(t, err, "An error occurred while reading the data: %v", err) if len(lines) != 2 { t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines)) } } func TestLF(t *testing.T) { testFile := []byte("a,b,c\n1,2,3\n") delimiter, err := StringToRune(",") r := NewCSVReader(bytes.NewReader(testFile), delimiter) lines, err := r.ReadAll() assert.NoError(t, err, "An error occurred while reading the data: %v", err) if len(lines) != 2 { t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines)) } } func TestCRLF(t *testing.T) { testFile := []byte("a,b,c\r\n1,2,3\r\n") delimiter, err := StringToRune(",") r := NewCSVReader(bytes.NewReader(testFile), delimiter) lines, err := r.ReadAll() assert.NoError(t, err, "An error occurred while reading the data: %v", err) if len(lines) != 2 { t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines)) } } func TestCRInQuote(t *testing.T) { testFile := []byte("a,\"foo,\rbar\",c\r1,\"2\r\n2\",3\r") delimiter, err := StringToRune(",") r := NewCSVReader(bytes.NewReader(testFile), delimiter) lines, err := r.ReadAll() assert.NoError(t, err, "An error occurred while reading the data: %v", err) if len(lines) != 2 { t.Errorf("Wrong number of lines. 
Expected 2, got %d", len(lines)) } if strings.Contains(lines[1][1], "\n\n") { t.Error("The CRLF was converted to a LFLF") } } func TestCRLFEndOfBufferLength(t *testing.T) { testFile := make([]byte, 4096*2, 4096*2) testFile[4095] = 13 // \r byte testFile[4096] = 10 // \n byte delimiter, err := StringToRune(",") r := NewCSVReader(bytes.NewReader(testFile), delimiter) lines, err := r.ReadAll() assert.NoError(t, err, "An error occurred while reading the data: %v", err) if len(lines) != 2 { t.Errorf("Wrong number of lines. Expected 2, got %d", len(lines)) } } ================================================ FILE: samples/go/csv/kind_slice.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "fmt" "strconv" "strings" "github.com/attic-labs/noms/go/types" ) // KindSlice is an alias for []types.NomsKind. It's needed because types.NomsKind are really just 8 bit unsigned ints, which are what Go uses to represent 'byte', and this confuses the Go JSON marshal/unmarshal code -- it treats them as byte arrays and base64 encodes them! type KindSlice []types.NomsKind func (ks KindSlice) MarshalJSON() ([]byte, error) { elems := make([]string, len(ks)) for i, k := range ks { elems[i] = fmt.Sprintf("%d", k) } return []byte("[" + strings.Join(elems, ",") + "]"), nil } func (ks *KindSlice) UnmarshalJSON(value []byte) error { elems := strings.Split(string(value[1:len(value)-1]), ",") *ks = make(KindSlice, len(elems)) for i, e := range elems { ival, err := strconv.ParseUint(e, 10, 8) if err != nil { return err } (*ks)[i] = types.NomsKind(ival) } return nil } ================================================ FILE: samples/go/csv/kind_slice_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "encoding/json" "fmt" "testing" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestKindSliceJSON(t *testing.T) { assert := assert.New(t) ks := KindSlice{types.NumberKind, types.StringKind, types.BoolKind} b, err := json.Marshal(&ks) assert.NoError(err) assert.Equal(fmt.Sprintf("[%d,%d,%d]", ks[0], ks[1], ks[2]), string(b)) var uks KindSlice err = json.Unmarshal(b, &uks) assert.NoError(err, "error with json.Unmarshal") assert.Equal(ks, uks) } ================================================ FILE: samples/go/csv/read.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "encoding/csv" "fmt" "io" "sort" "strconv" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) // StringToKind maps names of valid NomsKinds (e.g. 
Bool, Number, etc) to their associated types.NomsKind var StringToKind = func(kindMap map[types.NomsKind]string) map[string]types.NomsKind { m := map[string]types.NomsKind{} for k, v := range kindMap { m[v] = k } return m }(types.KindToString) // StringsToKinds looks up each element of strs in the StringToKind map and returns a slice of answers func StringsToKinds(strs []string) KindSlice { kinds := make(KindSlice, len(strs)) for i, str := range strs { k, ok := StringToKind[str] if !ok { d.Panic("StringToKind[%s] failed", str) } kinds[i] = k } return kinds } // KindsToStrings looks up each element of kinds in the types.KindToString map and returns a slice of answers func KindsToStrings(kinds KindSlice) []string { strs := make([]string, len(kinds)) for i, k := range kinds { strs[i] = k.String() } return strs } //EscapeStructFieldFromCSV removes special characters and replaces spaces with camelCasing (camel case turns to camelCase) func EscapeStructFieldFromCSV(input string) string { if types.IsValidStructFieldName(input) { return input } return types.CamelCaseFieldName(input) } // MakeStructTemplateFromHeaders creates a struct type from the headers using |kinds| as the type of each field. If |kinds| is empty, default to strings. 
func MakeStructTemplateFromHeaders(headers []string, structName string, kinds KindSlice) (temp types.StructTemplate, fieldOrder []int, kindMap []types.NomsKind) { useStringType := len(kinds) == 0 d.PanicIfFalse(useStringType || len(headers) == len(kinds)) fieldMap := make(map[string]types.NomsKind, len(headers)) origOrder := make(map[string]int, len(headers)) fieldNames := make(sort.StringSlice, len(headers)) for i, key := range headers { fn := EscapeStructFieldFromCSV(key) origOrder[fn] = i kind := types.StringKind if !useStringType { kind = kinds[i] } _, ok := fieldMap[fn] if ok { d.Panic(`Duplicate field name "%s"`, key) } fieldMap[fn] = kind fieldNames[i] = fn } sort.Sort(fieldNames) kindMap = make([]types.NomsKind, len(fieldMap)) fieldOrder = make([]int, len(fieldMap)) for i, fn := range fieldNames { kindMap[i] = fieldMap[fn] fieldOrder[origOrder[fn]] = i } temp = types.MakeStructTemplate(structName, fieldNames) return } // ReadToList takes a CSV reader and reads data into a typed List of structs. // Each row gets read into a struct named structName, described by headers. If // the original data contained headers it is expected that the input reader has // already read those and are pointing at the first data row. // If kinds is non-empty, it will be used to type the fields in the generated // structs; otherwise, they will be left as string-fields. // In addition to the list, ReadToList returns the typeDef of the structs in the // list. func ReadToList(r *csv.Reader, structName string, headers []string, kinds KindSlice, vrw types.ValueReadWriter, limit uint64) (l types.List) { temp, fieldOrder, kindMap := MakeStructTemplateFromHeaders(headers, structName, kinds) valueChan := make(chan types.Value, 128) // TODO: Make this a function param? 
listChan := types.NewStreamingList(vrw, valueChan) cnt := uint64(0) for { row, err := r.Read() if cnt >= limit || err == io.EOF { close(valueChan) break } else if err != nil { panic(err) } cnt++ fields := readFieldsFromRow(row, headers, fieldOrder, kindMap) valueChan <- temp.NewStruct(fields) } return <-listChan } type column struct { ch chan types.Value list <-chan types.List zeroValue types.Value hdr string } // ReadToColumnar takes a CSV reader and reads data from each column into a // separate list. Values from columns in each successive row are appended to the // column-specific lists whose type is described by headers. Finally, a new // "Columnar" struct is created that consists of one field for each column and // each field contains a list of values. // If the original data contained headers it is expected that the input reader // has already read those and are pointing at the first data row. // If kinds is non-empty, it will be used to type the fields in the generated // structs; otherwise, they will be left as string-fields. // In addition to the list, ReadToList returns the typeDef of the structs in the // list. func ReadToColumnar(r *csv.Reader, structName string, headers []string, kinds KindSlice, vrw types.ValueReadWriter, limit uint64) (s types.Struct) { valueChan := make(chan types.Value, 128) // TODO: Make this a function param? 
cols := []column{} fieldOrder := []int{} for i, hdr := range headers { ch := make(chan types.Value, 1024) cols = append(cols, column{ ch: ch, list: types.NewStreamingList(vrw, ch), hdr: hdr, }) fieldOrder = append(fieldOrder, i) } cnt := uint64(0) for { row, err := r.Read() if cnt >= limit || err == io.EOF { close(valueChan) break } else if err != nil { panic(err) } cnt++ fields := readFieldsFromRow(row, headers, fieldOrder, kinds) for i, v := range fields { cols[i].ch <- v } } sd := types.StructData{} for _, col := range cols { close(col.ch) r := vrw.WriteValue(<-col.list) sd[col.hdr] = r } return types.NewStruct("Columnar", sd) } // getFieldIndexByHeaderName takes the collection of headers and the name to search for and returns the index of name within the headers or -1 if not found func getFieldIndexByHeaderName(headers []string, name string) int { for i, header := range headers { if header == name { return i } } return -1 } // getPkIndices takes collection of primary keys as strings and determines if they are integers, if so then use those ints as the indices, otherwise it looks up the strings in the headers to find the indices; returning the collection of int indices representing the primary keys maintaining the order of strPks to the return collection func getPkIndices(strPks []string, headers []string) []int { result := make([]int, len(strPks)) for i, pk := range strPks { pkIdx, ok := strconv.Atoi(pk) if ok == nil { result[i] = pkIdx } else { result[i] = getFieldIndexByHeaderName(headers, pk) } if result[i] < 0 { d.Chk.Fail(fmt.Sprintf("Invalid pk: %v", pk)) } } return result } func readFieldsFromRow(row []string, headers []string, fieldOrder []int, kindMap []types.NomsKind) types.ValueSlice { fields := make(types.ValueSlice, len(headers)) for i, v := range row { if i < len(headers) { fieldOrigIndex := fieldOrder[i] val, err := StringToValue(v, kindMap[fieldOrigIndex]) if err != nil { d.Chk.Fail(fmt.Sprintf("Error parsing value for column '%s': %s", 
headers[i], err)) } fields[fieldOrigIndex] = val } } return fields } // primaryKeyValuesFromFields extracts the values of the primaryKey fields into // array. The values are in the user-specified order. This function returns 2 // objects: // 1) a ValueSlice containing the first n-1 keys. // 2) a single Value which will be used as the key in the leaf map created by // GraphBuilder func primaryKeyValuesFromFields(fields types.ValueSlice, fieldOrder, pkIndices []int) (types.ValueSlice, types.Value) { numPrimaryKeys := len(pkIndices) if numPrimaryKeys == 1 { return nil, fields[fieldOrder[pkIndices[0]]] } keys := make(types.ValueSlice, numPrimaryKeys-1) var value types.Value for i, idx := range pkIndices { k := fields[fieldOrder[idx]] if i < numPrimaryKeys-1 { keys[i] = k } else { value = k } } return keys, value } // ReadToMap takes a CSV reader and reads data into a typed Map of structs. Each // row gets read into a struct named structName, described by headers. If the // original data contained headers it is expected that the input reader has // already read those and are pointing at the first data row. // If kinds is non-empty, it will be used to type the fields in the generated // structs; otherwise, they will be left as string-fields. 
func ReadToMap(r *csv.Reader, structName string, headersRaw []string, primaryKeys []string, kinds KindSlice, vrw types.ValueReadWriter, limit uint64) types.Map { temp, fieldOrder, kindMap := MakeStructTemplateFromHeaders(headersRaw, structName, kinds) pkIndices := getPkIndices(primaryKeys, headersRaw) d.Chk.True(len(pkIndices) >= 1, "No primary key defined when reading into map") gb := types.NewGraphBuilder(vrw, types.MapKind) cnt := uint64(0) for { row, err := r.Read() if cnt >= limit || err == io.EOF { break } else if err != nil { panic(err) } cnt++ fields := readFieldsFromRow(row, headersRaw, fieldOrder, kindMap) graphKeys, mapKey := primaryKeyValuesFromFields(fields, fieldOrder, pkIndices) st := temp.NewStruct(fields) gb.MapSet(graphKeys, mapKey, st) } return gb.Build().(types.Map) } ================================================ FILE: samples/go/csv/read_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "bytes" "encoding/csv" "math" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) var LIMIT = uint64(math.MaxUint64) func TestReadToList(t *testing.T) { assert := assert.New(t) storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) dataString := `a,1,true b,2,false ` r := NewCSVReader(bytes.NewBufferString(dataString), ',') headers := []string{"A", "B", "C"} kinds := KindSlice{types.StringKind, types.NumberKind, types.BoolKind} l := ReadToList(r, "test", headers, kinds, db, LIMIT) assert.Equal(uint64(2), l.Len()) assert.True(l.Get(0).(types.Struct).Get("A").Equals(types.String("a"))) assert.True(l.Get(1).(types.Struct).Get("A").Equals(types.String("b"))) assert.True(l.Get(0).(types.Struct).Get("B").Equals(types.Number(1))) 
assert.True(l.Get(1).(types.Struct).Get("B").Equals(types.Number(2))) assert.True(l.Get(0).(types.Struct).Get("C").Equals(types.Bool(true))) assert.True(l.Get(1).(types.Struct).Get("C").Equals(types.Bool(false))) } func TestReadToMap(t *testing.T) { assert := assert.New(t) storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) dataString := `a,1,true b,2,false ` r := NewCSVReader(bytes.NewBufferString(dataString), ',') headers := []string{"A", "B", "C"} kinds := KindSlice{types.StringKind, types.NumberKind, types.BoolKind} m := ReadToMap(r, "test", headers, []string{"0"}, kinds, db, LIMIT) assert.Equal(uint64(2), m.Len()) assert.True(types.TypeOf(m).Equals( types.MakeMapType(types.StringType, types.MakeStructType("test", types.StructField{Name: "A", Type: types.StringType, Optional: false}, types.StructField{Name: "B", Type: types.NumberType, Optional: false}, types.StructField{Name: "C", Type: types.BoolType, Optional: false}, )))) assert.True(m.Get(types.String("a")).Equals(types.NewStruct("test", types.StructData{ "A": types.String("a"), "B": types.Number(1), "C": types.Bool(true), }))) assert.True(m.Get(types.String("b")).Equals(types.NewStruct("test", types.StructData{ "A": types.String("b"), "B": types.Number(2), "C": types.Bool(false), }))) } func testTrailingHelper(t *testing.T, dataString string) { assert := assert.New(t) storage := &chunks.MemoryStorage{} db1 := datas.NewDatabase(storage.NewView()) defer db1.Close() r := NewCSVReader(bytes.NewBufferString(dataString), ',') headers := []string{"A", "B"} kinds := KindSlice{types.StringKind, types.StringKind} l := ReadToList(r, "test", headers, kinds, db1, LIMIT) assert.Equal(uint64(3), l.Len()) storage = &chunks.MemoryStorage{} db2 := datas.NewDatabase(storage.NewView()) defer db2.Close() r = NewCSVReader(bytes.NewBufferString(dataString), ',') m := ReadToMap(r, "test", headers, []string{"0"}, kinds, db2, LIMIT) assert.Equal(uint64(3), m.Len()) } func TestReadTrailingHole(t 
*testing.T) { dataString := `a,b, d,e, g,h, ` testTrailingHelper(t, dataString) } func TestReadTrailingHoles(t *testing.T) { dataString := `a,b,, d,e g,h ` testTrailingHelper(t, dataString) } func TestReadTrailingValues(t *testing.T) { dataString := `a,b d,e,f g,h,i,j ` testTrailingHelper(t, dataString) } func TestEscapeStructFieldFromCSV(t *testing.T) { assert := assert.New(t) cases := []string{ "a", "a", "1a", "a", "AaZz19_", "AaZz19_", "Q", "Q", "AQ", "AQ", "_content", "content", "Few ¢ents Short", "fewEntsShort", "CAMEL💩case letTerS", "camelcaseLetters", "https://picasaweb.google.com/data", "httpspicasawebgooglecomdata", "💩", "", "11 1💩", "", "-- A B", "aB", "-- A --", "a", "-- A -- B", "aB", } for i := 0; i < len(cases); i += 2 { orig, expected := cases[i], cases[i+1] assert.Equal(expected, EscapeStructFieldFromCSV(orig)) } } func TestReadParseError(t *testing.T) { assert := assert.New(t) storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) dataString := `a,"b` r := NewCSVReader(bytes.NewBufferString(dataString), ',') headers := []string{"A", "B"} kinds := KindSlice{types.StringKind, types.StringKind} func() { defer func() { r := recover() assert.NotNil(r) _, ok := r.(*csv.ParseError) assert.True(ok, "Should be a ParseError") }() ReadToList(r, "test", headers, kinds, db, LIMIT) }() } func TestDuplicateHeaderName(t *testing.T) { assert := assert.New(t) storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) dataString := "1,2\n3,4\n" r := NewCSVReader(bytes.NewBufferString(dataString), ',') headers := []string{"A", "A"} kinds := KindSlice{types.StringKind, types.StringKind} assert.Panics(func() { ReadToList(r, "test", headers, kinds, db, LIMIT) }) } func TestEscapeFieldNames(t *testing.T) { assert := assert.New(t) storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) dataString := "1,2\n" r := NewCSVReader(bytes.NewBufferString(dataString), ',') headers := []string{"A A", "B"} kinds := 
KindSlice{types.NumberKind, types.NumberKind} l := ReadToList(r, "test", headers, kinds, db, LIMIT) assert.Equal(uint64(1), l.Len()) assert.Equal(types.Number(1), l.Get(0).(types.Struct).Get(EscapeStructFieldFromCSV("A A"))) r = NewCSVReader(bytes.NewBufferString(dataString), ',') m := ReadToMap(r, "test", headers, []string{"1"}, kinds, db, LIMIT) assert.Equal(uint64(1), l.Len()) assert.Equal(types.Number(1), m.Get(types.Number(2)).(types.Struct).Get(EscapeStructFieldFromCSV("A A"))) } func TestDefaults(t *testing.T) { assert := assert.New(t) storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) dataString := "42,,,\n" r := NewCSVReader(bytes.NewBufferString(dataString), ',') headers := []string{"A", "B", "C", "D"} kinds := KindSlice{types.NumberKind, types.NumberKind, types.BoolKind, types.StringKind} l := ReadToList(r, "test", headers, kinds, db, LIMIT) assert.Equal(uint64(1), l.Len()) row := l.Get(0).(types.Struct) assert.Equal(types.Number(42), row.Get("A")) assert.Equal(types.Number(0), row.Get("B")) assert.Equal(types.Bool(false), row.Get("C")) assert.Equal(types.String(""), row.Get("D")) } func TestBooleanStrings(t *testing.T) { assert := assert.New(t) storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) dataString := "true,false\n1,0\ny,n\nY,N\nY,\n" r := NewCSVReader(bytes.NewBufferString(dataString), ',') headers := []string{"T", "F"} kinds := KindSlice{types.BoolKind, types.BoolKind} l := ReadToList(r, "test", headers, kinds, db, LIMIT) assert.Equal(uint64(5), l.Len()) for i := uint64(0); i < l.Len(); i++ { row := l.Get(i).(types.Struct) assert.True(types.Bool(true).Equals(row.Get("T"))) assert.True(types.Bool(false).Equals(row.Get("F"))) } } ================================================ FILE: samples/go/csv/schema.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "encoding/csv" "fmt" "io" "math" "strconv" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) type schemaOptions []*typeCanFit func newSchemaOptions(fieldCount int) schemaOptions { options := make([]*typeCanFit, fieldCount, fieldCount) for i := 0; i < fieldCount; i++ { options[i] = &typeCanFit{true, true, true} } return options } func (so schemaOptions) Test(fields []string) { for i, t := range so { if i < len(fields) { t.Test(fields[i]) } } } func (so schemaOptions) MostSpecificKinds() KindSlice { kinds := make(KindSlice, len(so)) for i, t := range so { kinds[i] = t.MostSpecificKind() } return kinds } func (so schemaOptions) ValidKinds() []KindSlice { kinds := make([]KindSlice, len(so)) for i, t := range so { kinds[i] = t.ValidKinds() } return kinds } type typeCanFit struct { boolType bool numberType bool stringType bool } func (tc *typeCanFit) MostSpecificKind() types.NomsKind { if tc.boolType { return types.BoolKind } else if tc.numberType { return types.NumberKind } else { return types.StringKind } } func (tc *typeCanFit) ValidKinds() (kinds KindSlice) { if tc.numberType { kinds = append(kinds, types.NumberKind) } if tc.boolType { kinds = append(kinds, types.BoolKind) } kinds = append(kinds, types.StringKind) return kinds } func (tc *typeCanFit) Test(value string) { tc.testNumbers(value) tc.testBool(value) } func (tc *typeCanFit) testNumbers(value string) { if !tc.numberType { return } fval, err := strconv.ParseFloat(value, 64) if err != nil { tc.numberType = false return } if fval > math.MaxFloat64 { tc.numberType = false } } func (tc *typeCanFit) testBool(value string) { if !tc.boolType { return } _, err := strconv.ParseBool(value) tc.boolType = err == nil } func GetSchema(r *csv.Reader, numSamples int, numFields int) KindSlice { so := newSchemaOptions(numFields) for i := 0; i < numSamples; i++ { row, err := r.Read() if 
// makeKeyString builds a composite key for row by concatenating, for each
// index in indices, separator followed by the row's value in that column.
//
// Rows may be shorter than the widest row because the CSV reader does not
// enforce a field count. A column that lies beyond the end of row contributes
// an empty value instead of causing an index-out-of-range panic, so a missing
// trailing field and an explicitly empty one yield the same key.
func makeKeyString(row []string, indices []int, separator string) string {
	var key []byte
	for _, i := range indices {
		key = append(key, separator...)
		if i < len(row) {
			key = append(key, row[i]...)
		}
	}
	return string(key)
}
combinationsLengthsFromTo(indices, 1, maxLenPrimaryKeyList, func(combination []int) { keys := make(map[string]bool, numSamples) for _, row := range dataToTest { key := makeKeyString(row, combination, "$&$") if _, ok := keys[key]; ok { return } keys[key] = true } // need to copy the combination because it will be changed by caller pksFound = append(pksFound, append([]int{}, combination...)) }) return pksFound } // StringToValue takes a piece of data as a string and attempts to convert it to a types.Value of the appropriate types.NomsKind. func StringToValue(s string, k types.NomsKind) (types.Value, error) { switch k { case types.NumberKind: if s == "" { return types.Number(float64(0)), nil } fval, err := strconv.ParseFloat(s, 64) if err != nil { return nil, fmt.Errorf("Could not parse '%s' into number (%s)", s, err) } return types.Number(fval), nil case types.BoolKind: // TODO: This should probably be configurable. switch s { case "true", "1", "y", "yes", "Y", "YES": return types.Bool(true), nil case "false", "0", "n", "no", "N", "NO", "": return types.Bool(false), nil default: return nil, fmt.Errorf("Could not parse '%s' into bool", s) } case types.StringKind: return types.String(s), nil default: d.Panic("Invalid column type kind:" + types.KindToString[k]) } panic("not reached") } ================================================ FILE: samples/go/csv/schema_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "fmt" "testing" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestSchemaDetection(t *testing.T) { assert := assert.New(t) test := func(input [][]string, expect []KindSlice) { options := newSchemaOptions(len(input[0])) for _, values := range input { options.Test(values) } assert.Equal(expect, options.ValidKinds()) } test( [][]string{ {"foo", "1", "5"}, {"bar", "0", "10"}, {"true", "1", "23"}, {"1", "1", "60"}, {"1.1", "false", "75"}, }, []KindSlice{ {types.StringKind}, {types.BoolKind, types.StringKind}, { types.NumberKind, types.StringKind, }, }, ) test( [][]string{ {"foo"}, {"bar"}, {"true"}, {"1"}, {"1.1"}, }, []KindSlice{ {types.StringKind}, }, ) test( [][]string{ {"true"}, {"1"}, {"1.1"}, }, []KindSlice{ {types.StringKind}, }, ) test( [][]string{ {"true"}, {"false"}, {"True"}, {"False"}, {"TRUE"}, {"FALSE"}, {"1"}, {"0"}, }, []KindSlice{ {types.BoolKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"1.1"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"1.1"}, {"4.940656458412465441765687928682213723651e-50"}, {"-4.940656458412465441765687928682213723651e-50"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"1.1"}, {"1.797693134862315708145274237317043567981e+102"}, {"-1.797693134862315708145274237317043567981e+102"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"1.1"}, {"1.797693134862315708145274237317043567981e+309"}, {"-1.797693134862315708145274237317043567981e+309"}, }, []KindSlice{ { types.StringKind}, }, ) test( [][]string{ {"1"}, {"0"}, }, []KindSlice{ { types.NumberKind, types.BoolKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"0"}, {"-1"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"0"}, {"-0"}, }, []KindSlice{ { types.NumberKind, 
types.StringKind}, }, ) test( [][]string{ {"1"}, {"280"}, {"0"}, {"-1"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"-180"}, {"0"}, {"-1"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"33000"}, {"0"}, {"-1"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"-44000"}, {"0"}, {"-1"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"2547483648"}, {"0"}, {"-1"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {"1"}, {"-4347483648"}, {"0"}, {"-1"}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {fmt.Sprintf("%d", uint64(1<<63))}, {fmt.Sprintf("%d", uint64(1<<63)+1)}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) test( [][]string{ {fmt.Sprintf("%d", uint64(1<<32))}, {fmt.Sprintf("%d", uint64(1<<32)+1)}, }, []KindSlice{ { types.NumberKind, types.StringKind}, }, ) } func TestCombinationsWithLength(t *testing.T) { assert := assert.New(t) test := func(input []int, length int, expect [][]int) { combinations := make([][]int, 0) combinationsWithLength(input, length, func(combination []int) { combinations = append(combinations, append([]int{}, combination...)) }) assert.Equal(expect, combinations) } test([]int{0}, 1, [][]int{ {0}, }) test([]int{1}, 1, [][]int{ {1}, }) test([]int{0, 1}, 1, [][]int{ {0}, {1}, }) test([]int{0, 1}, 2, [][]int{ {0, 1}, }) test([]int{70, 80, 90, 100}, 1, [][]int{ {70}, {80}, {90}, {100}, }) test([]int{70, 80, 90, 100}, 2, [][]int{ {70, 80}, {70, 90}, {70, 100}, {80, 90}, {80, 100}, {90, 100}, }) test([]int{70, 80, 90, 100}, 3, [][]int{ {70, 80, 90}, {70, 80, 100}, {70, 90, 100}, {80, 90, 100}, }) } func TestCombinationsWithLengthFromTo(t *testing.T) { assert := assert.New(t) test := func(input []int, smallestLength, largestLength int, expect [][]int) { combinations := make([][]int, 0) combinationsLengthsFromTo(input, 
smallestLength, largestLength, func(combination []int) { combinations = append(combinations, append([]int{}, combination...)) }) assert.Equal(expect, combinations) } test([]int{0}, 1, 1, [][]int{ {0}, }) test([]int{1}, 1, 1, [][]int{ {1}, }) test([]int{0, 1}, 1, 2, [][]int{ {0}, {1}, {0, 1}, }) test([]int{0, 1}, 2, 2, [][]int{ {0, 1}, }) test([]int{70, 80, 90, 100}, 1, 3, [][]int{ {70}, {80}, {90}, {100}, {70, 80}, {70, 90}, {70, 100}, {80, 90}, {80, 100}, {90, 100}, {70, 80, 90}, {70, 80, 100}, {70, 90, 100}, {80, 90, 100}, }) } ================================================ FILE: samples/go/csv/write.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "encoding/csv" "fmt" "io" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/types" ) func getElemDesc(s types.Collection, index int) types.StructDesc { t := types.TypeOf(s).Desc.(types.CompoundDesc).ElemTypes[index] if types.StructKind != t.TargetKind() { d.Panic("Expected StructKind, found %s", t.Kind()) } return t.Desc.(types.StructDesc) } // GetListElemDesc ensures that l is a types.List of structs, pulls the types.StructDesc that describes the elements of l out of vr, and returns the StructDesc. func GetListElemDesc(l types.List, vr types.ValueReader) types.StructDesc { return getElemDesc(l, 0) } // GetMapElemDesc ensures that m is a types.Map of structs, pulls the types.StructDesc that describes the elements of m out of vr, and returns the StructDesc. 
// If m is a nested types.Map of types.Map, then GetMapElemDesc will descend the levels of the enclosed types.Maps to get to a types.Struct func GetMapElemDesc(m types.Map, vr types.ValueReader) types.StructDesc { t := types.TypeOf(m).Desc.(types.CompoundDesc).ElemTypes[1] if types.StructKind == t.TargetKind() { return t.Desc.(types.StructDesc) } else if types.MapKind == t.TargetKind() { _, v := m.First() return GetMapElemDesc(v.(types.Map), vr) } panic(fmt.Sprintf("Expected StructKind or MapKind, found %s", t.Kind().String())) } func writeValuesFromChan(structChan chan types.Struct, sd types.StructDesc, comma rune, output io.Writer) { fieldNames := getFieldNamesFromStruct(sd) csvWriter := csv.NewWriter(output) csvWriter.Comma = comma if csvWriter.Write(fieldNames) != nil { d.Panic("Failed to write header %v", fieldNames) } record := make([]string, len(fieldNames)) for s := range structChan { i := 0 s.WalkValues(func(v types.Value) { record[i] = fmt.Sprintf("%v", v) i++ }) if csvWriter.Write(record) != nil { d.Panic("Failed to write record %v", record) } } csvWriter.Flush() if csvWriter.Error() != nil { d.Panic("error flushing csv") } } // WriteList takes a types.List l of structs (described by sd) and writes it to output as comma-delineated values. func WriteList(l types.List, sd types.StructDesc, comma rune, output io.Writer) { structChan := make(chan types.Struct, 1024) go func() { l.IterAll(func(v types.Value, index uint64) { structChan <- v.(types.Struct) }) close(structChan) }() writeValuesFromChan(structChan, sd, comma, output) } func sendMapValuesToChan(m types.Map, structChan chan<- types.Struct) { m.IterAll(func(k, v types.Value) { if subMap, ok := v.(types.Map); ok { sendMapValuesToChan(subMap, structChan) } else { structChan <- v.(types.Struct) } }) } // WriteMap takes a types.Map m of structs (described by sd) and writes it to output as comma-delineated values. 
func WriteMap(m types.Map, sd types.StructDesc, comma rune, output io.Writer) { structChan := make(chan types.Struct, 1024) go func() { sendMapValuesToChan(m, structChan) close(structChan) }() writeValuesFromChan(structChan, sd, comma, output) } func getFieldNamesFromStruct(structDesc types.StructDesc) (fieldNames []string) { structDesc.IterFields(func(name string, t *types.Type, optional bool) { if !types.IsPrimitiveKind(t.TargetKind()) { d.Panic("Expected primitive kind, found %s", t.TargetKind().String()) } fieldNames = append(fieldNames, name) }) return } ================================================ FILE: samples/go/csv/write_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package csv import ( "bytes" "encoding/csv" "fmt" "io" "io/ioutil" "os" "strings" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/clienttest" "github.com/stretchr/testify/suite" ) const ( TEST_ROW_STRUCT_NAME = "row" TEST_ROW_FIELDS = "anid,month,rainfall,year" TEST_DATA_SIZE = 200 TEST_YEAR = 2012 ) func TestCSVWrite(t *testing.T) { suite.Run(t, &csvWriteTestSuite{}) } type csvWriteTestSuite struct { clienttest.ClientTestSuite fieldTypes []*types.Type rowStructDesc types.StructDesc comma rune tmpFileName string } func typesToKinds(ts []*types.Type) KindSlice { kinds := make(KindSlice, len(ts)) for i, t := range ts { kinds[i] = t.TargetKind() } return kinds } func (s *csvWriteTestSuite) SetupTest() { input, err := ioutil.TempFile(s.TempDir, "") d.Chk.NoError(err) s.tmpFileName = input.Name() defer input.Close() fieldNames := strings.Split(TEST_ROW_FIELDS, ",") s.fieldTypes = []*types.Type{types.StringType, types.NumberType, types.NumberType, types.NumberType} fields := 
make([]types.StructField, len(fieldNames)) for i, name := range fieldNames { fields[i] = types.StructField{ Name: name, Type: s.fieldTypes[i], } } rowStructType := types.MakeStructType(TEST_ROW_STRUCT_NAME, fields...) s.rowStructDesc = rowStructType.Desc.(types.StructDesc) s.comma, _ = StringToRune(",") createCsvTestExpectationFile(input) } func (s *csvWriteTestSuite) TearDownTest() { os.Remove(s.tmpFileName) } func createCsvTestExpectationFile(w io.Writer) { _, err := io.WriteString(w, TEST_ROW_FIELDS) d.Chk.NoError(err) _, err = io.WriteString(w, "\n") d.Chk.NoError(err) for i := 0; i < TEST_DATA_SIZE; i++ { _, err = io.WriteString(w, fmt.Sprintf("a - %3d,%d,%d,%d\n", i, i%12, i%32, TEST_YEAR+i%4)) d.Chk.NoError(err) } } func startReadingCsvTestExpectationFile(s *csvWriteTestSuite) (cr *csv.Reader, headers []string) { res, err := os.Open(s.tmpFileName) d.PanicIfError(err) cr = NewCSVReader(res, s.comma) headers, err = cr.Read() d.PanicIfError(err) return } func createTestList(s *csvWriteTestSuite) types.List { storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) cr, headers := startReadingCsvTestExpectationFile(s) l := ReadToList(cr, TEST_ROW_STRUCT_NAME, headers, typesToKinds(s.fieldTypes), db, LIMIT) return l } func createTestMap(s *csvWriteTestSuite) types.Map { storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) cr, headers := startReadingCsvTestExpectationFile(s) return ReadToMap(cr, TEST_ROW_STRUCT_NAME, headers, []string{"anid"}, typesToKinds(s.fieldTypes), db, LIMIT) } func createTestNestedMap(s *csvWriteTestSuite) types.Map { storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) cr, headers := startReadingCsvTestExpectationFile(s) return ReadToMap(cr, TEST_ROW_STRUCT_NAME, headers, []string{"anid", "year"}, typesToKinds(s.fieldTypes), db, LIMIT) } func verifyOutput(s *csvWriteTestSuite, r io.Reader) { res, err := os.Open(s.tmpFileName) d.PanicIfError(err) actual, err := 
ioutil.ReadAll(r) d.Chk.NoError(err) expected, err := ioutil.ReadAll(res) d.Chk.NoError(err) s.True(string(expected) == string(actual), "csv files are different") } func (s *csvWriteTestSuite) TestCSVWriteList() { l := createTestList(s) w := new(bytes.Buffer) s.True(TEST_DATA_SIZE == l.Len(), "list length") WriteList(l, s.rowStructDesc, s.comma, w) verifyOutput(s, w) } func (s *csvWriteTestSuite) TestCSVWriteMap() { m := createTestMap(s) w := new(bytes.Buffer) s.True(TEST_DATA_SIZE == m.Len(), "map length") WriteMap(m, s.rowStructDesc, s.comma, w) verifyOutput(s, w) } func (s *csvWriteTestSuite) TestCSVWriteNestedMap() { m := createTestNestedMap(s) w := new(bytes.Buffer) s.True(TEST_DATA_SIZE == m.Len(), "nested map length") WriteMap(m, s.rowStructDesc, s.comma, w) verifyOutput(s, w) } ================================================ FILE: samples/go/decent/README.md ================================================ # About This directory contains two sample applications that demonstrate using Noms in a decentralized environment. Both applications implement multiuser chat, using different strategies. `p2p-chat` is the simplest possible example: a fully local noms replica is run on each node, and all nodes synchronize continuously with each other over HTTP. `ipfs-chat` backs Noms with the [IPFS](https://ipfs.io/) network, so that nodes don't have to keep a full local replica of all data. However, because [Filecoin](http://filecoin.io/) doesn't yet exist, *some node* does have to keep a full replica, so ipfs-chat has a `daemon` mode so that you can run a persistent node somewhere to be the replica of last resort. ================================================ FILE: samples/go/decent/data/godfather.html ================================================

	THE GODFATHER
	_____________

	Screenplay

	by

	MARIO PUZO

	and

	FRANCIS FORD COPPOLA



















THIRD DRAFT				PARAMOUNT PICTURES
					1 Gulf and Western Plaza
March 29, 1971				New York, New York 10019





	INT DAY: DON'S OFFICE (SUMMER 1945)

	The PARAMOUNT Logo is presented austerely over a black
	background.  There is a moment's hesitation, and then the
	simple words in white lettering:

				  THE GODFATHER

	While this remains, we hear: "I believe in America."
	Suddenly we are watching in CLOSE VIEW, AMERIGO BONASERA, a
	man of sixty, dressed in a black suit, on the verge of great
	emotion.

				BONASERA
		America has made my fortune.

	As he speaks, THE VIEW imperceptibly begins to loosen.

				BONASERA
		I raised my daughter in the American
		fashion; I gave her freedom, but
		taught her never to dishonor her
		family.  She found a boy friend,
		not an Italian.  She went to the
		movies with him, stayed out late.
		Two months ago he took her for a
		drive, with another boy friend.
		They made her drink whiskey and
		then they tried to take advantage
		of her.  She resisted; she kept her
		honor.  So they beat her like an
		animal.  When I went to the hospital
		her nose was broken, her jaw was
		shattered and held together by
		wire, and she could not even weep
		because of the pain.

	He can barely speak; he is weeping now.

				BONASERA
		I went to the Police like a good
		American.  These two boys were
		arrested and brought to trial.  The
		judge sentenced them to three years
		in prison, and suspended the
		sentence.  Suspended sentence!
		They went free that very day.  I
		stood in the courtroom like a fool,
		and those bastards, they smiled at
		me.  Then I said to my wife, for
		Justice, we must go to The Godfather.

	By now, THE VIEW is full, and we see Don Corleone's office
	in his home.

	The blinds are closed, and so the room is dark, and with
	patterned shadows.  We are watching BONASERA over the
	shoulder of DON CORLEONE.  TOM HAGEN sits near a small
	table, examining some paperwork, and SONNY CORLEONE stands
	impatiently by the window nearest his father, sipping from a
	glass of wine.  We can HEAR music, and the laughter and
	voices of many people outside.

				DON CORLEONE
		Bonasera, we know each other for
		years, but this is the first time
		you come to me for help.  I don't
		remember the last time you invited
		me to your house for coffee...even
		though our wives are friends.

				BONASERA
		What do you want of me?  I'll give
		you anything you want, but do what
		I ask!

				DON CORLEONE
		And what is that Bonasera?

	BONASERA whispers into the DON's ear.

				DON CORLEONE
		No.  You ask for too much.

				BONASERA
		I ask for Justice.

				DON CORLEONE
		The Court gave you justice.

				BONASERA
		An eye for an eye!

				DON CORLEONE
		But your daughter is still alive.

				BONASERA
		Then make them suffer as she
		suffers.  How much shall I pay you.

	Both HAGEN and SONNY react.

				DON CORLEONE
		You never think to protect yourself
		with real friends.  You think it's
		enough to be an American.  All
		right, the Police protects you,
		there are Courts of Law, so you
		don't need a friend like me.
		But now you come to me and say Don
		Corleone, you must give me justice.
		And you don't ask in respect or
		friendship.  And you don't think to
		call me Godfather; instead you come
		to my house on the day my daughter
		is to be married and you ask me to
		do murder...for money.

				BONASERA
		America has been good to me...

				DON CORLEONE
		Then take the justice from the
		judge, the bitter with the sweet,
		Bonasera.  But if you come to me
		with your friendship, your loyalty,
		then your enemies become my enemies,
		and then, believe me, they would
		fear you...

	Slowly, Bonasera bows his head and murmurs.

				BONASERA
		Be my friend.

				DON CORLEONE
		Good.  From me you'll get Justice.

				BONASERA
		Godfather.

				DON CORLEONE
		Some day, and that day may never
		come, I would like to call upon you
		to do me a service in return.

	EXT DAY: MALL (SUMMER 1945)

	A HIGH ANGLE of the CORLEONE MALL in bright daylight.  There
	are at least five hundred guests filling the main courtyard
	and gardens.  There is music and laughing and dancing and
	countless tables covered with food and wine.

	DON CORLEONE stands at the Gate, flanked on either side by a
	son: FREDO and SONNY, all dressed in the formal attire of
	the wedding party.  He warmly shakes the hands, squeezes the
	hands of the friends and guests, pinches the cheeks of the
	children, and makes them all welcome.  They in turn carry
	with them gallons of homemade wine, cartons of freshly baked
	bread and pastries, and enormous trays of Italian delicacies.

	The entire family poses for a family portrait: DON CORLEONE,
	MAMA, SONNY, his wife, SANDRA, and their children, TOM HAGEN
	and his wife, THERESA, and their BABY; CONSTANZIA, the
	bride, and her bridegroom, CARLO RIZZI.  As they move into
	the pose, THE DON seems preoccupied.

				DON CORLEONE
		Where's Michael?

				SONNY
		He'll be here Pop, it's still early.

				DON CORLEONE
		Then the picture will wait for him.

	Everyone in the group feels the uneasiness as the DON moves
	back to the house.  SONNY gives a delicious smile in the
	direction of the Maid-of-Honor, LUCY MANCINI.  She returns
	it.  Then he moves to his wife.

				SONNY
		Sandra, watch the kids.  They're
		running wild.

				SANDRA
		You watch yourself.

	HAGEN kisses his WIFE, and follows THE DON, passing the wine
	barrels, where a group of FOUR MEN nervously wait.  TOM
	crooks a finger at NAZORINE, who doublechecks that he is
	next, straightens, and follows HAGEN.

	EXT DAY: MALL ENTRANCE (SUMMER 1945)

	Outside the main gate of the Mall, SEVERAL MEN in suits,
	working together with a MAN in a dark sedan, walk in and out
	of the rows of parked cars, writing license plate numbers
	down in their notebooks.  We HEAR the music and laughter
	coming from the party in the distance.

	A MAN stops at a limousine and copies down the number.

	BARZINI, dignified in a black homburg, is always under the
	watchful eyes of TWO BODYGUARDS as he makes his way to
	embrace DON CORLEONE in the courtyard.

	The MEN walk down another row of parked cars.  Put another
	number in the notebook.  A shiny new Cadillac with wooden
	bumpers.

	PETER CLEMENZA, dancing the Tarantella joyously, bumping
	bellies with the ladies.

				CLEMENZA
		Paulie...wine...WINE.

	He mops his sweating forehead with a big handkerchief.
	PAULIE hustles, gets a glass of icy black wine, and brings
	it to him.

				PAULIE
		You look terrif on the floor!

				CLEMENZA
		What are you, a dance judge?  Go do
		your job; take a walk around the
		neighborhood... see everything is
		okay.

	PAULIE nods and leaves; CLEMENZA takes a breath, and leaps
	back into the dance.

	The MEN walk down another row of parked cars.  Put another
	number in the notebook.

	TESSIO, a tall, gentle-looking man, dances with a NINE-YEAR-
	OLD GIRL, her little black party shoes planted on his
	enormous brown shoes.

	The MEN move on to other parked cars, when SONNY storms out
	of the gate, his face flushed with anger, followed by
	CLEMENZA and PAULIE.

				SONNY
		Buddy, this is a private party.

	The MAN doesn't answer, but points to the DRIVER of the
	sedan.  SONNY menacingly thrusts his reddened face at him.
	The DRIVER merely flips open his wallet to a green card,
	without saying a word.  SONNY steps back, spits on the
	ground, turns, and walks away, followed by CLEMENZA, PAULIE,
	and another TWO MEN.  He doesn't say a thing for most of the
	walk back into the courtyard, and then, muttered to PAULIE.

				SONNY
		Goddamn FBI...don't respect nothing.

	INT DAY: DON'S OFFICE (SUMMER 1945)

	DON CORLEONE sits quietly behind his massive desk in the
	dark study.

				NAZORINE
		...a fine boy from Sicily, captured
		by the American Army, and sent to
		New Jersey as a prisoner of war...

				DON CORLEONE
		Nazorine, my friend, tell me what I
		can do.

				NAZORINE
		Now that the war is over, Enzo,
		this boy is being repatriated to
		Italy.  And you see, Godfather...
			  (he wrings his hands,
			  unable to express himself)
		He...my daughter...they...

				DON CORLEONE
		You want him to stay in this country.

				NAZORINE
		Godfather, you understand everything.

				DON CORLEONE
		Tom, what we need is an Act of
		Congress to allow Enzo to become a
		citizen.

				NAZORINE
			  (impressed)
		An Act of Congress!

				HAGEN
			  (nodding)
		It will cost.

	The DON shrugs; such are the way with those things; NAZORINE
	nods.

				NAZORINE
		Is that all?  Godfather, thank
		you...
			  (backing out, enthusiastically)
		Oh, wait till you see the cake I
		made for your beautiful daughter!

	NAZORINE backs out, all smiles, and nods to the GODFATHER.
	DON CORLEONE rises and moves to the Venetian blinds.

				HAGEN
		Who do I give this job to?

	The DON moves to the windows, peeking out through the blinds.

				DON CORLEONE
		Not to one of our paisans...give it
		to a Jew Congressman in another
		district.  Who else is on the list
		for today?

	The DON is peeking out to the MEN around the barrel, waiting
	to see him.

				HAGEN
		Francesco Nippi.  His nephew has
		been refused parole.  A bad case.

	EXT DAY: MALL (SUMMER 1945)

	WHAT HE SEES:

	NIPPI waits nervously by the barrel.

				HAGEN (O.S.)
		His father worked with you in the
		freight yards when you were young.

	LUCA BRASI sitting alone, grotesque and quiet.

				HAGEN (O.S.)
		He's not on the list, but Luca
		Brasi wants to see you.

	INT DAY: DON'S OFFICE (SUMMER 1945)

	The DON turns to HAGEN.

				DON CORLEONE
		Is it necessary?

				HAGEN
		You understand him better than
		anyone.

	The DON nods to this.  Turns back to the blinds and peeks out.

	EXT DAY: MALL (SUMMER 1945)

	WHAT HE SEES:

	MICHAEL CORLEONE, dressed in the uniform of a Marine Captain,
	leads KAY ADAMS through the wedding crowd, occasionally
	stopped and greeted by FRIENDS of the family.

	INT DAY: DON'S OFFICE (SUMMER 1945)

	The DON, inside the office, peering through the blinds,
	following them.

	EXT DAY: MALL (SUMMER 1945)

	MICHAEL moves through the crowd, embraces MAMA and introduces
	her to his GIRL.

	EXT DAY: OFFICE WINDOW (SUMMER 1945)

	The DON's eyes peering through the blinds.

	EXT DAY: MALL TABLES (SUMMER 1945)

	KAY and MICHAEL settle by a table on the edge of the wedding,
	burdened down with plates of food and glasses and wine.  She
	is exhilarated by the enormity of the affair, the music and
	the vitality.

				KAY
		I've never seen anything like it.

				MICHAEL
		I told you I had a lot of relatives.

	KAY looking about, a young and lively thing in a gift shop.
	We see what she sees:

	Her interest is caught by THREE MEN standing by the wine
	barrels.

				KAY
			  (amused)
		Michael, what are those men doing?

				MICHAEL
		They're waiting to see my father.

				KAY
		They're talking to themselves.

				MICHAEL
		They're going to talk to my father,
		which means they're going to ask
		him for something, which means they
		better get it right.

				KAY
		Why do they bother him on a day
		like this?

				MICHAEL
		Because they know that no Sicilian
		will refuse a request on his
		daughter's wedding day.

	EXT DAY: WEDDING PARTY (SUMMER 1945)

	CONNIE CORLEONE, the Bride, is pressing the bodice of her
	overly-fluffy white gown against the groom, CARLO RIZZI.  He
	is bronzed, with curly blondish hair and lovely dimples.
	She absolutely adores him and can barely take her eyes from
	him long enough to thank the various GUESTS for the white
	envelopes they are putting into the large white purse she
	holds.  In fact, if we watch carefully, we can see that one
	of her hands is slid under his jacket, and into his shirt,
	where she is provocatively rubbing the hair on his chest.
	CARLO, on the other hand, has his blue eyes trained on the
	bulging envelopes, and is trying to guess how much cash the
	things hold.

	Discreetly, he moves her hand off of his skin.

				CARLO
			  (whispered)
		Cut it out, Connie.

	The purse, looped by a ribbon of silk around CONNIE's arm,
	is fat with money.

				PAULIE (O.S.)
		What do you think?  Twenty grand?

	A little distance away, a young man, PAULIE GATTO, catches a
	prosciutto sandwich thrown by a friend, without once taking
	eyes from the purse.

				PAULIE
		Who knows?  Maybe more.  Twenty,
		thirty grand in small bills cash in
		that silk purse.  Holy Toledo, if
		this was somebody else's wedding!

	SONNY is sitting at the Wedding Dais, talking to LUCY
	MANCINI, the Maid of Honor.  Every once in a while he
	glances across the courtyard, where his WIFE is talking with
	some WOMEN.

	He bends over and whispers something into LUCY's ear.

	SANDRA and the WOMEN are in the middle of a big, ribald laugh.

				WOMAN
		Is it true what they say about your
		husband, Sandra?

	SANDRA's hands separate with expanding width further and
	further apart until she bursts into a peal of laughter.
	Through her separated hands she sees the Wedding Dais.
	SONNY and LUCY are gone.

	INT DAY: DON'S HALL & STAIRS (SUMMER 1945)

	The empty hallway.  The bathroom door opens and LUCY
	surreptitiously steps out.

	She looks up where SONNY is standing on the second landing,
	motioning for her to come up.

	She lifts her petticoats off the ground and hurries upstairs.

	EXT DAY: MALL TABLES (SUMMER 1945)

	KAY and MICHAEL.

				KAY
			  (in a spooky low tone)
		Michael, that scary guy...Is he a
		relative?

	She has picked out LUCA BRASI.

				MICHAEL
		No.  His name is Luca Brasi.  You
		wouldn't like him.

				KAY
			  (Excited)
		Who is he?

				MICHAEL
			  (Sizing her up)
		You really want to know?

				KAY
		Yes.  Tell me.

				MICHAEL
		You like spaghetti?

				KAY
		You know I love spaghetti.

				MICHAEL
		Then eat your spaghetti and I'll
		tell you a Luca Brasi story.

	She starts to eat her spaghetti.

	She begins eating, looking at him eagerly.

				MICHAEL
		Once upon a time, about fifteen
		years ago some people wanted to
		take over my father's olive oil
		business.  They had Al Capone send
		some men in from Chicago to kill my
		father, and they almost did.

				KAY
		Al Capone!

				MICHAEL
		My Father sent Luca Brasi after
		them.  He tied the two Capone men
		hand and foot, and stuffed small
		bath towels into their mouths.
		Then he took an ax, and chopped one
		man's feet off...

				KAY
		Michael...

				MICHAEL
		Then the legs at the knees...

				KAY
		Michael you're trying to scare me...

				MICHAEL
		Then the thighs where they joined
		the torso.

				KAY
		Michael, I don't want to hear
		anymore...

				MICHAEL
		Then Luca turned to the other man...

				KAY
		Michael, I love you.

				MICHAEL
		...who out of sheer terror had
		swallowed the bath towel in his
		mouth and suffocated.

	The smile on his face seems to indicate that he is telling a
	tall story.

				KAY
		I never know when you're telling me
		the truth.

				MICHAEL
		I told you you wouldn't like him.

				KAY
		He's coming over here!

	LUCA comes toward them to meet TOM HAGEN halfway, just near
	their table.

				MICHAEL
		Tom...Tom, I'd like you to meet Kay
		Adams.

				KAY
			  (having survived LUCA)
		How do you do.

				MICHAEL
		My brother, Tom Hagen.

				HAGEN
		Hello Kay.  Your father's inside,
		doing some business.
			  (privately)
		He's been asking for you.

				MICHAEL
		Thanks Tom.

	HAGEN smiles and moves back to the house, LUCA ominously
	following.

				KAY
		If he's your brother, why does he
		have a different name?

				MICHAEL
		My brother Sonny found him living
		in the streets when he was a kid,
		so my father took him in.  He's a
		good lawyer.

	INT DAY: DON'S OFFICE (SUMMER 1945)

	DON CORLEONE at the window.  He has seen the intimacy of the
	YOUNG COUPLE.

				LUCA (O.S.)
		Don Corleone...

	THE DON turns to the stiffly formal LUCA, and he moves
	forward to kiss his hand.  He takes the envelope from his
	jacket, holds it out, but does not release it until he makes
	a formal speech.

				LUCA
			  (with difficulty)
		Don Corleone...I am honored, and
		grateful...that you invited me to
		your home...on the wedding day of
		your...daughter.
		May their first child...be a
		masculine child.  I pledge my never
		ending loyalty.
			  (he offers the envelope)
		For your daughter's bridal purse.

				DON CORLEONE
		Thank you, Luca, my most valued
		friend.

	THE DON takes it, and then LUCA's hand, which he squeezes so
	tightly we might imagine it to be painful.

				LUCA
		Let me leave you, Don Corleone.  I
		know you are busy.

	He turns, almost an about-face, and leaves the study with
	the same formality he entered with.  DON CORLEONE breathes
	more easily, and gives the thick envelope to HAGEN.

				DON CORLEONE
		I'm sure it's the most generous
		gift today.

				HAGEN
		The Senator called--apologized for
		not coming personally, but said
		you'd understand.  Also, some of
		the Judges...they've all sent gifts.
		And another call from Virgil
		Sollozzo.

	DON CORLEONE is not pleased.

				HAGEN
		The action is narcotics.  Sollozzo
		has contacts in Turkey for the
		poppy, in Sicily for the plants to
		process down to morphine or up to
		heroin.  Also he has access to this
		country.  He's coming to us for
		financial help, and some sort of
		immunity from the law.  For that we
		get a piece of the action, I
		couldn't find out how much.
		Sollozzo is vouched for by the
		Tattaglia family, and they may have
		a piece of the action.  They call
		Sollozzo the Turk.
		He's spent a lot of time in Turkey
		and is supposed to have a Turkish
		wife and kids.  He's supposed to be
		very quick with the knife, or was,
		when he was younger.  Only in
		matters of business and with some
		reasonable complaint.  Also he has
		an American wife and three children
		and he is a good family man.

	THE DON nods.

				HAGEN
		He's his own boss, and very
		competent.

				DON CORLEONE
		And with prison record.

				HAGEN
		Two terms; one in Italy, one in the
		United States.  He's known to the
		Government as a top narcotics man.
		That could be a plus for us; he
		could never get immunity to testify.

				DON CORLEONE
		When did he call?

				HAGEN
		This morning.

				DON CORLEONE
		On a day like this.  Consiglero, do
		you also have in your notes that the
		Turk made his living from
		Prostitution before the war, like
		the Tattaglias do now.  Write that
		down before you forget it.  The
		Turk will wait.

	We now begin to hear a song coming over the loud-speakers
	from outside.  In Italian, with unmistakable style.

				DON CORLEONE
		What that?  It sounds like Johnny.

	He moves to the window, pulls the blinds up, flooding the
	room with light.

				DON CORLEONE
		It is Johnny.  He came all the way
		from California to be at the wedding.

				HAGEN
		Should I bring him in?

				DON CORLEONE
		No.  Let the people enjoy him.  You
		see?  He is a good godson.

				HAGEN
		It's been two years.  He's probably
		in trouble again.

	EXT DAY: MALL (SUMMER 1945)

	JOHNNY FONTANE on the bandstand, singing to the delight and
	excitement of the wedding GUESTS.

				KAY
		I didn't know your family knew
		Johnny Fontane.

				MICHAEL
		Sure.

				KAY
		I used to come down to New York
		whenever he sang at the Capitol and
		scream my head off.

				MICHAEL
		He's my father's godson; he owes
		him his whole career.

	JOHNNY finishes the song and the CROWD screams with delight.
	They call out for another when DON CORLEONE appears.

				DON CORLEONE
		My Godson has come three thousand
		miles to do us honor and no one
		thinks to wet his throat.

	At once a dozen wine glasses are offered to JOHNNY, who
	takes a sip from each as he moves to embrace his GODFATHER.

				JOHNNY
		I kept trying to call you after my
		divorce and Tom always said you
		were busy.  When I got the Wedding
		invitation I knew you weren't sore
		at me anymore, Godfather.

				DON CORLEONE
		Can I do something for you still?
		You're not too rich, or too famous
		that I can't help you?

				JOHNNY
		I'm not rich anymore, Godfather,
		and...my career, I'm almost washed
		up...

	He's very disturbed.  The GODFATHER indicates that he come
	with him to the office so no one will notice.  He turns to
	HAGEN.

				DON CORLEONE
		Tell Santino to come in with us.
		He should hear some things.

	They go, leaving HAGEN scanning the party looking for SONNY.

	INT DAY: DON'S OFFICE (SUMMER 1945)

	HAGEN glances up the staircase.

				HAGEN
		Sonny?

	Then he goes up.

	INT DAY: DON'S UPSTAIRS ROOM (SUMMER 1945)

	SONNY and LUCY are in a room upstairs; he has lifted her
	gown's skirts almost over her head, and has her standing
	against the door.  Her face peeks out from the layers of
	petticoats around it like a flower in ecstasy.

				LUCY
		Sonnyeeeeeeee.

	Her head bouncing against the door with the rhythm of his
	body.  But there is a knocking as well.  They stop, freeze
	in that position.

				HAGEN (O.S.)
		Sonny?  Sonny, you in there?

	INT DAY: DON'S UPSTAIRS HALLWAY (SUMMER 1945)

	Outside, HAGEN by the door.

				HAGEN
		The old man wants you; Johnny's
		here...he's got a problem.

				SONNY (O.S.)
		Okay.  One minute.

	HAGEN hesitates.  We HEAR LUCY's head bouncing against the
	door again.  TOM leaves.

	INT DAY: DON'S OFFICE (SUMMER 1945)

				DON CORLEONE
		ACT LIKE A MAN!  By Christ in
		Heaven, is it possible you turned
		out no better than a Hollywood
		finocchio.

	Both HAGEN and JOHNNY cannot refrain from laughing.  The DON
	smiles.  SONNY enters as noiselessly as possible, still
	adjusting his clothes.

				DON CORLEONE
		All right, Hollywood...Now tell me
		about this Hollywood Pezzonovanta
		who won't let you work.

				JOHNNY
		He owns the studio.  Just a month
		ago he bought the movie rights to
		this book, a best seller.  And the
		main character is a guy just like
		me.  I wouldn't even have to act,
		just be myself.

	The DON is silent, stern.

				DON CORLEONE
		You take care of your family?

				JOHNNY
		Sure.

	He glances at SONNY, who makes himself as inconspicuous as
	he can.

				DON CORLEONE
		You look terrible.  I want you to
		eat well, to rest.  And spend time
		with your family.  And then, at the
		end of the month, this big shot
		will give you the part you want.

				JOHNNY
		It's too late.  All the contracts
		have been signed, they're almost
		ready to shoot.

				DON CORLEONE
		I'll make him an offer he can't
		refuse.

	He takes JOHNNY to the door, pinching his cheek hard enough
	to hurt.

				DON CORLEONE
		Now go back to the party and leave
		it to me.

	He closes the door, smiling to himself.  Turns to HAGEN.

				DON CORLEONE
		When does my daughter leave with
		her bridegroom?

				HAGEN
		They'll cut the cake in a few
		minutes...leave right after that.
		Your new son-in-law, do we give him
		something important?

				DON CORLEONE
		No, give him a living.  But never
		let him know the family's business.
		What else, Tom?

				HAGEN
		I've called the hospital; they've
		notified Consigliere Genco's family
		to come and wait.  He won't last
		out the night.

	This saddens the DON.  He sighs.

				DON CORLEONE
		Genco will wait for me.  Santino,
		tell your brothers they will come
		with me to the hospital to see
		Genco.  Tell Fredo to drive the big
		car, and ask Johnny to come with us.

				SONNY
		And Michael?

				DON CORLEONE
		All my sons.
			  (to HAGEN)
		Tom, I want you to go to California
		tonight.  Make the arrangements.
		But don't leave until I come back
		from the hospital and speak to you.
		Understood?

				HAGEN
		Understood.

	EXT DAY: MALL (SUMMER 1945)

	Now all the wedding GUESTS excitedly clap their hands over
	the entrance of the cake: NAZORINE is beaming as he wheels
	in a serving table containing the biggest, gaudiest, most
	extravagant wedding cake ever baked, an incredible monument
	of his gratitude.  The CROWD is favorably impressed: they
	begin to clink their knives or forks against their glasses,
	in the traditional request for the Bride to cut the cake and
	kiss the Groom.  Louder and louder, five hundred forks
	hitting five hundred glasses.

	EXT DAY: MALL (SUMMER 1945)

	Silence.

	HIGH ANGLE ON THE MALL, late day.  The GUESTS are gone.  A
	single black car is in the courtyard.  FREDDIE is behind the
	driver's seat: the DON enters the car, looks at MICHAEL, who
	sits between SONNY and JOHNNY in the rear seat.

				DON CORLEONE
		Will your girl friend get back to
		the city all right?

				MICHAEL
		Tom said he'd take care of it.

	The DON pulls the door shut; and the car pulls out, through
	the gate of the great Corleone Mall.

	INT DAY: HOSPITAL CORRIDOR (SUMMER 1945)

	A long white hospital corridor, at the end of which we can
	see a grouping of FIVE WOMEN, some old and some young, but
	all plump and dressed in black.

	DON CORLEONE and his SONS move toward the end.  But then the
	DON slows, putting his hand on MICHAEL's shoulder.  MICHAEL
	stops and turns toward his FATHER.  The two look at one
	another for some time.  SILENCE.  DON CORLEONE then lifts
	his hand, and slowly touches a particular medal on MICHAEL's
	uniform.

				DON CORLEONE
		What was this for?

				MICHAEL
		For bravery.

				DON CORLEONE
		And this?

				MICHAEL
		For killing a man.

				DON CORLEONE
		What miracles you do for strangers.

				MICHAEL
		I fought for my country.  It was my
		choice.

				DON CORLEONE
		And now, what do you choose to do?

				MICHAEL
		I'm going to finish school.

				DON CORLEONE
		Good.  When you are finished, come
		and talk to me.  I have hopes for
		you.

	Again they regard each other without a word.  MICHAEL turns,
	and continues on.  DON CORLEONE watches a moment, and then
	follows.

	INT DAY: HOSPITAL ROOM (SUMMER 1945)

	DON CORLEONE enters the hospital room, moving closest to OUR
	VIEW.  He is followed by his SONS, JOHNNY and the WOMEN.

				DON CORLEONE
			  (whispered)
		Genco, I've brought my sons to pay
		their respects.  And look, even
		Johnny Fontane, all the way from
		Hollywood.

	GENCO is a tiny, wasted skeleton of a man.  DON CORLEONE
	takes his bony hand, as the others arrange themselves around
	his bed, each clasping the other hand in turn.

				GENCO
		Godfather, Godfather, it's your
		daughter's wedding day, you cannot
		refuse me.  Cure me, you have the
		power.

				DON CORLEONE
		I have no such power...but Genco,
		don't fear death.

				GENCO
			  (with a sly wink)
		It's been arranged, then?

				DON CORLEONE
		You blaspheme.  Resign yourself.

				GENCO
		You need your old Consigliere.  Who
		will replace me?
			  (suddenly)
		Stay with me Godfather.  Help me
		meet death.  If he sees you, he
		will be frightened and leave me in
		peace.  You can say a word, pull a
		few strings, eh?  We'll outwit that
		bastard as we outwitted all those
		others.
			  (clutching his hand)
		Godfather, don't betray me.

	The DON motions all the others to leave the room.  They do.
	He returns his attention to GENCO, holding his hand and
	whispering things we cannot hear, as they wait for death.

	INT NIGHT: AIRPLANE (SUMMER 1945)

	FADE IN:

	The interior of a non-stop Constellation.  HAGEN is one of
	the very few passengers on this late flight.  He looks like
	any young lawyer on a business trip.  He is tired from the
	difficult preparation and duties that he has just executed
	during the wedding.  On the seat next to him is an enormous,
	bulging briefcase.  He closes his eyes.

	INT NIGHT: HONEYMOON HOTEL (SUMMER 1945)

	The honeymoon hotel: CARLO and CONNIE.  CARLO is in his
	undershorts, sitting up on the bed, anxiously taking the
	envelopes out of the silk bridal purse and counting the
	contents.  CONNIE prepares herself in the large marble
	bathroom.  She rubs her hands over his bronze shoulders, and
	tries to get his interest.

	INT NIGHT: DON'S OFFICE (SUMMER 1945)

	DON CORLEONE in his office.  LUCA BRASI sitting near to him.

				DON CORLEONE
		Luca, I am worried about this man
		Sollozzo.  Find out what you can,
		through the Tattaglias.  Let them
		believe you could be tempted away
		from the Corleone Family, if the
		right offer was made.  Learn what
		he has under his fingernails...

	INT NIGHT: MANCINI APT. HALL (SUMMER 1945)

	The hallway of an apartment building.  SONNY enters, climbs
	two steps at a time.  He knocks, and then whispers.

				SONNY
		It's me, Sonny.

	The door opens, and two lovely arms are around him, pulling
	him into the apartment.

	INT NIGHT: LUCA'S ROOM (WINTER 1945)

	LUCA BRASI's tiny room.  He is partly dressed.  He kneels
	and reaches under his bed and pulls out a small, locked
	trunk.  He opens it, and takes out a heavy, bullet-proof
	vest.  He puts it on, over his wool undershirt, and then
	puts on his shirt and jacket.  He takes his gun, quickly
	disassembles, checks, and reassembles it.  And leaves.

	INT NIGHT: DON'S OFFICE (SUMMER 1945)

	A CLOSE VIEW of DON CORLEONE thinking quietly.

	INT NIGHT: MOVING TRAIN (SUMMER 1945)

	MICHAEL and KAY on a train, speeding on their way to New
	Hampshire.

	INT NIGHT: SUBWAY (WINTER 1945)

	LUCA, in his bulky jacket, sitting quietly on an empty
	subway train.

	INT NIGHT: AIRPLANE (SUMMER 1945)

	HAGEN on the Constellation.  He reaches into his briefcase,
	and takes out several pictures and papers.

	One photograph is of a smiling man, JACK WOLTZ, linked arm
	in arm with fifteen movie stars on either side, including a
	lovely young child star to his immediate right.

	HAGEN considers other papers.

	INT NIGHT: DON'S OFFICE (SUMMER 1945)

	DON CORLEONE looks, and then moves HAGEN into an embrace.
	He straightens his arms and looks at TOM deeply.

				DON CORLEONE
		Remember my new Consigliere, a
		lawyer with his briefcase can steal
		more than a hundred men with guns.

	EXT DAY: WOLTZ ESTATE GATE (SUMMER 1945)

	JACK WOLTZ ESTATE.  HAGEN stands before the impressive gate,
	armed only with his briefcase.  A GATEMAN opens the gate,
	and TOM enters.

	EXT DAY: WOLTZ GARDENS (SUMMER 1945)

	HAGEN and WOLTZ comfortably stroll along beautiful formal
	gardens, martinis in hand.

				WOLTZ
		You should have told me your boss
		was Corleone, Tom, I had to check
		you out.  I thought you were just
		some third rate hustler Johnny was
		running in to bluff me.
			  (a piece of statuary)
		Florence, thirteenth century.
		Decorated the garden of a king.

	They cross the garden and head toward the stables.

				WOLTZ
		I'm going to show you something
		beautiful.

	They pass the stables, and come to rest by a stall with a
	huge bronze plaque attached to the outside wall: "KHARTOUM."
	TWO SECURITY GUARDS are positioned in chairs nearby; they
	rise as WOLTZ approaches.

				WOLTZ
		You like horses?  I like horses, I
		love 'em.  Beautiful, expensive
		Racehorses.

	The animal inside is truly beautiful.  WOLTZ whispers to him
	with true love in his voice.

				WOLTZ
		Khartoum...Khartoum...You are
		looking at six hundred thousand
		dollars on four hoofs.  I bet even
		Russian Czars never paid that kind
		of dough for a single horse.  But
		I'm not going to race him; I'm going
		to put him out to Stud.

	INT NIGHT: WOLTZ DINING ROOM (SUMMER 1945)

	HAGEN and WOLTZ sit at an enormous dining room table,
	attended by SEVERAL SERVANTS.  Great paintings hang on the
	walls.  The meal is elaborate and sumptuous.

				HAGEN
		Mr. Corleone is Johnny's Godfather.
		That is very close, a very sacred
		religious relationship.

				WOLTZ
		Okay, but just tell him this is one
		favor I can't give.  But he should
		try me again on anything else.

				HAGEN
		He never asks a second favor when
		he has been refused the first.
		Understood?

				WOLTZ
		You smooth son of a bitch, let me
		lay it on the line for you, and
		your boss.  Johnny Fontane never
		gets that movie.  I don't care how
		many Dago, Guinea, wop Greaseball
		Goombahs come out of the woodwork!

				HAGEN
		I'm German-Irish.

				WOLTZ
		Okay my Kraut-Mick friend, Johnny
		will never get that part because I
		hate that pinko punk and I'm going
		to run him out of the Movies.  And
		I'll tell you why.  He ruined one
		of Woltz Brothers' most valuable
		proteges.  For five years I had
		this girl under training; singing
		lessons!  Acting lessons!  Dancing
		lessons!  We spent hundreds of
		thousands of dollars--I was going
		to make her a star.  I'll be even
		more frank, just to show you that
		I'm not a hard-hearted man, that it
		wasn't all dollars and cents.  That
		girl was beautiful and young and
		innocent and she was the greatest
		piece of ass I've ever had and I've
		had them all over the world.  Then
		Johnny comes along with that olive
		oil voice and guinea charm and she
		runs off.  She threw it all away to
		make me look ridiculous.  A MAN IN
		MY POSITION CANNOT AFFORD TO BE
		MADE TO LOOK RIDICULOUS!

	EXT DAY: GENCO OLIVE OIL CO. (SUMMER 1945)

	An unimposing little building in New York City on Mott
	Street with a large old sign: "GENCO OLIVE OIL IMPORTS,
	INC." next to an open-faced fruit market.

	A dark Buick pulls up, and a single small man, whom we
	cannot see well because of the distance, gets out and enters
	the building.  This is VIRGIL SOLLOZZO.

	INT DAY: OLIVE OIL OFFICES (SUMMER 1945)

	Looking toward the staircase we can hear SOLLOZZO's footsteps
	before he actually rises into view.  He is a small man, very
	dark, with curly black hair.  But wiry, and tight and hard,
	and obviously very dangerous.  He is greeted at the head of
	the stairs by SONNY, who takes his hand and shakes it,
	introducing himself.  For a moment, there is a complex of
	handshaking quite formal, and whispered respectful
	introductions.  Finally, SOLLOZZO is taken into the DON's
	glass paneled office; the two principals are introduced.
	They are very respectful of one another.  Folding chairs are
	brought in by FREDDIE, and soon they are all sitting around
	in a circle; the DON, SOLLOZZO, SONNY, HAGEN, FREDDIE,
	CLEMENZA and TESSIO.  The DON is the slightest bit foolish
	with all his compatriots, whereas SOLLOZZO has brought no
	one.  Throughout all that transpires, however, it is clear
	that this scene is between two men: SOLLOZZO and DON CORLEONE.

				SOLLOZZO
		My business is heroin, I have poppy
		fields, laboratories in Marseilles
		and Sicily, ready to go into
		production.  My importing methods
		are as safe as these things can be,
		about five per cent loss.  The risk
		is nothing, the profits enormous.

				DON CORLEONE
		Why do you come to me?  Why do I
		deserve your generosity?

				SOLLOZZO
		I need two million dollars in
		cash...more important, I need a
		friend who has people in high
		places; a friend who can guarantee
		that if one of my employees be
		arrested, they would get only light
		sentences.  Be my friend.

				DON CORLEONE
		What percentages for my family?

				SOLLOZZO
		Thirty per cent.  In the first year
		your share would be four million
		dollars; then it would go up.

				DON CORLEONE
		And what is the percentage of the
		Tattaglia family?

	SOLLOZZO nods toward HAGEN.

				SOLLOZZO
		My compliments.  I'll take care of
		them from my share.

				DON CORLEONE
		So.  I receive 30 per cent just for
		finance and legal protection.  No
		worries about operations, is that
		what you tell me?

				SOLLOZZO
		If you think two million dollars in
		cash is just finance, I congratulate
		you Don Corleone.

	There is a long silence; in which each person present feels
	the tension.  The DON is about to give his answer.

				DON CORLEONE
		I said I would see you because I've
		heard you're a serious man, to be
		treated with respect...
			  (pause)
		But I'll say no to you.

	We feel this around the room.

				DON CORLEONE
		I'll give you my reasons.  I have
		many, many friends in Politics.
		But they wouldn't be so friendly if
		my business was narcotics instead
		of gambling.  They think gambling
		is something like liquor, a harmless
		vice...and they think narcotics is
		dirty business.

	SOLLOZZO takes a breath.

				DON CORLEONE
		No...how a man makes his living is
		none of my business.  But this
		proposition of yours is too risky.
		All the people in my family lived
		well the last ten years, I won't
		risk that out of greed.

				SOLLOZZO
		Are you worried about security for
		your million?

				DON CORLEONE
		No.

				SOLLOZZO
		The Tattaglias will guarantee your
		investment also.

	This startles SONNY; he blurts out.

				SONNY
		The Tattaglia family guarantees our
		investment?

	SOLLOZZO hears him first, and then very slowly turns to face
	him.  Everyone in the room knows that SONNY has stepped out
	of line.

				DON CORLEONE
		Young people are greedy, and they
		have no manners.  They speak when
		they should listen.  But I have a
		sentimental weakness for my
		children, and I've spoiled them, as
		you see.  But Signor Sollozzo, my
		no is final.

	SOLLOZZO nods, understands that this is the dismissal.  He
	glances one last time at SONNY.  He rises; all the others do
	as well.  He bows to the DON, shakes his hand, and formally
	takes his leave.  When the footsteps can no longer be heard:

	The DON turns to SONNY.

				DON CORLEONE
		Santino, never let anyone outside
		the family know what you are
		thinking.  I think your brain is
		going soft from all that comedy you
		play with that young girl.

	TWO OFFICE WORKERS are carrying an enormous floral display
	with the word "THANK YOU" spelled out in flowers.

				DON CORLEONE
		What is this nonsense?

				HAGEN
		It's from Johnny.  It was announced
		this morning.  He's going to play
		the lead in the new Woltz Brothers
		film.

	INT DAY: WOLTZ'S BEDROOM (SUMMER 1945)

	It is large, dominated by a huge bed, in which a man,
	presumably WOLTZ, is sleeping.  Soft light bathes the room
	from the large windows.  We move closer to him until we see
	his face, and recognize JACK WOLTZ.  He turns uncomfortably;
	mutters, feels something strange in his bedsheets.  Something
	wet.

	He wakens, feels the sheets with displeasure; they are wet.
	He looks at his hand; the wetness is blood.  He is
	frightened, pulls aside the covers, and sees fresh blood on
	his sheets and pajamas.  He grunts, pulls the puddle of
	blood in his bed.  He feels his own body frantically,
	moving, down, following the blood, until he is face to face
	with the great severed head of Khartoum lying at the foot of
	his bed.  Just blood from the hacked neck.  White reedy
	tendons show.  He struggles up to his elbows in the puddle
	of blood to see more clearly.  Froth covers the muzzle, and
	the enormous eyes of the animal are yellowed and covered
	with blood.

	WOLTZ tries to scream; but cannot.  No sound comes out.
	Then, finally and suddenly an ear-splitting scream of pure
	terror escapes from WOLTZ, who is rocking on his hands and
	knees in an uncontrolled fit, blood all over him.

	INT DAY: OLIVE OIL OFFICES (SUMMER 1945)

	CLOSE VIEW on the GODFATHER.  Nodding.

				DON CORLEONE
		Send Johnny my congratulations.

	----------------------------------------FADE OUT--------

	(SCENES 12 & 12 OMITTED)

	FADE IN:

	EXT DAY: FIFTH AVENUE (WINTER 1945)

	Fifth Avenue in the snow.  Christmas week.  People are
	bundled up with rosy faces, rushing to buy presents.

	KAY and MICHAEL exit a Fifth Avenue department store,
	carrying a stack of gaily wrapped gifts, arm in arm.

				KAY
		We have something for your mother,
		for Sonny, we have the tie for
		Fredo and Tom Hagen gets the
		Reynolds pen...

				MICHAEL
		And what do you want for Christmas?

				KAY
		Just you.

	They kiss.

	INT DAY: HOTEL ROOM (WINTER 1945)

	CLOSE ON a wooden radio, playing quiet Music.  THE VIEW PANS
	AROUND the dark hotel room, curtained against the daylight.

				MICHAEL (O.S.)
		We'll have a quiet, civil ceremony
		at the City Hall, no big fuss, no
		family, just a couple of friends as
		witnesses.

	The two are in each other's arms in a mess of bedsheets on
	the two single beds that they have pushed together.

				KAY
		What will your father say?

				MICHAEL
		As long as I tell him beforehand he
		won't object.  He'll be hurt, but
		he won't object.

				KAY
		What time do they expect us?

				MICHAEL
		For dinner.  Unless I call and tell
		them we're still in New Hampshire.

				KAY
		Michael.

				MICHAEL
		Then we can have dinner, see a
		show, and spend one more night.

	He moves to the telephone.

				MICHAEL (CONT'D.)
		Operator.  Get me
			  (fill in number)


				KAY
		Michael, what are you doing?

				MICHAEL
		Shhh, you be the long distance
		operator.  Here.

				KAY
		Hello...this is Long Distance.  I
		have a call from New Hampshire.  Mr.
		Michael Corleone.  One moment please.

	She hands the phone to MICHAEL who continues the deception.

				MICHAEL
		Hello, Tom?  Michael.  Yeah...
		listen, we haven't left yet.  I'm
		driving down to the city with Kay
		tomorrow morning.  There's something
		important I want to tell the old
		man before Christmas.  Will he be
		home tomorrow night?

	INT DAY: OLIVE OIL OFFICE (WINTER 1945)

	HAGEN in the Olive Oil Company office.  In the background,
	through the glass partitions, we can see the DON, at work in
	his office.  TOM is tired, and steeped in paperwork.

				HAGEN (O.S.)
		Sure.  Anything I can do for you?

				MICHAEL (O.S.)
		No.  I guess I'll see you Christmas.
		Everyone's going to be out at Long
		Beach, right?

				HAGEN
		Right.

	He smiles.  MICHAEL has hung up.  He looks at the piles of
	work, and can't face it.  He rises, puts on his coat and
	hat, and continues out.

	He peeks into the DON's office.

				HAGEN
		Michael called; he's not leaving
		New Hampshire until tomorrow
		morning.  I've got to go, I promised
		Theresa I'd pick up some toys for
		the kids.

	The DON smiles and nods.

	TOM smiles, and leaves; OUR VIEW remaining with DON CORLEONE.
	FREDDIE is sitting on a bench in the corner, reading the
	afternoon paper.  The DON puts aside the papers the office
	manager has prepared for him, and then moves to FREDDIE,
	raps his knuckles on his head to take his nose out of the
	paper.

				DON CORLEONE
		Tell Paulie to get the car from the
		lot; I'll be ready to go home in a
		few minutes.

				FREDO
		I'll have to get it myself; Paulie
		called in sick this morning.

				DON CORLEONE
		That's the third time this month.
		I think maybe you'd better get a
		healthier bodyguard for me.  Tell
		Tom.

				FREDO
			  (going)
		Paulie's a good kid.  If he's sick,
		he's sick.  I don't mind getting
		the car.

	FREDDIE leaves.  The DON slowly puts on his jacket.  Looks out
	his window.

	EXT DUSK: OLIVE OIL CO. (WINTER 1945)

	FREDDIE crosses the street.

	INT DUSK: OLIVE OIL OFFICE (WINTER 1945)

				OFFICE MANAGER
		Buon Natale, Don Corleone.

	The MANAGER helps him on with his overcoat.  Once again, the
	DON glances out his window.

	The black car pulls up; FREDDIE driving.

				DON CORLEONE
		Merry Christmas.
			  (handing the MANAGER
			  an envelope)


	And he starts down the stairs.

	EXT DUSK: OLIVE OIL CO. (WINTER 1945)

	The light outside is very cold, and beginning to fail.  When
	FREDDIE sees his FATHER coming, he moves back into the
	driver's seat.  The DON moves to the car, and is about to
	get in when he hesitates, and turns back to the long, open
	fruit stand near the corner.

	The PROPRIETOR springs to serve him.  The DON walks among
	the trays and baskets, and merely points to a particular
	piece of fruit.  As he selects, the MAN gingerly picks the
	pieces of fruit up and puts them into a paper bag.  The DON
	pays with a five dollar bill, waits for his change, and then
	turns back to the car.

	EXT DUSK: POLKS TOY STORE (WINTER 1945)

	TOM HAGEN exits carrying a stack of presents, all gift
	wrapped.  He continues past the windows.  As he walks,
	someone walks right in his way.  He looks up.  It is SOLLOZZO.

	He takes TOM by the arm and walks along with him.

				SOLLOZZO
			  (quietly)
		Don't be frightened.  I just want
		to talk to you.

	A car parked at the curb suddenly flings its rear door open.

				SOLLOZZO
			  (urgently)
		Get in; I want to talk to you.

	HAGEN pulls his arm free.  He is frightened.

				HAGEN
		I haven't got time.

	TWO MEN suddenly appear on either side of him.

				SOLLOZZO
		Get in the car.  If I wanted to
		kill you you'd be dead already.
		Trust me.

	HAGEN, sick to his stomach, moves with his ESCORTS, leaving
	our VIEW on the Mechanical windows gaily bobbing the story
	of Hansel and Gretel.  We HEAR the car doors shut, and the
	car drive off.

	EXT NIGHT: RADIO CITY - PHONE BOOTH (WINTER 1945)

	RADIO CITY MUSIC HALL during the Christmas show.  KAY and
	MICHAEL exit; tears are still streaming down her cheeks, and
	she sniffles, and dries her tears with Kleenex.  KAY
	nostalgically hums "The Bells of Saint Mary's," as they walk
	arm in arm.

				KAY
		Would you like me better if I were
		a nun?

				MICHAEL
		No.

				KAY
		Would you like me better if I were
		Ingrid Bergman?

	They have passed a little enclosed newsstand.  KAY sees
	something that terrifies her.  She doesn't know what to do.
	MICHAEL still walks, thinking about her question.

				KAY
			  (a little voice)
		Michael?

				MICHAEL
		I'm thinking about it.

				KAY
		Michael...

				MICHAEL
		No, I would not like you better if
		you were Ingrid Bergman.

	She cannot answer him.  Rather she pulls him by the arm,
	back to the newsstand, and points.  His face goes grave.

	The headlines read: "VITO CORLEONE SHOT, CHIEFTAIN GUNNED
	DOWN."

	MICHAEL is petrified; quickly he takes each edition, drops a
	dollar in the tray, and hungrily reads through them.  KAY
	knows to remain silent.

				MICHAEL
			  (desperately)
		They don't say if he's dead or alive.

	EXT DUSK: OLIVE OIL CO. (WINTER 1945)

	DON CORLEONE by the fruit stand; he is about to move to the
	car, when TWO MEN step from the corner.  Suddenly, the DON
	drops the bag of fruit and darts with startling quickness
	toward the parked car.

				DON CORLEONE
		Fredo, Fredo!

	The paper bag has hit the ground, and the fruit begins
	rolling along the sidewalk, as we HEAR gunshots.

	Five bullets catch the DON in the back; he arches in pain,
	and continues toward the car.

	The PROPRIETOR of the fruit stand rushes for cover, knocking
	over an entire case of fruit.

	The TWO GUNMEN move in quickly, anxious to finish him off.

	Their feet careful to avoid the rolling fruit.  There are
	more GUNSHOTS.

	FREDDIE is hysterical; he tries to get out of the car;
	having difficulty opening the door.  He rushes out, a gun
	trembling in his hand; his mouth open.  He actually drops
	the gun.

	The gun falls amid the rolling fruit.

	The GUNMEN are panicked.  They fire once more at the downed
	DON CORLEONE.  His leg and arm twitch where they are hit;
	and pools of blood are beginning to form.

	The GUNMEN are obviously in a state of panic and confusion;
	they disappear around the corner as quickly as they came.

	The PEOPLE about the avenue have all but disappeared:
	rather, we catch glimpses of them, poking their heads safely
	from around corners, inside doorways and arches, and from
	windows.  But the street itself is now empty.

	FREDDIE is in shock; he looks at his FATHER; now great
	puddles of blood have formed, and the DON is lifeless and
	face down in them.

	FREDDIE falls back on to the curb and sits there, saying
	something we cannot understand.  He begins to weep profusely.

	INT NIGHT: SUBWAY (WINTER 1945)

	LUCA BRASI riding alone on a subway car, late at night.  He
	gets off.

	He emerges at a subway terminal, proceeds out.

	EXT NITE: NIGHT CLUB STREET (WINTER 1945)

	LUCA walks down the late night street.  He approaches an
	elegant New York Nightclub, whose gaudy neon sign is still
	winking this late at night.  He waits and watches.  Then the
	sign goes out; and he proceeds into the club.

	INT NITE: NIGHTCLUB (WINTER 1945)

	The main floor of the Nightclub is very large, with endless
	glistening wooden floors.  Now, at this late time, the
	chairs have been stacked on the tables and a NEGRO JANITOR
	is waxing them.  A single HAT-CHECK GIRL is counting her
	receipts.  LUCA moves past the empty bandstand, and sits at
	the bar.  ANOTHER MAN, dark and very well-built, moves
	behind the bar.

				MAN
		Luca...I'm Bruno Tattaglia.

				LUCA
		I know.

	LUCA looks up; and out of the shadows emerges SOLLOZZO.

				SOLLOZZO
		Do you know who I am?

	LUCA Nods.

				SOLLOZZO
		You've been talking to the
		Tattaglias.  They thought we could
		do business.

	LUCA listens.

				SOLLOZZO
		I need somebody strong to protect
		my operation, physically.  I've
		heard you're not happy with your
		family, you might make a switch.

				LUCA
		If the money is good enough.

				SOLLOZZO
		On the first shipment, I can
		guarantee you fifty thousand dollars.

	LUCA looks at him; he had no idea the offer would be so good.

	SOLLOZZO extends his hand, but LUCA pretends not to see it,
	rather, he busies himself putting a cigarette in his mouth.
	BRUNO TATTAGLIA, behind the bar, makes a cigarette lighter
	magically appear, and holds it to LUCA's cigarette.  Then,
	he does an odd thing; he drops the lighter on the bar, and
	puts his hand lightly on LUCA's, almost patting it.

	INT NITE: SONNY'S LIVING ROOM (WINTER 1945)

	The telephone in SONNY's house is ringing.  He approaches
	it, obviously fresh from a nap.

				SONNY
		Yeah.

				VOICE (O.S.)
		Do you recognize my voice?

				SONNY
		I think so.  Detective squad?

				VOICE (O.S.)
		Right.  Don't say my name, just
		listen.  Somebody shot your father
		outside his place fifteen minutes
		ago.

				SONNY
		Is he alive?

				VOICE (O.S.)
		I think so, but I can't get close
		enough.  There's a lot of blood.
		I'll try to find out more.

				SONNY
		Find out anything you can...you got
		a Grand coming.
			  (click)


	SONNY cradles the phone.  An incredible rage builds up in
	him, his face actually turning red.  He would like to rip
	the phone to pieces in his bare hands.  Then he controls it.
	Quickly, he dials another number.

				SONNY
		Theresa, let me talk to Tom.  Not
		yet?  Have him call me as soon as
		he gets home.

	He hangs up.

				SANDRA (O.S.)
		Sonny?  Sonny, who is it?
			  (she enters the room)
		What is it?

				SONNY
			  (calmly)
		They shot the old man.

				SANDRA
		Oh God...

				SONNY
		Honey...don't worry.  Nothing else
		is going to happen.

	There is a POUNDING on the door.  A BABY starts crying.

				SANDRA
			  (really frightened)
		SONNY?

	SONNY reaches into a cabinet drawer, takes out a gun, and
	moves quickly.  He opens the front door quickly.  It is
	CLEMENZA.  He enters, SONNY closes the door.  SANDRA goes to
	look after the baby.

				CLEMENZA
			  (excited)
		You heard about your father?

				SONNY
		Yeah.

				CLEMENZA
		The word is out in the streets that
		he's dead.

				SONNY
		Where the hell was Paulie, why
		wasn't he with the Don?

				CLEMENZA
		Paulie's been a little sick all
		winter...he was home.

				SONNY
		How many times did he stay home the
		last couple of months?

				CLEMENZA
		Maybe three, four times.  I always
		asked Freddie if he wanted another
		bodyguard, but he said no.  Things
		have been so smooth the last ten
		years...

				SONNY
		Go get Paulie, I don't care how
		sick he is.  Pick him up yourself,
		and bring him to my father's house.

				CLEMENZA
		That's all?  Don't you want me to
		send some people over here?

				SONNY
		No, just you and Paulie.

	CLEMENZA leaves; SONNY moves to SANDRA, who sits on the
	couch weeping quietly, comforting her BABY.

				SONNY
		A couple of our people will come to
		stay here.  Do whatever they say;
		I'm going over to the main house.
		If you want me, use Pop's special
		phone.

	The telephone rings again.  SONNY answers it.

				SONNY
		Hello.

				SOLLOZZO (O.S.)
		Santino Corleone?

	SANDRA moves behind him, anxious to know who it is.  SONNY
	indicates that she be quiet.

				SONNY
		Yeah.

				SOLLOZZO (O.S.)
		We have Tom Hagen.  In about three
		hours he'll be released with our
		proposition.  Don't do anything
		until you've heard what he has to
		say.  You can only cause a lot of
		trouble.  What's done is done.
			  (a pause)
		Don't lose that famous temper of
		yours.

				SONNY
			  (quietly)
		I'll wait.

	EXT NITE: MALL (WINTER 1945)

	FULL VIEW OF THE CORLEONE MALL.  It is night, but the
	courtyard is bathed with white light from floodlights on the
	tops of all the houses.  It is very cold.  We see the figure
	of SONNY cross the Mall, and let himself into the main house.

	INT NITE: DON'S KITCHEN (WINTER 1945)

	SONNY walks into the empty, darkened house.  Then he calls
	out.

				SONNY
		Ma?  Ma, where are you?

	The kitchen door swings open.  He moves quickly and takes
	her by the arm.  He is deliberately calm.

				SONNY
		Ma, I just got a call.  Pop's
		hurt...I don't know how bad.

				MAMA
			  (quietly)
		Santino?  Have they killed him?

				SONNY
			  (almost in tears)
		We don't know yet, Ma.

				MAMA
		I'll get dressed.  In case we can
		see him...

	She moves out of the kitchen, and continues upstairs.  SONNY
	turns off the gas under the pan of peppers she was frying.  He
	takes some bread without thinking, and dips it in the oil,
	and sloppily eats some of the peppers, as he moves into his
	father's office.

	INT NITE: DON'S OFFICE (WINTER 1945)

	He switches the lights on in the DON's office.  The massive
	desk dominates the room.  SONNY moves quickly to the
	telephone, pulling a small chair to the side of the desk,
	and dials a number.

				SONNY
		Tessio...This is Santino Corleone.
		I want fifty reliable men out here.

				TESSIO (O.S.)
		I heard, Sonny...but what about
		Clemenza's regime?

				SONNY
		I don't want to use Clemenza's
		people right now.  Understood?

	He hangs up.  He moves quickly to a wall safe; operates the
	dial, and removes a small notebook.  He takes it back to the
	desk, and runs over the list of numbers with his forefinger.
	We follow the names, until the finger stops at one: LUCA
	BRASI.  SONNY dials the number.  There is no answer.

				SONNY
		Luca.

	INT NITE: BUILDING (WINTER 1945)

	The interior of an abandoned building.  SEVERAL MEN in suits
	and ties sit around in the booths.

	HAGEN sits in one: SOLLOZZO sits across from him.

				SOLLOZZO
		I know you're not in the muscle end
		of the family--so I don't want you
		to be afraid.  I want you to help
		the Corleones and I want you to
		help me.

	HAGEN's hands are trembling as he tries to put a cigarette
	in his mouth.  ONE of the BUTTON MEN brings a bottle of rye
	to the table, and pours a little into a delicate, flowered
	china cup.  HAGEN sips gratefully.

				SOLLOZZO
		Your boss is dead...

	HAGEN is overwhelmed: actual tears spring to his eyes.
	SOLLOZZO pauses respectfully.

				SOLLOZZO
			  (pushing the bottle)
		Have some more.  We got him outside
		his office, just before I picked
		you up.  You have to make the peace
		between me and Santino.

	HAGEN still is focused on the grief of losing the old man.

				SOLLOZZO
		Sonny was hot for my deal, right?
		You know it's the smart thing to
		do, too.  I want you to talk Sonny
		into it.

				HAGEN
			  (pulling himself together)
		Sonny will come after you with
		everything he's got.

	SOLLOZZO rises, impatiently.

				SOLLOZZO
		That's going to be his first
		reaction.  You have to talk some
		sense into him.  The Tattaglia
		family stands behind me with all
		their people.  The other New York
		Families will go along with anything
		that prevents a full scale war.

	He leans close to HAGEN.

				SOLLOZZO
		The Don was slipping; in the old
		days I could never have gotten to
		him.  Now he's dead, nothing can
		bring him back.  Talk to Sonny,
		talk to the Caporegimes, Clemenza
		and Tessio...it's good business.

				HAGEN
		Even Sonny won't be able to call
		off Luca Brasi.

				SOLLOZZO
		I'll worry about Luca.  You take
		care of Sonny and the other two kids.

				HAGEN
		I'll try...It's what the Don would
		want us to do.

				SOLLOZZO
			  (lifting his hands in
			  an expression of harmlessness)
		Good...then you can go...
			  (he escorts him to
			  the door)
		I don't like violence.  I'm a
		businessman, and blood is a big
		expense.

	He opens the door; they step out together.

	EXT NITE: BUILDING

	HAGEN, SOLLOZZO exit.

	But a car pulls up, and ONE of SOLLOZZO'S MEN rushes out.
	He indicates with some urgency that he wants to talk to
	SOLLOZZO in private.

	Then SOLLOZZO moves with a grave expression.  He opens the
	door, indicating that HAGEN should be led back in.

				SOLLOZZO
		The old man is still alive.  Five
		bullets in his Sicilian hide and
		he's still alive.
			  (he gives a fatalistic
			  shrug)
		Bad luck for me, bad luck for you.

	EXT NITE: MALL (WINTER 1945)

	MICHAEL driving during the night.  There is a little fog in
	the air, and moisture has formed on the windshield, making
	it difficult to see well.  The wipers move across the view,
	as the gate of the Corleone Mall appears before us, still
	decorated for Christmas.  The courtyard is bathed with white
	floodlight, giving this place a cold and isolated look.  The
	narrow entrance mouth of the Mall is sealed off with a link
	chain.  There are strange cars parked along the curving
	cement walk.  SEVERAL MEN are congregated about the gate and
	chain; ONE of them approaches MICHAEL's car.

				MAN
		Who're you?

	ANOTHER peeks his ugly face almost right up to MICHAEL, and
	then turns.

				MAN 2
		It's the Don's kid; take the car,
		I'll bring him inside.

	The FIRST MAN opens the car door, and MICHAEL steps out.

	INT NITE: HALL (WINTER 1945)

	The Hallway of the main house is filled with MEN MICHAEL
	doesn't recognize.  They pay little attention to him.  Most
	of them are waiting; sitting uncomfortably; no one is talking.

	INT NITE: DON'S LIVING ROOM (WINTER 1945)

	MICHAEL moves into the living room; there is a Christmas
	tree, and countless greeting cards taped to the walls.

	THERESA HAGEN is sitting stiffly on the sofa, smoking a
	cigarette; on the coffee table in front of her is a water
	glass half filled with whiskey.  On the other side of the
	sofa sits CLEMENZA; his face is impassive, but he is
	sweating, and the cigar in his hand glistens slickly black
	with his saliva.  PAULIE GATTO sits tensely and alone on the
	other side of the room.  CLEMENZA sees MICHAEL, looks up at
	him.

				CLEMENZA
		Your mother's at the hospital with
		the old man: He's gonna pull through.

	MICHAEL nods his relief.

				MICHAEL
		Thanks.

	He moves to THERESA.

				MICHAEL
			  (gently)
		You heard from Tom yet?

	Without looking up, she clings to him for a moment, and
	trembles.  Occasionally, STRANGE MEN will cross through the
	room; everyone speaks in a whisper.

				MICHAEL
			  (taking her hand)
		C'mon.

	He leads her into his father's office without knocking.

	INT NITE: DON'S OFFICE (WINTER 1945)

	SONNY and TESSIO are huddled around a yellow pad.  They look
	up, startled.

				SONNY
		Don't worry, Theresa; they just
		want to give Tom the proposition,
		then they're going to turn him loose.

	He reassuringly hugs THERESA, and then to MICHAEL's surprise,
	he kisses him on the cheek.

				SONNY
		I was worried when we couldn't get
		in touch with you in that hick town.

				MICHAEL
		How's Mom?

				SONNY
		Good.  She's been through it before.
		Me too.  You were too young to know
		about it.  You better wait outside;
		there're some things you shouldn't
		hear.

				MICHAEL
		I can help you out...

				SONNY
		Oh no you can't, the old man'd be
		sore as hell if I let you get mixed
		up in this.

				MICHAEL
		Jesus Christ, he's my father, Sonny.

				SONNY
		Theresa.

	She understands, and leaves them alone.

				SONNY
		All right, Mikey...who do we have
		to hit, Clemenza or Paulie?

				MICHAEL
		What?

				SONNY
		One of them fingered the old man.

	MICHAEL didn't realize that the men waiting outside were on
	trial for their lives.

				MICHAEL
		Clemenza?  No, I don't believe it.

				SONNY
		You're right, kid, Clemenza is okay.
		It was Paulie.

				MICHAEL
		How can you be sure?

				SONNY
		On the three days Paulie was sick
		this month, he got calls from a
		payphone across from the old man's
		building.  We got people in the
		phone company.
			  (he shrugs)
		Thank God it was Paulie...we'll
		need Clemenza bad.

	MICHAEL is just realizing the gravity and extent of the
	situation.

				MICHAEL
		Is it going to be all-out war, like
		last time?

				SONNY
		Until the old man tells me different.

				MICHAEL
		Then wait, Sonny.  Talk to Pop.

				SONNY
		Sollozzo is a dead man, I don't
		care what it costs.  I don't care
		if we have to fight all the five
		families in New York.  The Tattaglia
		family's going to eat dirt.  I
		don't care if we all go down
		together.

				MICHAEL
			  (softly)
		That's not how Pop would have
		played it.

				SONNY
		I know I'm not the man he was.  But
		I'll tell you this and he'll tell
		you too.  When it comes to real
		action, I can operate as good as
		anybody short range.

				MICHAEL
			  (calmly)
		All right, Sonny.  All right.

				SONNY
		Christ, if I could only contact Luca.

				MICHAEL
		Is it like they say?  Is he that
		good?

	Outside, we HEAR THERESA cry out, almost a scream of relief.
	Then they open the door and rush out.

	Everyone is standing: in the doorway, TOM HAGEN is wrapped
	in a tight embrace with his WIFE.

				HAGEN
		If I plead before the Supreme
		Court, I'll never do better than I
		did tonight with that Turk.

	EXT NITE: MALL, FEATURING DON'S HOUSE (WINTER 1945)

	The windows of the main house are dark except for the DON's
	study.  It stands out against the cold, dark night.

	INT NITE: DON'S LIVING ROOM (WINTER 1945)

	The living room is empty, save for PAULIE GATTO sitting on
	the edge of the sofa.  The clock reads: 4:00 a.m.

	INT NITE: DON'S OFFICE (WINTER 1945)

	SONNY, MICHAEL, HAGEN, CLEMENZA and TESSIO; all exhausted,
	in shirtsleeves, about to fall asleep.  It is four in the
	morning; there is evidence of many cups of coffee and many
	snacks.  They can barely talk anymore.

				HAGEN
		Is the hospital covered?

				SONNY
		The cops have it locked in and I
		got my people there visiting Pop
		all the time.  What about the hit
		list?

	HAGEN widens his sleepy eyes, and looks at the yellow pad.

				HAGEN
		Too much, too far, too personal.
		The Don would consider this all
		purely a business dispute: Get rid
		of Sollozzo, and everything falls
		in line.  YOU don't have to go
		after the Tattaglias.

	CLEMENZA nods.

				HAGEN
		What about Luca?  Sollozzo didn't
		seem worried about Luca.  That
		worries me.

				SONNY
		If Luca sold out we're in real
		trouble.

				HAGEN
		Has anyone been able to get in
		touch with him?

				SONNY
		No, and I've been calling all night.
		Maybe he's shacked up.

				HAGEN
		Luca never sleeps over with a broad.
		He always goes home when he's
		through.  Mike, keep ringing Luca's
		number.

	MICHAEL, very tired, picks up the phone, and dials the
	number once again.  He can hear the phone ringing on the
	other end but no one answers.  Then he hangs up.

				HAGEN
		Keep trying every fifteen minutes.
			  (exhausted)


				SONNY
		Tom, you're the Consigliere, what
		do we do if the old man dies?

				HAGEN
		Without your father's political
		contacts and personal influence,
		the Corleone family loses half its
		strength.  Without your father, the
		other New York families might wind
		up supporting Sollozzo, and the
		Tattaglias just to make sure there
		isn't a long destructive war.  The
		old days are over, this is 1946;
		nobody wants bloodshed anymore.  If
		your father dies...make the deal,
		Sonny.

				SONNY
			  (angry)
		That's easy to say; it's not your
		father.

				HAGEN
			  (quietly)
		I was as good a son to him as you
		or Mike.

				SONNY
		Oh Christ Tom, I didn't mean it
		that way.

				HAGEN
		We're all tired...

				SONNY
		OK, we sit tight until the old man
		can give us the lead.  But Tom, I
		want you to stay inside the Mall.
		You too, Mike, no chances.  Tessio,
		you hold your people in reserve,
		but have them nosing around the
		city.  The hospital is yours; I
		want it tight, fool-proof, 24 hours
		a day.

	There is a timid knock on the door.

				SONNY
		What is it?

	PAULIE GATTO looks in.

				CLEMENZA
		I tol' you to stay put, Paulie...

				PAULIE
		The guy at the gate's outside...says
		there's a package...

				SONNY
		Tessio, see what it is.

	TESSIO gets up, leaves.

				PAULIE
		You want me to hang around?

				SONNY
		Yeah.  Hang around.

				PAULIE
		Outside?

				CLEMENZA
		Outside.

				PAULIE
		Sure.

	He closes the door.

				SONNY
		Clemenza.  You take care of Paulie.
		I don't ever want to see him again.
		Understood?

				CLEMENZA
		Understood.

				SONNY
		Okay, now you can move your men
		into the Mall, replace Tessio's
		people.  Mike, tomorrow you take a
		couple of Clemenza's people and go
		to Luca's apartment and wait for
		him to show.  That crazy bastard
		might be going after Sollozzo right
		now if he's heard the news.

				HAGEN
		Maybe Mike shouldn't get mixed up
		in this so directly.  You know the
		old man doesn't want that.

				SONNY
		OK forget it, just stay on the phone.

	MICHAEL is embarrassed to be so protected.  He dials Luca
	Brasi's number once again.  The ring repeats, but no one
	answers.

	TESSIO comes back, carrying Luca Brasi's bullet-proof vest
	in his hand.  He unwraps it; there is a large fish wrapped
	inside.

				CLEMENZA
		A Sicilian message: Luca Brasi
		sleeps with the fishes.

	INT NITE: NIGHTCLUB (WINTER 1945)

	LUCA sits at the Bar of the Tattaglia Nightclub, as we
	remember him.  BRUNO TATTAGLIA had just patted his hand.
	LUCA looks up at him.

	Then SOLLOZZO pats the other hand, almost affectionately.
	LUCA is just about to twist his hands away, when they both
	clamp down as hard as they can.  Suddenly, a garrote is
	thrown around his neck, and pulled violently tight.  His
	face begins to turn to purple blotches, and then totally
	purple, right before our eyes; his tongue hangs out, in a
	far more extreme way than a normal tongue could.  His eyes
	bulge.

	ONE of the MEN looks down at him in disgust as LUCA's
	strength leaves him.

				BRUNO
			  (making an ugly face)
		Oh Christ...all over the floor.

	SOLLOZZO lets LUCA's hand go with a victorious smile on his
	face.

	LUCA falls to the floor.

				SOLLOZZO
		The Godfather is next.

	----------------------------------------FADE OUT--------

	FADE IN:

	EXT DAY: CLEMENZA'S HOUSE (WINTER 1945)

	Morning in a simple Brooklyn suburb.  There are rows of
	pleasant houses; driveway after driveway, down the block.  A
	dark, somber young man of thirty-one or two walks with a
	noticeable limp down the sidewalk, and rings the bell.  This
	is ROCCO LAMPONE.  The woman of the house, MRS. CLEMENZA,
	talks to him through the screen door, and then points to the
	side of the house.  ROCCO moves to the garage, which is
	specially heated, and in which CLEMENZA is busy at work
	washing a shiny brand new Lincoln.  LAMPONE admires the car.

				LAMPONE
		Nice.

				CLEMENZA
		Crazy Detroit delivered it with a
		wooden bumper.  They're going to
		send me the chrome bumpers in a
		couple months.  I waited two years
		for this car to come with wooden
		bumpers!

	He scrubs and polishes with great affection.

				CLEMENZA
		Today you make your bones on Paulie.
		You understand everything?

				LAMPONE
		Sure.

	As he scrubs around the glove compartment, he opens it,
	unwraps a gun and gives it to LAMPONE.

				CLEMENZA
		.22 soft-nosed load.  Accurate up
		to five feet.

	LAMPONE expertly puts the gun away.  GATTO's car pulls into
	the driveway, and he sounds the horn.

	The two men walk to the car.  GATTO is driving, a bit
	nervous, like he doesn't know what is up.  LAMPONE gets in
	the rear seat; CLEMENZA in the front, making a grunt of
	recognition.  He looks at his wristwatch, as though wanting
	to chide PAULIE for being late.  PAULIE flinches a little
	when he sees LAMPONE will ride behind him; he half turns:

				PAULIE
		Rocco, sit on the other side.  A
		big guy like you blocks my rearview
		mirror.

	CLEMENZA turns sourly to PAULIE.

				CLEMENZA
		Goddamn Sonny.  He's running scared.
		He's already thinking of going to
		the mattresses.  We have to find a
		place on the West Side.  Paulie,
		you know a good location?

	PAULIE relaxes a bit; he thinks he's off any possible hook
	he was on.  Also there's the money he can make by selling
	Sollozzo any secret location.

				PAULIE
		I'll think about it.

				CLEMENZA
			  (grunting)
		Drive while you thinking; I wanna
		get to the City this month!

	The car pulls out.

	EXT DAY: PAULIE'S CAR - ON ROAD (WINTER 1945)

	Inside PAULIE drives; and CLEMENZA sits in a grump.  OUR
	VIEW does not show LAMPONE in the rear seat.

	EXT DAY: PAULIE'S CAR AT TUNNEL (WINTER 1945)

	The Car crosses to the Midtown Tunnel in the late Winter
	light.

	INT DAY: PAULIE'S CAR IN TUNNEL (WINTER 1945)

	Inside the tunnel; GATTO doesn't like not seeing LAMPONE.
	He tries to adjust his rearview mirror to catch a glimpse of
	him.

				CLEMENZA
		Pay attention!

	EXT DAY: PAULIE'S CAR AT MATTRESS (WINTER 1945)

	The car is parked in the City.  PAULIE comes down from an
	available apartment and gets back into the car.

				PAULIE
		Good for ten men...

				CLEMENZA
		OK, go to Arthur Avenue; I'm
		suppose to call when I found
		somethin'.

	The car pulls off.

	EXT DAY: RESTAURANT (WINTER 1945)

	New part of the city; the car pulls up in a parking lot.
	CLEMENZA gets out, glances at LAMPONE, then to PAULIE.

				CLEMENZA
		You wait; I'll call.

	He walks, tucking his shirt into his pants, around the
	corner and enters the Luna Restaurant.

	INT DAY: RESTAURANT (WINTER 1945)

	CLEMENZA enters the little restaurant, sits down at a table.
	The WAITERS know him; immediately put a bottle of wine, some
	bread--and then a plate of veal on his table.  He eats.

	EXT DAY: RESTAURANT (WINTER 1945)

	CLEMENZA exits the restaurant, belches, adjusts his pants;
	he is well fed.

	We move with him around the corner, not knowing what
	has happened to Paulie.

	There is the car; PAULIE is still sitting behind the wheel,
	LAMPONE in the rear seat.  CLEMENZA steps in.

				CLEMENZA
		He talked my ear off.  Want us to
		go back to Long Beach; have another
		job for us.  Rocco, you live in the
		City, can we drop you off?

				LAMPONE (O.S.)
		Ah, I left my car at your place.

				CLEMENZA
		OK, then you gotta come back.

	The car pulls out.  By now, PAULIE is completely relaxed and
	secure.

				PAULIE
		You think we'll go for that last
		place?

				CLEMENZA
		Maybe, or you gotta know now.

				PAULIE
		Holy cow, I don't gotta know nothing.

	EXT DAY: PAULIE'S CAR ON CAUSEWAY (WINTER 1945)

	The car moves along the reedy beach area of the causeway.
	Inside, CLEMENZA turns to PAULIE.

				CLEMENZA
		Paulie, pull over.  I gotta take a
		leak.

	The car pulls off the Causeway, into the reeds.  CLEMENZA
	steps out of the car, OUR VIEW MOVING with him.

	He turns his back three quarters from us (we can no longer
	see the car), unzips, and we hear the sound of urine hitting
	the ground.  We wait on this for a moment; and then there
	are two GUNSHOTS.  CLEMENZA finishes his leak, zips up and
	turns, moving back to the car.

	PAULIE is dead, bleeding from the mouth; the windows behind
	him are shattered.

				CLEMENZA
		Leave the gun.

	LAMPONE gets out, the two men walk through the reeds a few
	feet where there is another car.  They get in, and drive off.

	---------------------------------------FADE OUT---------

	EXT DAY: MALL (WINTER 1945)

	HIGH ANGLE OF THE MALL.  It is late afternoon.  Many strange
	cars are parked on the nearby streets.  We can see the group
	of BUTTON MEN, stationed here and there, obviously sentries
	with concealed weapons.

	MICHAEL walks along in the rear yard.

	He is bundled in a warm marine coat.  He looks at the
	strange men, regarding them with an uncertain awe.  They
	look back at him, at first suspiciously and then with the
	respect of his position.  He is like an exile Prince.  He
	wanders past them, and hesitates and looks at the yard.

	A rusted set of garden swings; and other home playground
	equipment.  The basketball ring now half coming off.  This
	is where he was a child.  Then a shout.

				CLEMENZA (O.S.)
		Mike.  Hey Mikey; telephone.

	CLEMENZA had shouted from the kitchen window.  MICHAEL
	hurries into the house.

	INT DAY: DON'S KITCHEN (WINTER 1945)

	CLEMENZA is in the kitchen, cooking over an enormous pot.
	He points to the kitchen wall phone which is hanging off the
	hook.

				CLEMENZA
		Some dame.

	MICHAEL picks it up.

				MICHAEL
		Hello.  Kay?

				KAY (O.S.)
		How is your father?

				MICHAEL
		He'll be OK.

				KAY (O.S.)
			  (pause)
		I love you.

	He glances at the THUGS in the kitchen.  Tries to shield the
	phone.

				KAY (O.S.)
		I LOVE YOU.

				MICHAEL
		Yeah Kay, I'm here.

				KAY (O.S.)
		Can you say it?

				MICHAEL
		Huh?

				KAY (O.S.)
		Tell me you love me.

	MICHAEL glances at the HOODS at the kitchen table.  He curls
	up in a corner, and in a quarter voice:

				MICHAEL
		I can't...

				KAY (O.S.)
		Please say it.

				MICHAEL
		Look.  I'll see you tonight, OK?

				KAY (O.S.)
		OK.
			  (click)


	CLEMENZA is getting ready to build a tomato sauce for all
	the button men stationed around the house.

				CLEMENZA
		How come you don't tell that nice
		girl you love her...here, learn
		something... you may have to feed
		fifty guys some day.  You start
		with olive oil...fry some garlic,
		see.  And then fry some sausage...or meat
		balls if you like...then you throw
		in the tomatoes, the tomato
		paste...some basil; and a little
		red wine...that's my trick.

	SONNY peeks into the kitchen; sees CLEMENZA.

				SONNY
		You take care of Paulie?

				CLEMENZA
		You won't see Paulie anymore.  He's
		sick for good this winter.

	MICHAEL starts to leave.

				SONNY
		Where are you going?

				MICHAEL
		To the city.

				SONNY
			  (to Clemenza; dipping
			  bread into the sauce)
		Send some bodyguards.

				MICHAEL
		I don't need them, Sonny.  I'm just
		going to see Pop in the hospital.
		Also, I got other things.

				CLEMENZA
		Sollozzo knows Mike's a civilian.

				SONNY
		OK, but be careful.

	EXT NITE: CAR

	MICHAEL sits in the rear seat, calmly, as he is being driven
	into the city.  THREE BUTTONMEN are crowded into the front
	seat.

	INT NITE: HOTEL LOBBY

	MICHAEL crosses the lobby, past lines of servicemen trying
	to book rooms.

	INT NITE: HOTEL

	MICHAEL and KAY eating a quiet dinner at the hotel.  He is
	preoccupied, she's concerned.

				MICHAEL
		Visiting hour ends at eight thirty.
		I'll just sit with him; I want to
		show respect.

				KAY
		Can I go to the hospital with you?

				MICHAEL
		I don't think so.  You don't want
		to end up on page 3 of the Daily
		News.

				KAY
		My parents don't read the Daily
		News.  All right, if you think I
		shouldn't.  I can't believe the
		things the papers are printing.
		I'm sure most of it's not true.

				MICHAEL
		I don't think so either.
			  (silence)
		I better go.

				KAY
		When will I see you again?

				MICHAEL
		I want you to go back to New
		Hampshire...think things over.

	He leans over her; kisses her.

				KAY
		When will I see you again?

				MICHAEL
		Goodbye.

	Quietly, he moves out the door.

	KAY lies on the bed a while, and then, to herself:

				KAY
		Goodbye.

	EXT NITE: DON'S HOSPITAL (WINTER 1945)

	A taxi pulls up in front of a hospital, marked clearly with
	a neon sign "HOSPITAL--EMERGENCY."  MICHAEL steps out, pays
	the fare...and then stops dead in his tracks.

	MICHAEL looks.

	He sees the hospital in the night; but it is deserted.  He
	is the only one on the street.  There are gay, twinkling
	Christmas decorations all over the building.  He walks,
	slowly at first, and then ever so quickly, up the steps.  He
	hesitates, looks around.  This area is empty.  He checks the
	address on a scrap of paper.  It is correct.  He tries the
	door; it is unlocked.

	He walks in.

	INT NITE: HOSPITAL LOBBY (WINTER 1945)

	MICHAEL stands in the center of an absolutely empty hospital
	lobby.  He looks to the right; there is a long, empty
	corridor.  To the left: the same.

	HIGH FULL ANGLE, as MICHAEL walks through the desolated
	building lit by eerie green neon lighting.  All we hear are
	his sole footsteps.

	He walks up to a desk marked "INFORMATION".  No one is there.
	He moves quickly to a door marked "OFFICE"; swings into it;
	no one is there.  He looks onto the desk:  There is half a
	sandwich, and a half-filled bottle of coke.

				MICHAEL
		Hello?  Hello?

	Now he knows something is happening, he moves quickly,
	alertly.  MICHAEL walking down the hospital corridors; all
	alone.  The floors have just been mopped.  They are still wet.

	INT NITE: HOSPITAL STAIRS

	Now he turns onto a staircase; ever quickening; up several
	flights.

	INT NITE: 4TH FLOOR CORRIDOR

	He steps out onto the fourth floor.  He looks.  There are
	merely empty corridors.  He takes out his scrap of paper;
	checks it.  "Room 4A."  Now he hurries, trying to follow the
	code of hospital rooms; following the right arrows, quicker
	and quicker they flash by him.  Now he stops, looks up "4A--
	Corleone".

	There is a special card table set up there with some
	magazines...and some smoking cigarettes still in the
	ashtray--but no detectives, no police, no bodyguards.

	INT NITE: DON'S ROOM 4A

	Slowly he pushes the door open, almost afraid at what he
	will find.  He looks.  Lit by the moonlight through the
	window, he can see a FIGURE in the hospital bed alone in the
	room, and under a transparent oxygen tent.  All that can be
	heard is the steady though strained breathing.  Slowly
	MICHAEL walks up to it, and is relieved to see his FATHER,
	securely asleep.  Tubes hang from a steel gallows beside the
	bed, and run to his nose and mouth.

				VOICE (O.S.)
		What are you doing here?

	This startles MICHAEL; who almost jumps around.  It is a
	NURSE lit from the light behind her in the hallway.

				NURSE
		You're not supposed to be here now.

	MICHAEL calms himself, and moves to her.

				MICHAEL
		I'm Michael Corleone--this is my
		father.  What happened to the
		detectives who were guarding him?

				NURSE
		Oh your father just had too many
		visitors.  It interfered with the
		hospital service.  The police came
		and made them all leave just ten
		minutes ago.
			  (comfortingly)
		But don't worry.  I look in on him.

				MICHAEL
		You just stand here one minute...

	Quickly he moves to the telephone, dials a number.

				MICHAEL
		Sonny...Sonny--Jesus Christ, I'm
		down at the hospital.  I came down
		late.  There's no one here.  None
		of Tessio's people--no detectives,
		no one.  The old man is completely
		unprotected.

				SONNY (O.S.)
		All right, get him in a different
		room; lock the door from the inside.
		I'll have some men there inside of
		fifteen minutes.  Sit tight, and
		don't panic.

				MICHAEL
			  (furiously, but kept inside)
		I won't panic.

	He hangs up; returns to the NURSE...

				NURSE
		You cannot stay here...I'm sorry.

				MICHAEL
			  (coldly)
		You and I are going to move my
		father right now...to another room
		on another floor...Can you
		disconnect those tubes so we can
		wheel the bed out?

				NURSE
		Absolutely not!  We have to get
		permission from the Doctor.

				MICHAEL
		You've read about my father in the
		papers.  You've seen that no one's
		here to guard him.  Now I've just
		gotten word that men are coming to
		this hospital to kill him.  Believe
		me and help me.

				NURSE
			  (frightened)
		We don't have to disconnect them,
		we can wheel the stand with the bed.

	She does so...and they perform the very difficult task of
	moving the bed and the apparatus, out of the room.

	INT NITE: 4TH FLOOR HOSPITAL (WINTER 1945)

	They roll the bed, the stand, and all the tubes silently
	down the corridor.  We hear FOOTSTEPS coming up the stairs.
	MICHAEL hears them, stops.

				MICHAEL
		Hurry, into there.

	They push it into the first available room.  MICHAEL peeks
	out from the door.  The footsteps are louder; then they
	emerge.  It is ENZO, NAZORINE's helper, carrying a bouquet
	of flowers.

				MICHAEL
			  (stepping out)
		Who is it?

				ENZO
		Michael...do you remember me, Enzo,
		the baker's helper to Nazorine, now
		his son-in-law.

				MICHAEL
		Enzo, get out of here.  There's
		going to be trouble.

	A look of fear sweeps through ENZO's face.

				ENZO
		If there...will be trouble...I stay
		with you, to help.  I owe it to the
		Godfather.

	MICHAEL thinks, realizes he needs all the help he can get.

				MICHAEL
		Go outside; stand in front...I'll
		be out in a minute.

	INT NITE: DON'S SECOND HOSPITAL ROOM (WINTER 1945)

	They part.  MICHAEL moves into the hospital room where they
	put his FATHER.

				NURSE
			  (frightened)
		He's awake.

	MICHAEL looks at the OLD MAN, his eyes are open, though he
	cannot speak.  MICHAEL touches his face tenderly.

				MICHAEL
		Pop...Pop, it's me Michael.  Shhhh,
		don't try to speak.  There are men
		who are coming to try to kill you.
		But I'm with you...I'm with you
		now...

	The OLD MAN tries to speak...but cannot.  MICHAEL tenderly
	puts his finger to his FATHER's lips.

	EXT NITE: DON'S HOSPITAL STREET (WINTER 1945)

	Outside the hospital is empty save for a nervous ENZO,
	pacing back and forth brandishing the flowers as his only
	weapon.  MICHAEL exits the hospital and moves to him.  They
	both stand under a lamppost in the cold December night.
	They are both frightened; MICHAEL gives ENZO a cigarette,
	lights it.  ENZO's hands are trembling, MICHAEL's are not.

				MICHAEL
		Get rid of those and look like
		you've got a gun in your pocket.

	The windows of the hospital twinkle with Christmas
	decorations.

				MICHAEL
		Listen...

	We HEAR the sound of a single automobile coming.  MICHAEL
	and ENZO look with fear in their eyes.  Then MICHAEL takes
	the bouquet of flowers and stuffs them under his jacket.
	They stand, hands in their pockets.

	A long low black car turns the corner and cruises by them.
	MICHAEL's and ENZO's faces are tough, impassive.  The car
	seems as though it will stop; and then quickly accelerates.
	MICHAEL and ENZO are relieved.  MICHAEL looks down; the
	BAKER's hands are shaking.  He looks at his own, and they
	are not.

	Another moment goes by and we can hear the distant sound of
	police sirens.  They are clearly coming toward the hospital,
	getting louder and louder.  MICHAEL heaves a sigh of relief.

	In a second, a patrol car makes a screaming turn in front of
	the hospital; then two more squad cars follow with uniformed
	POLICE and DETECTIVES.  He smiles his relief and starts
	toward them.  TWO huge, burly POLICEMEN suddenly grab his
	arms while ANOTHER frisks him.  A massive POLICE CAPTAIN,
	spattered with gold braid and scrambled eggs on his hat,
	with beefy red face and white hair seems furious.  This is
	McCLUSKEY.

				MCCLUSKEY
		I thought I got all you guinea
		hoods locked up.  Who the hell are
		you and what are you doing here?

	ANOTHER COP standing nearby:

				COP
		He's clean, Captain.

	MICHAEL studies McCLUSKEY closely.

				MICHAEL
			  (quietly)
		What happened to the detectives who
		were supposed to be guarding my
		father?

				MCCLUSKEY
			  (furious)
		You punk-hood.  Who the hell are
		you to tell me my business.  I
		pulled them off.  I don't care how
		many Dago gangsters kill each other.
		I wouldn't lift a finger to keep
		your old man from getting knocked
		off.  Now get the hell out of here;
		get off this street you punk, and
		stay away from this hospital.

	MICHAEL stands quiet.

				MICHAEL
		I'll stay until you put guards
		around my father's room.

				MCCLUSKEY
		Phil, lock this punk up.

				A DETECTIVE
		The Kid's clean, Captain...He's a
		war hero, and he's never been mixed
		up in the rackets...

				MCCLUSKEY
			  (furious)
		Goddam it, I said lock him up.  Put
		the cuffs on him.

				MICHAEL
			  (deliberately, right
			  to McCLUSKEY's face,
			  as he's being handcuffed)
		How much is the Turk paying you to
		set my father up, Captain?

	Without any warning, McCLUSKEY leans back and hits MICHAEL
	squarely on the jaw with all his weight and strength.
	MICHAEL groans, and lifts his hand to his jaw.  He looks at
	McCLUSKEY; we are his VIEW and everything goes spinning, and
	he falls to the ground, just as we see HAGEN and CLEMENZA'S
	MEN arrive.

	---------------------------------------FADE OUT---------

	EXT DAY: MALL (WINTER 1945)

	HIGH ANGLE VIEW of THE CORLEONE MALL.  The gateway now has a
	long black car blocking it.  There are more BUTTON MEN
	stationed more formally; and some of them visibly carrying
	rifles; those of the houses close to the courtyard have MEN
	standing by open windows.  It is clear that the war is
	escalating.  A car pulls up and out get CLEMENZA, LAMPONE,
	MICHAEL and HAGEN.  MICHAEL's jaw is wired and bandaged.  He
	stops and looks up at the open window.  We can see MEN
	holding rifles.

				MICHAEL
		Christ, Sonny really means business.

	They continue walking.  TESSIO joins them.  The various
	BODYGUARDS make no acknowledgment.

				CLEMENZA
		How come all the new men?

				TESSIO
		We'll need them now.  After the
		hospital incident, Sonny got mad.
		We hit Bruno Tattaglia four o'clock
		this morning.

	INT DAY: DON'S HALLWAY

	They enter the house past the scores of new and strange faces.

	INT DAY: DON'S OFFICE (WINTER 1945)

	SONNY is in the DON's office; he is excited and exuberant.

				SONNY
		I've got a hundred button men on
		the streets twenty-four hours a day.
		If Sollozzo shows one hair on his
		ass he's dead.

	He sees MICHAEL, and holds his bandaged face in his hand,
	kiddingly.

				SONNY
		Mikey, you look beautiful!

				MICHAEL
		Cut it out.

				SONNY
		The Turk wants to talk!  The nerve
		of that son of a bitch!  After he
		craps out last night he wants a meet.

				HAGEN
		Was there a definite proposal?

				SONNY
		Sure, he wants us to send Mike to
		meet him to hear his proposition.
		The promise is the deal will be so
		good we can't refuse.

				HAGEN
		What about the Tattaglias?  What
		will they do about Bruno?

				SONNY
		Part of the deal: Bruno cancels out
		what they did to my father.

				HAGEN
		We should hear what they have to say.

				SONNY
		No, no Consigliere.  Not this time.
		No more meetings, no more
		discussions, no more Sollozzo
		tricks.  Give them one message: I
		WANT SOLLOZZO.  If not, it's all
		out war.  We go to the mattresses
		and we put all the button men out
		on the street.

				HAGEN
		The other families won't sit still
		for all out war.

				SONNY
		Then THEY hand me Sollozzo.

				HAGEN
		Come ON Sonny, your father wouldn't
		want to hear this.  This is not a
		personal thing, this is Business.

				SONNY
		And when they shot my father...

				HAGEN
		Yes, even the shooting of your
		father was business, not personal...

				SONNY
		No no, no more advice on how to
		patch it up Tom.  You just help me
		win.  Understood?

	HAGEN bows his head; he is deeply concerned.

				HAGEN
		I found out about this Captain
		McCluskey who broke Mike's jaw.
		He's definitely on Sollozzo's
		payroll, and for big money.
		McCluskey's agreed to be the Turk's
		bodyguard.  What you have to
		understand is that while Sollozzo
		is guarded like this, he's
		invulnerable.  Nobody has ever
		gunned down a New York Police
		Captain.  Never.  It would be
		disastrous.  All the five families
		would come after you Sonny; the
		Corleone family would be outcasts;
		even the old man's political
		protection would run for cover.  So
		just...take that into consideration.

				SONNY
			  (still fuming)
		McCluskey can't stay with the Turk
		forever.  We'll wait.

				MICHAEL
		We can't wait.  No matter what
		Sollozzo says about a deal, he's
		figuring out how to kill Pop.  You
		have to get Sollozzo now.

				SONNY
		The kid's right.

				HAGEN
		What about McCluskey?

				MICHAEL
		Let's say now that we have to kill
		McCluskey.  We'll clear that up
		through our Newspaper contacts later.

				SONNY
		Go on Mike.

				MICHAEL
		They want me to go to the conference
		with Sollozzo.  Set up the meeting
		for two days from now.  Sonny, get
		our informers to find out where the
		meeting will be held.
		Insist it has to be a public place:
		a bar or restaurant at the height
		of the dinner hour.  So I'll feel
		safe.  They'll check me when I meet
		them so I won't be able to carry a
		weapon; but Clemenza, figure out a
		way to have one planted there for
		me.
			  (pause)
		Then I'll kill them both.

	Everyone in the room is astonished; they all look at MICHAEL.
	Silence.  SONNY suddenly breaks out in laughter.  He points
	a finger at MICHAEL, trying to speak.

				SONNY
		You?  You, the high-class college
		kid.  You never wanted to get mixed
		up in the family business.  Now you
		wanta gun down a police Captain and
		the Turk just because you got
		slapped in the face.  You're taking
		it personal, it's just business and
		he's taking it personal.

	Now CLEMENZA and TESSIO are also smiling; only HAGEN keeps
	his face serious.

				MICHAEL
			  (angrily, but cold)
		Sonny, it's all personal, and I
		learned it from him, the old man,
		the Godfather.  He took my joining
		the Marines personal.  I take
		Sollozzo trying to kill my father
		personal, and you know I'll kill
		them Sonny.

	MICHAEL radiates danger...SONNY stops laughing.

	INT DAY: CLEMENZA'S CELLAR (WINTER 1945)

	CLOSE on a revolver.

				CLEMENZA (O.S.)
		It's as cold as they come,
		impossible to trace.
			  (he turns it upside down)
		Don't worry about prints Mike, I
		put a special tape on the trigger
		and butt.  Here.
			  (he hands the gun to
			  another pair of hands)
		Whatsamatter?  Trigger too tight?
			  (it fires: very LOUD)
		I left it noisy, so it'll scare any
		pain-in-the-neck innocent bystander
		away.

	MICHAEL is alone with CLEMENZA in a cellar workshop.

				CLEMENZA
		Just let your hand drop to your
		side, and let the gun slip out.
		Everybody will still think you got
		it.  They'll be starin' at your
		face, see?  Then walk out of the
		place real fast, but don't run.
		Don't look anybody directly in the
		eye, but don't look away from them
		neither.  Hey, they'll be scared
		stiff of you, believe me.  Nobody's
		gonna bother with you.  Don't worry
		about nothing; you'd be surprised
		how good these things go.  O.K.,
		put your hat on, let's see how you
		look.  Helps with identification.

	They put the hat on; CLEMENZA adjusts it.

				CLEMENZA
		Mostly it gives witnesses an excuse
		to change their identification when
		we make them see the light.  Then
		you take a long vacation and we
		catch the hell.

				MICHAEL
		How bad will it be?

				CLEMENZA
		Probably all the other families
		will line up against us.  But, it's
		alright.  These things have to
		happen once every ten years or
		so...gets rid of the bad blood.
		You gotta stop 'em at the beginning.
		Like they shoulda stopped Hitler at
		Munich, they shoulda never let him
		get away with that, they were just
		asking for big trouble...

	INT DAY: DON'S HALL & LIVING ROOM (WINTER 1945)

	MICHAEL steps into the foyer of the main house.  A card
	table is set up with a man playing cards with three of the
	Corleone buttonmen.

	He continues into the living room.  It's a mess.  SONNY
	asleep on the sofa.  On the coffee table are the remains of
	a take-out Chinese food dinner, and a half-empty bottle of
	whisky.  The radio is playing.

				MICHAEL
		Why don't you stop living like a
		bum and get this place cleaned up.

				SONNY
		What are you, inspecting the
		barracks?
			  (SONNY sits up with
			  his head in his hands)
		You ready?  Did Clemenza tell you
		be sure to drop the gun right away?

				MICHAEL
		A million times.

				SONNY
		Sollozzo and McCluskey are going to
		pick you up in an hour and a half
		on Times Square, under the big
		Camels sign.

				HAGEN
		We don't let Mike go until we have
		the hostage, Sonny.

				CLEMENZA
		It's okay...the hostage is outside
		playing pinochle with three of my
		men.

	The phone rings in the DON's office.

				SONNY
		That could be a Tattaglia informer
		with the meeting place.

	INT DAY: DON'S OFFICE (WINTER 1945)

	HAGEN has hurried into the Den to get the phone; the OTHERS
	move in.

	HAGEN's on the phone; he writes something down.

				SONNY
		One of Tattaglia's people?

				HAGEN
		No.  Our informer in McCluskey's
		precinct.  Tonight at 8:00 he
		signed out for Louis' Restaurant in
		the Bronx.  Anyone know it?

				TESSIO
		Sure, I do.  It's perfect for us.
		A small family place with big
		booths where people can talk in
		private.  Good food.  Everybody
		minds their business.  Perfect.
			  (he moves to the desk
			  and makes a crude drawing)
		This is the entrance, Mike.  When
		you finish just walk out and turn
		left, then turn the corner.
		Clemenza, you gotta work fast to
		plant the gun.  They got an old-
		fashioned toilet with a space
		between the water container and the
		wall.  We can tape the gun behind
		there.

				CLEMENZA
		Mike, they're gonna frisk you in
		the car.  You'll be clean so they
		won't worry 'bout nothing.  In the
		restaurant, wait and talk a while,
		and then ask permission to go.  See?
		Then when you come out, don't waste
		time; don't sit down...you come out
		blasting.  And don't take chances.
		In the head, two shots apiece.  And
		out as fast as your legs can move.

				SONNY
		I want somebody very good, very
		safe to plant that gun.  I don't
		want my brother coming out of that
		toilet with just his dick in his
		hand.

				CLEMENZA
		The gun will be there.

				SONNY
			  (to MICHAEL, warmly)
		You're on, kid...I'll square it
		with Mom your not seeing her before
		you left.  And I'll get a message
		to your girl friend when I think
		the time is right.

				CLEMENZA
		We gotta move...

				MICHAEL
		O.K.  How long do you think before
		I can come back?

				SONNY
		Probably a year...

				HAGEN
			  (starting to crack)
		Jesus, I don't know...

				SONNY
		Can you do it Mike?

	MICHAEL moves out.

	EXT NITE: CAMELS SIGN (WINTER 1945)

	The enormous "CAMELS" sign, puffing smoke, below it stands
	MICHAEL, dressed in a warm overcoat, and wearing the hat
	CLEMENZA had given him.  A long black car pulls around the
	corner and slows before him.  The DRIVER, leaning over, opens
	the front door.

				DRIVER
		Get in, Mike.

	He does, the car drives off.

	EXT NITE: SOLLOZZO'S CAR (WINTER 1945)

	Inside the car, SOLLOZZO reaches his hand over the back seat
	and shakes MIKE's hand.

				SOLLOZZO
		I'm glad you came, Mike.  I hope we
		can straighten everything out.  All
		this is terrible, it's not the way
		I wanted things to happen at all.
		It should never have happened.

				MICHAEL
		I want to settle things tonight.  I
		want my father left alone.

				SOLLOZZO
		He won't be; I swear to you by my
		children he won't be.  Just keep an
		open mind when we talk.  I hope
		you're not a hothead like your
		brother, Sonny.  It's impossible to
		talk business with him.

	McCLUSKEY grunts.

				MCCLUSKEY
		He's a good kid.  He's all right.
		Turn around, up on your knees,
		facing me.

	He gives MICHAEL a thorough frisk.

				MCCLUSKEY
		I'm sorry about the other night
		Mike.  I'm getting too old for my
		job, too grouchy.  Can't stand the
		aggravation.  You know how it is.
		He's clean.

	EXT NITE: SOLLOZZO'S CAR - WEST SIDE HIGHWAY (WINTER 1945)

	MICHAEL looks at the DRIVER and then ahead to see where
	they're heading.

	The car takes the George Washington Bridge.  MICHAEL is
	concerned.

				MICHAEL
		We're going to New Jersey?

				SOLLOZZO
			  (sly)
		Maybe.

	MICHAEL closes his eyes.

	EXT NITE: SOLLOZZO'S CAR ON G.W. BRIDGE (WINTER 1945)

	The car speeds along the George Washington Bridge on its way
	to New Jersey.  Then suddenly it hits the divider,
	temporarily lifts into the air, and bounces over into the
	lanes going back to New York.  It then hits it very fast, on
	the way back to the city.

	EXT NITE: SOLLOZZO'S CAR (WINTER 1945)

	SOLLOZZO checks to see the cars that had been following, and
	then leans to the DRIVER.

				SOLLOZZO
		Nice work; I'll remember it.

	MICHAEL is relieved.

	EXT NITE: LUNA AZURA RESTAURANT (WINTER 1945)

	The car pulls up in front of a little family restaurant in
	the Bronx:  The "LUNA AZURA".  There is no one on the street.
	MICHAEL looks to see if the DRIVER is going to get out with
	them.  He gets out, and opens the door.  SOLLOZZO, McCLUSKEY
	and MICHAEL get out; the DRIVER remains leaning against the
	car.  They enter the restaurant.

	INT NITE: LUNA AZURA (WINTER 1945)

	A very small family restaurant with a mosaic tile floor.
	SOLLOZZO, MICHAEL and McCLUSKEY sit around a rather small
	round table near the center of the room.  There are empty
	booths along the side walls; with a handful of CUSTOMERS,
	and ONE or TWO WAITERS.  It is very quiet.

				MCCLUSKEY
		Is the Italian food good here?

				SOLLOZZO
		Try the veal; it's the finest in
		New York.

	The solitary WAITER brings a bottle of wine to the table.
	They watch him silently as he uncorks it and pours three
	glasses.  Then, when he leaves, SOLLOZZO turns to McCLUSKEY:

				SOLLOZZO
		I am going to talk Italian to Mike.

				MCCLUSKEY
		Sure, you two go right ahead; I'll
		concentrate on my veal and my
		spaghetti.

	SOLLOZZO now begins in rapid Sicilian.  MICHAEL listening
	carefully and nodding every so often.  Then MICHAEL answers
	in Sicilian, and SOLLOZZO goes on.  The WAITER occasionally
	brings food; and they hesitate while he is there; then go on.
	Then MICHAEL, having difficulty expressing himself in
	Italian, accidentally lapses into English.

				MICHAEL
			  (using English for emphasis)
		Most important...I want a sure
		guarantee that no more attempts
		will be made on my father's life.

				SOLLOZZO
		What guarantees can I give you?  I
		am the hunted one.  I've missed my
		chance.  You think too highly of
		me, my friend...I am not so
		clever...all I want is a truce...

	MICHAEL looks long and hard at SOLLOZZO, who is smiling
	holding his open hands up as if to say: "I have no tricks up
	my sleeve".  Then he looks away and makes a distressed look
	on his face.

				SOLLOZZO
		What is it?

				MICHAEL
		Is it all right if I go to the
		bathroom?

	SOLLOZZO is intuitively suspicious.  He studies MICHAEL with
	his dark eyes.  Then he thrusts his hand onto MICHAEL's
	thigh feeling in and around, searching for a weapon.

				MCCLUSKEY
		I frisked him; I've frisked
		thousands of young punks; he's clean.

	He looks at a MAN sitting at a table opposite them;
	indicating the bathroom with his eyes.  The MAN nods,
	indicating no one is there.

				SOLLOZZO
		Don't take too long.

	MICHAEL gets up and calmly walks to the bathroom, and
	disappears inside.

	INT NITE: LUNA AZURA TOILET (WINTER 1945)

	MICHAEL steps into the small bathroom; he is breathing very
	hard.  He actually uses the urinal.  Then he washes his
	hands with the bar of pink soap; and dries them thoroughly.
	Then he moves to the booth, up to the old-fashioned toilet.
	Slowly he reaches behind the water tank; he panics when he
	cannot feel the gun.  We see behind the tank his hand is
	just a few inches from the gun...he gropes
	searchingly...finally coming to rest on the gun.

	CLOSE ON MICHAEL; the feel of it reassures him.  Then he
	breaks it loose from the tape holding it; he takes a deep
	breath and shoves it under his waistband.  For some
	unexplainable reason he hesitates once again, deliberately
	washes his hands and dries them.  Then he goes out.

	INT NITE: LUNA AZURA (WINTER 1945)

	He hesitates by the bathroom door; and looks at his table.
	McCLUSKEY is eating a plate of spaghetti and veal.  SOLLOZZO
	turns around upon hearing the door, and looks directly at
	MICHAEL.  MICHAEL looks back.  Then he smiles and continues
	back to the table.  He sits down.

				MICHAEL
		Now I can talk.  I feel much better.

	The MAN by the far wall had been stiff with attention; now
	he too relaxes.  SOLLOZZO leans toward MICHAEL who sits down
	comfortably and his hands move under the table and unbutton
	his jacket.  SOLLOZZO begins to speak in Sicilian once again
	but MICHAEL's heart is pounding so hard he can barely hear
	him.

	The WAITER comes to ask about the order, SOLLOZZO turns to
	speak, and without warning, MICHAEL shoves the table away
	from him with his left hand, and with his right hand puts
	the gun right against SOLLOZZO's head, just touching his
	temple.  He pulls the trigger, and we see part of SOLLOZZO's
	head blown away, and a spray of fine mist of blood cover the
	entire area.

	The WAITER looks in amazement; suddenly his white jacket is
	sprayed and stained with blood.

	SOLLOZZO seems in a perpetual fall to the floor; though he
	seems to hang in space suspended.

	MICHAEL pivots, and looks:

	There is McCLUSKEY, frozen, the fork with a piece of veal
	suspended in air before his gaping mouth.

	MICHAEL fires; catching McCLUSKEY in his thick bulging
	throat.  He makes a horrible, gagging, choking sound.  Then
	coolly, and deliberately, MICHAEL fires again, fires right
	through McCLUSKEY's white-topped skull.

	The air is filled with pink mist.
	MICHAEL swings toward the MAN standing by the bathroom wall.
	He does not make a move, seemingly paralyzed.
	Now he carefully shows his hands to be empty.
	The WAITER steps backward through the mist of blood, an
	expression of horror on his face.
	MICHAEL looks at his two victims:
	SOLLOZZO still in his chair, side of his body propped up by
	the table.
	McCLUSKEY finally falls from the chair to the table.
	MICHAEL is wildly at a peak.  He starts to move out.  His
	hand: is frozen by his side, STILL GRIPPING THE GUN.
	He moves, not letting the gun go.

	MICHAEL's face; frozen in its expression.

	His hand: still holding the gun.

	His face: finally he closes his eyes.

	His hand relaxes, the gun falls to the floor with a dull thud.

	He walks quickly out of the restaurant, looks back.

	He sees a frozen tableau of the murder; as though it had
	been recreated in wax.

	Then he leaves.

	---------------------------------------FADE OUT---------

	FADE IN:

	INT DAY: MATTRESS (WINTER 1945)

	A MAN in his shirtsleeves plays a sentimental tune on an old
	upright piano, while his cigarette burns on the edge.
	ANOTHER stands nearby, listening quietly.

	A little distance away, TEN MEN sit around a crude table,
	quietly eating.  They talk in low, relaxed voices, and there
	is an occasional laugh.

	ROCCO LAMPONE stands by a window, which has been covered
	with a heavy-mesh wire grating, gazing out.

	A large bowl of pasta is passed, and the MEN eat heartily.

	The sentimental tune is continued over the following:

	INT DAY: BODIES IN CAR (WINTER 1945)

	A MAN and a WOMAN, blood coming out of their noses, lie
	still together in a bullet-riddled automobile.

	INT DAY: BODY IN BARBER SHOP (WINTER 1945)

	A MAN is covered by a sheet on the floor of a barber shop.

	INT DAY: MATTRESS

	Ten mattresses are spread out around the otherwise empty
	living room of an apartment.  THREE or FOUR MEN including
	CLEMENZA, are taking naps.

	An arsenal of hand guns is spread out on a card table.

	The MEN at the table continue their dinner; passing and
	pouring the wine.

	Trash is thrown in 2 or 3 garbage cans kept in the apartment.

	INT DAY: BODY IN OFFICE (WINTER 1945)

	A MAN, his clothes soaked in blood, lies on the floor of an
	office building, dead, under an enormous portrait of Harry S.
	Truman.

	EXT DAY: BODY ON STOOP (WINTER 1945)

	ANOTHER MAN, his trousers soaked in blood, lies spanning
	three steps of a front stoop.

	INT NITE: MATTRESS (WINTER 1945)

	TESSIO, sits in a simple straight-backed chair, doing a
	crossword puzzle.

	A thin, boyish BUTTON MAN, writes a letter.

	Six or seven empty mattresses, with tossed unmade blankets.
	Coffee cans beside them serve as ash trays.

	A MAN by the table pulls the cork on another bottle of
	Ruffino, and wine is poured as the MEN eat.

	EXT DAY: BODY IN ALLEY (WINTER 1945)

	A CORPSE is half out of an overturned garbage can in a quiet
	alley.

	INT DAY: BODY AT TABLE (WINTER 1945)

	A MAN in a formal jacket and tie is slumped over a table, in
	a pool of blood on the tablecloth.

	INT DAY: MATTRESS (WINTER 1945)

	A neatly stacked pile of newspapers in the corner of an
	apartment.  We catch a glimpse of one headline: "Five Family
	War..."

	The table.  The MEN are sitting around cracking nuts.  ONE
	has fallen asleep on his arms at the table.

	SEVERAL MEN are taking naps on the Mattresses.

	The PIANO PLAYER finishes the tune with finesse.  Picks up
	and takes a drag from his cigarette.  The OTHER MAN nods
	appreciatively.

				MAN
		Nice Augie...nice.

	EXT DAY: MANCINI BLDG. (SPRING '46)

	Several cars are parked in front of a pleasant New York
	apartment building.  We recognize a couple of SONNY's
	bodyguards loafing by the cars, pitching playing cards
	against the curb.

	Inside the building, two others wait quietly by the rows of
	brass mailboxes: they have been there quite awhile.

	Up one flight of stairs, a single man sits on the step,
	smoking a cigarette.

	One of the men by the mailboxes checks his pocketwatch,
	which is attached to a key chain.  We HEAR the sound of a
	door opening; they look up.

	The man sitting on the step stands; and looks.

	SONNY backs out of an apartment, the arms of LUCY MANCINI
	wrapped around him.  She doesn't want to let go of him; she
	draws him back into the apartment for a moment, and then he
	comes out alone, adjusting his clothes.

	He jauntily skips down the steps, trailed by the bodyguard
	on the first floor, and moves outside toward his car.  The
	men quickly take up their positions.  As he gets in his car:

				DRIVER
		Pick up your sister?

				SONNY
		Yeah.

	The car drives off; accompanied and escorted by the
	bodyguards in their cars.

	INT DAY: CONNIE'S HALL (SPRING '46)

	He knocks on the door.  No answer.  Then again.

				CONNIE'S VOICE
		Who is it?

				SONNY
		It's me, Sonny.

	We hear the bolt slide back, and see the door open.  SONNY
	enters, but CONNIE has quickly moved into the hallway, her
	back to him.

				SONNY
			  (tenderly)
		Connie, what is it?

	He turns her around in his arms.

	Her face is swollen and bruised; and we can tell from her
	rough, red eyes that she has been crying for a long time.
	As soon as he realizes what's happened, his face goes red
	with rage.  She sees it coming, and clings to him, preventing
	him from running out of the apartment.

				CONNIE
			  (desperately)
		It was my fault!  I started a fight
		with him and I tried to hit him so
		he hit me.  He didn't even try to
		hit me hard Sonny, I walked into it.

	Sonny listens, and calms himself.  He touches her shoulder,
	the thin silk robe.

				SONNY
		I'm goin' to have the doctor come
		over and take a look at you.

	He starts to leave.

				CONNIE
		Oh Sonny, please don't do anything.
		Please don't.

	He stops, and then laughs good naturedly.

				SONNY
		Hey.  Con.  What'm I goin' to do?
		Make your kid a orphan before he's
		born.

	She laughs with him.  He kisses her reassuringly, and leaves.

	EXT DAY: CONNIE'S STREET

	CARLO settles down on the front steps of the 112th St.
	"Book" with SALLY RAGS and COACH, who have been drinking
	beer out of glasses and a pitcher of beer from around the
	corner.  The ball game is blaring from the radio; and the
	kids on the street are still playing stickball.

	CARLO has barely settled down, when the kids in the street
	suddenly scatter, and a car comes screeching up the block
	and to a halt in front of the candy store.  The tires
	scream, and before it seems as though it has even stopped, a
	MAN comes hurtling out of the driver's seat, moving so fast
	that everyone is paralyzed.  It is a moment before we
	recognize that it is SONNY.

	His face is contorted with anger; in a split second he is on
	the stoop and has CARLO by the throat.

	He pulls CARLO away from the others, trying to get him down
	into the street.  But CARLO reaches out for the iron railing,
	and hangs on, his hand in a lock, cringing away, trying to
	hide his head and face in the hollow of his shoulders.
	His shirt is ripped away in SONNY's hand.

	SALLY RAGS and COACH, merely sit, watching, stunned.

	SONNY is pounding the cowered CARLO with all his strength,
	in a continuous monologue of indistinguishable cursing.  His
	blows are powerful; and begin to draw blood.

	The kids who have been playing stickball, move up, watching
	in fascination.

	CARLO's hands are clenched tight around the railing.

	SONNY beats him mercilessly.

	Now SONNY's bodyguards' car pulls up, and they too become
	spectators.

	SONNY's tight fists are going down like hammers, into
	CARLO's face and body.

	CARLO's nose is bleeding profusely; but still he does
	nothing, other than hang onto the railing.

	SONNY grabs hold of CARLO's massive body, and tries to drag
	him off of the hold on the railing, his teeth clenched in
	the effort.  Then he tries loosening CARLO's locked hands;
	even biting them.  CARLO screams but he does not let go.

	It's clear that CARLO is much stronger than he is, and will
	not be moved.  SONNY knees him in the mouth, and beats him
	more; but he is exhausted.  Totally out of breath, he
	stammers haltingly to the bleeding CARLO.

				SONNY
		You...bastard...You...hurt my
		sister... again...and I'll
		kill...you.

	He wipes the sweat from his face, and then turns suddenly,
	and hurries back to the car; in a moment his car is gone,
	leaving even his bodyguards in confusion.  We notice ONE MAN
	with a sports jacket in the group of spectators especially
	interested.

	CARLO finally relaxes the clenched, locked hands.  He slumps
	onto the stoop.

	---------------------------------------FADE OUT---------

	FADE IN:

	EXT DAY: MALL (SPRING 1946)

	HIGH ANGLE on the Corleone Mall.  It is a gray, rainy day.
	Young BUTTON MEN in raincoats stand in quiet groups at
	various points around the main house and compound.  Things
	have changed; one house has been extensively enlarged; a new
	and secure gate house has been built.  Security measures
	that had been make-shift and temporary have now been made
	a permanent part of the Mall, evolving it into a Medieval
	Fortress.  We notice a huge crater in the courtyard; the
	result of a recent bomb attempt.  The house nearest the
	crater is damaged by fire.

	A taxi arrives; KAY ADAMS steps out, huddled in a bright
	yellow raincoat; she lets the cab go, and hurries to the
	shelter of the gate house.

	They are not expecting her, and ask her to wait while they
	call the main house.

	KAY looks at the imposing, depressing Mall, while rain still
	runs down onto her face.

	She notices the bomb crater, and the fire damage; and the
	sullen faces of the BUTTON MEN.

	TOM HAGEN exits the Main House, and hurries toward her.

				HAGEN
		Kay, we weren't expecting you.  You
		should call...

				KAY
		I've tried calling and writing.  I
		want to reach Michael.

				HAGEN
		Nobody knows where he is.  We know
		he's all right, but that's all.

	KAY looks in the direction of the crater, filling with
	rainwater.

				KAY
		What was that?

				HAGEN
		An accident.  No one was hurt.

				KAY
		Listen Tom, I let my cab go; can I
		come in to call another one?

	TOM is clearly reluctant to involve her any more than he has
	to.

				HAGEN
		Sure...I'm sorry.

	They hurry through the rain and into the Main House.

	INT DAY: DON'S LIVING ROOM (SPRING 1946)

	In the living room, KAY shakes the water from her coat and
	takes her rainhat off.

				KAY
		Will you give this to him.

				HAGEN
		If I accept that letter and you
		told a Court of Law I accepted it,
		they would interpret it as my
		having knowledge of his whereabouts.
		Just wait Kay, he'll contact you.

	We hear footsteps descending the staircase; MAMA CORLEONE
	enters the room; the OLD WOMAN squints at KAY, evaluating her.

				MAMA
		You're Mikey's little girl.

	KAY nods yes; there are still tears in her eyes.

				MAMA
		You eat anything?

	KAY shakes her head.

				MAMA
			  (to HAGEN)
		Disgrazia, you don't even give the
		poor girl a cup of coffee?

	HAGEN shrugs helplessly; on an impulse, KAY quickly moves
	toward MAMA, the letter extended.

				KAY
		Will you give this letter to Michael.

				HAGEN
		Mama, no.

				MAMA
		You tell me what to do?  Even he
		don't tell me what to do.

	She takes the letter from KAY, who is grateful and relieved.

				KAY
		Why did they blame Michael?

				MAMA
		You listen to me, you go home to
		your family, and you find a good
		young man and get married.  Forget
		about Mikey; he's no good for you,
		anymore.

	She looks directly into KAY's eyes; and KAY understands what
	that means.

	EXT DAY: DON'S HOSPITAL (SPRING 1946)

	A hospital in New York City.  POLICE and teams of PRIVATE
	DETECTIVES are stationed guarding the area.  An ambulance
	with a team of DETECTIVES and BUTTON-MEN GUARDS exit the
	hospital with rifles in hand; followed by SEVERAL HOSPITAL
	ASSISTANTS wheeling a hospital stretcher, presumably carrying
	the DON.

	TESSIO and CLEMENZA emerge, with OTHER BUTTON MEN bringing
	up the rear.  HAGEN walks with the stretcher, and for a
	moment they disappear behind the ambulance.  Then suddenly,
	siren blasting, it speeds off, accompanied by dark low-slung
	cars.

	EXT DAY: MALL (SPRING 1946)

	The Corleone Mall.

	Equally impressive security stands ready at the Corleone
	Mall.  EXTRA BUTTON MEN, as well as SOME POLICE, and PRIVATE
	DETECTIVES.

	It all seems to be under the supervision of ROCCO LAMPONE.
	All is silent.  The WOMEN and CHILDREN, dressed in Sunday
	clothes, wait.

	EXT DAY: AMBULANCE (SPRING 1946)

	One ambulance, speeding along the Grand Central Parkway,
	preceded and followed by a dark car, each one carrying a
	team of BUTTON MEN.

	Sitting next to the DRIVER of the ambulance is a GUARD with
	a rifle on his lap.

	INT DAY: DON'S HALL (SPRING 1946)

	Inside the Main CORLEONE House:

	Hospital ORDERLIES carry the DON on his stretcher carefully
	under the watchful eyes of CLEMENZA, TESSIO, LAMPONE and
	various GUARDS and BUTTON MEN.

	All the CORLEONE family is here today: MAMA, FREDO, SANDRA,
	THERESA, CONNIE, CARLO; the various CORLEONE CHILDREN.

	INT DAY: DON'S BEDROOM (SPRING 1946)

	The DON is made comfortable in his room, which has all but
	been converted into a hospital room, with complete and
	extensive equipment.  The various CHILDREN get a turn to
	kiss the OLD MAN, as he is made comfortable... and then
	SONNY indicates that all the CHILDREN, WOMEN, and CARLO
	should leave.

	They do, the door is closed.

	INT DAY: DON'S DINING ROOM (SPRING 1946)

	The mood is quite happy downstairs, as the WOMEN prepare the
	Sunday dinner, and set the table.

	CARLO sits alone among them, a frown on his face.

				CONNIE
		What's the matter, Carlo?

				CARLO
		Shut up.

	INT DAY: DON'S BEDROOM (SPRING 1946)

	All the MEN of the family stand around the hospital bed with
	grim faces, SONNY and HAGEN closest to the OLD MAN.  The DON
	does not speak, yet he asks questions with his looks and
	glances, as clearly as if they were verbalized.  HAGEN is
	the spokesman for the family.

				HAGEN
		...since McCluskey's killing, the
		police have cracked down on most of
		our operations...on the other
		families too.  There's been a lot
		of bad blood.

	The OLD MAN glances at SONNY.

				SONNY
		Pop, they hit us and we hit them
		back.

				HAGEN
		We put out a lot of material
		through our contacts in the
		Newspapers...about McCluskey's
		being tied up with Sollozzo in the
		Drug Rackets...things are starting
		to loosen up.

	The OLD MAN nods.

				SONNY
		Freddie's gonna go to Las
		Vegas...under the protection of Don
		Francesco of L.A.  I want him to
		rest...

				FREDO
		I'm goin' to learn the casino
		business.

	The DON nods approvingly.  Then he searches around the room
	for a face he does not see.  HAGEN knows who he's looking for.

				HAGEN
		Michael...
			  (he takes a breath)
		It was Michael who killed Sollozzo.

	The DON closes his eyes, and then reopens them in anger and
	rage.

				HAGEN
		He's safe now...we're already
		working on ways to bring him back.

	The DON is very angry, he motions with a weak hand that they
	leave him alone.

	INT DAY: DON'S STAIRS AND HALL (SPRING 1946)

	HAGEN is very upset as he comes down the Stairs; SONNY is
	expansive and optimistic.

				SONNY
		We'll let the old man take it easy
		for a couple of weeks.  I want to
		get things going good before he
		gets better.  What's the matter
		with you?

				HAGEN
		You start operating, the five
		families will start their raids
		again.  We're at a stalemate Sonny,
		your war is costing us a lot of
		money.

				SONNY
		No more stalemate Tom, we got the
		soldiers, we'll match them gun for
		gun if that's how they want it.
		They know me for what I am, Tom--
		and they're scared of me.

				HAGEN
		Yes.  That's true, you're getting a
		hell of a reputation.

				SONNY
		Well it's war!  We might not be in
		this shape if we had a real war-
		time Consigliere, a Sicilian.  Pop
		had Genco, who do I have?
			  (TOM starts to leave)
		Hey Tom, hey...hey.  It's Sunday,
		we're gonna have dinner.  Don't be
		sore.

	INT DAY: DON'S DINING ROOM (SPRING 1946)

	The FAMILY, WIVES, CHILDREN and all sit around the table
	over Sunday dinner.  SONNY is at the head of the table.

	EXT DAY: MALL (SPRING 1946)

	SOME of the CORLEONE GRANDCHILDREN play in the enclosed
	Mall, in the proximity of the BUTTON MEN stationed liberally
	by the gate.

	ONE CHILD misses a ball, it rolls by the gate house.  A
	young BUTTON MAN scoops it up and throws it back, smiling.

	-----------------------------------------FADE OUT-------

	INT DAY: CONNIE'S APT. (SPRING 1946)

	CONNIE and CARLO's apartment.  She's in a slip, on the phone.
	We HEAR the shower going in the bathroom.

				CONNIE
		Who is this?

				GIRL (O.S.)
			  (giggle)
		I'm a friend of Carlo's.  I just
		wanted to tell him I can't see him
		tonight; I have to go out of town.

	CONNIE's face turns red.

				CONNIE
		You lousy tramp bitch.
			  (click)


	She slams the phone down; just as CARLO is coming out of the
	bathroom drying his golden body.

				CARLO
		What was that?

				CONNIE
		Your girl friend.  She says she
		can't make it tonight.  You lousy
		bastard you have the nerve to give
		your whores my telephone number.
		I'll kill you, you bastard!

	She hauls off and punches him knowingly; he laughs, so then
	she flings herself at him, kicking and scratching; her heavy
	belly heaving under the thin slip.

				CARLO
			  (defending himself)
		You're crazy.  She was kidding
		around; I don't know, some nut.

	He pushes her aside, and moves into the bedroom to continue
	dressing.

				CONNIE
		You're staying home.  You're not
		going out.

				CARLO
		OK, OK.  You gonna make me something
		to eat at least?

	That calms her down; she stands there a moment, breathing
	heavily; and then she nods, and goes into the kitchen, and
	starts her wifely duties.

	CARLO is dressed; puts on some cologne; CONNIE appears in
	the doorway.

				CONNIE
		The food is on the table.

				CARLO
		I'm not hungry yet.

				CONNIE
		Eat it, it's on the table.

				CARLO
		Ba Fa Goulle.

				CONNIE
		BA FA GOULE YOU!

	She turns deliberately, goes out into the kitchen.  A moment
	later we begin to hear the sound of dishes breaking.  CARLO
	slowly walks out, where we can see CONNIE systematically
	smashing all the dishes against the sink, sending the greasy
	veal and peppers all over the apartment floor.

				CARLO
		You filthy guinea spoiled brat.
		Clean it up or I'll kick your head
		in.

				CONNIE
		Like hell I will.

	She stands there, solid, ready to punch him again.  Slowly,
	he slides his belt out of his trousers, and doubles it in
	his hand.

				CARLO
		Clean it up!

	He swings the belt against her heavy hips.  She moves back
	into the kitchen, and gets a kitchen knife, and holds it
	ready.

				CARLO
		Even the female Corleones are
		murderers.

	He puts the strap down on a table, and moves after her.  She
	makes a sudden thrust at his groin, which he avoids.  He
	pulls the knife away, cutting his hand in the process.  She
	gets away momentarily, but he pursues her around the table,
	gets her; and starts to slap her in the face.

	She breaks away from him, and rushes into the bedroom.

				CONNIE
		The baby!  The baby!

	INT DAY: CONNIE'S BEDROOM  (SPRING 1946)

	She runs into the bedroom; he follows.  She moves into a
	corner, and then like a desperate animal, tries to hide
	under the bed.

	He reaches under, and pulls her out by the hair.

	He slaps her in the face until she begins to weep; then he
	throws her on the bed, contemptuously.  He grabs part of her
	thigh, pinching it very hard.

				CARLO
		You're fat as a pig.

	Then he pushes her away, and walks out of the room, leaving
	her in tears.  She is crying; she pulls herself to the
	bedroom phone, and in a whisper:

				CONNIE
		Mama...mama, it's Connie.  Mama, I
		can't talk any louder.  No, I don't
		want to talk to Sonny.

	We can tell that the phone has been passed to SONNY.

	INT DAY: DON'S KITCHEN (SPRING 1946)

	In the kitchen at the Mall, MAMA cannot understand the
	whispering and she has given the phone to SONNY.

				SONNY
		Yeah Connie.

				CONNIE (O.S.)
		Sonny, just send a car to bring me
		home.  I'll tell you then, it's
		nothing Sonny, don't you come.
		Send TOM, please Sonny, it's
		nothing; I just want to come home.

	SONNY's face is turning red.

				SONNY
			  (in a controlled voice)
		You wait there.  You just wait there.

	He hangs up the phone; and just stands there for a moment.

				SONNY
			  (quietly)
		That sonofabitch; that sonofabitch...

	HAGEN enters the room; he knows what is happening, knows he
	cannot interfere.

	EXT DAY: MALL

	SONNY leaves the house.  HAGEN moves to the outside mall
	just as SONNY's car is driving off.  He moves to a group of
	BUTTON MEN.

				HAGEN
		Go after him.

	EXT DAY: CAUSEWAY (SPRING 1946)

	SONNY's car on the Jones Beach Causeway, speeds quickly by.
	After a pause, another car, with the CORLEONE BODYGUARDS, is
	trailing.

	SONNY is driving; he is very angry.

	EXT NITE: TOLL BOOTHS (SPRING 1946)

	SONNY in his car; driving back.  Still breathing hard and
	still furious.  Then he thinks it's funny; he enjoyed it.
	He starts laughing, louder and louder, as he pulls up to a
	toll booth, stops, and extends his hand with a coin to the
	COLLECTOR.

	---------------------------------------FADE OUT---------

	FADE IN:

	INT NITE: AMERIGO BONASERA'S APARTMENT

	The serious-faced UNDERTAKER is on the telephone.

				HAGEN (O.S.)
		This is Tom Hagen.  I'm calling for
		Don Corleone, at his request.

	BONASERA looks at his WIFE, with deep anxiety in his eyes.
	BONASERA's lips are suddenly dry.

				BONASERA
		Yes, I understand.  I'm listening.

				HAGEN (O.S.)
		You owe the Don a service.  In one
		hour, not before, perhaps later, he
		will be at your funeral parlor to
		ask for your help.  Be there to
		greet him.  If you have any
		objections speak now, and I'll
		inform him.

	Silence.  BONASERA stutters, then speaks in fright.

				BONASERA
		Anything...Anything the Godfather
		wishes.

				HAGEN (O.S.)
		Good.  He never doubted you.

				BONASERA
		The Don himself is coming to me
		tonight?

				HAGEN (O.S.)
		Yes.
			  (click)


	BONASERA is sweating; slowly he lowers the phone; his WIFE
	sees his pale expression, and follows him into the room.

	Silently, he begins the ritual of dressing.  His WIFE knows
	something serious is happening, and never takes her eyes
	from him.  He lights a cigarette.

				BONASERA
		For the last year, they have been
		killing one another.  So now, what?
		Your Godfather comes to me...Why?
			  (whispering, slyly)
		They've killed someone so important
		that they wish to make his body
		disappear.

				MRS. BONASERA
			  (frightened)
		Amerigo!

				BONASERA
		They could make me an accomplice to
		their murder.  They could send me
		to jail!

	He slips into his trousers.  Then he moves to his WIFE to
	tie his tie, as she has done for years.

				BONASERA
		And if the other families find
		out...they will make me their enemy.
		They could come here to our house.
		I curse the day I ever went to the
		Godfather.

	EXT NITE: FUNERAL PARLOR (SPRING 1946)

	With his ring of keys, he opens the funeral parlor, enters.

	INT NITE: FUNERAL PARLOR (SPRING 1946)

	BONASERA walks through the darkened funeral parlor, without
	turning on the lights; then into the rear, preparation room,
	past the tables, and equipment.  He operates the chain that
	lifts a large overhead garage type door.  And looks out into
	the alley.

	He sits on a bench, and waits.

	EXT NITE: FUNERAL PARLOR ALLEY (SPRING 1946)

	The tires of a car roll very quietly along the small alley;
	we notice a dark car approach the rear of BONASERA's funeral
	parlor.

	CLEMENZA gets out, and moves to the open, rear door.
	BONASERA greets him, too petrified to speak.  He notices TWO
	OTHER MEN get out of the car, and carry a stretcher with a
	CORPSE swaddled in a gray blanket, with yellowed feet
	protruding.

	BONASERA closes his eyes in fear, but indicates which way
	the MEN should carry their sinister burden.

	INT NITE: FUNERAL PARLOR EMBALMING ROOM (SPRING 1946)

	They carry the CORPSE to one of the tables in the embalming
	room.

	Then BONASERA turns to see ANOTHER MAN step out of the
	darkness somewhat uncertainly.  It is DON CORLEONE.

	He walks up to BONASERA, very close, without speaking.  His
	cold eyes looking directly at the frightened UNDERTAKER.
	Then, after a long gaze:

				DON CORLEONE
		Well my friend, are you ready to do
		me this service?

	BONASERA nods.  The DON moves to the CORPSE on the embalming
	table; he makes a gesture, and the OTHER MEN leave them alone.

				BONASERA
		What do you wish me to do?

				DON CORLEONE
			  (staring at the table)
		I want you to use all your powers,
		all your skill, as you love me.  I
		do not want his mother to see him
		as he is.

	He draws down the gray blanket.

	BONASERA lets out a gasp of horror at what he sees:

	The bullet-smashed face of SONNY CORLEONE.

	EXT NITE: TOLL BOOTHS (SPRING 1946)

	SONNY extends his hand with a coin at the toll booth.

	A car suddenly swerves in front of him, trapping him in the
	booth, and an incredible rally of machine gun fire greets
	him, coming through and smashing the windows of the toll
	booths on both sides of him, and from the front window of the
	car blocking him.

	The windows of his car are shot out.

	Bullet holes puncture the doors of his car.

	His hand, with the coin in it, falls inside the car.

	His arms, shoulders are riddled by the fire, and still it
	continues, as though the ASSASSINS cannot take a chance that
	he will survive it.

	Suddenly, he lets out an enormous ROAR, like a bull, and
	actually, opens the door, and steps out of the car, UNDER
	fire.

	His face is hit; and finally he falls to the ground.

	A FULL SHOT...as the ASSASSINS scramble for their cars and
	make off in the distance.

	SONNY's BODYGUARDS stop a safe distance away, realizing they
	are too late.

	INT NITE: DON'S LIVING ROOM (SPRING 1946)

	View on HAGEN's ashen face in the living room.  He is silent
	a moment, and then:

				HAGEN
			  (quietly)
		OK.  Go to Clemenza's house and
		tell him to come here right away.
		He'll tell you what to do.

	The MEN leave him alone.  He is quiet, standing in the
	middle of the living room a moment.  He looks in the
	direction of the kitchen, where he can see fragments of MAMA
	moving around.

	INT NITE: UPSTAIRS (SPRING 1946)

	TOM proceeds up stairs, and quietly in the direction of the
	DON's room.  He opens the DON's door.  Looks in.

	INT NITE: DON'S BEDROOM (SPRING 1946)

	The DON in his hospital bed.  Asleep under sedation.  HAGEN
	hesitates.  He cannot go in; he cannot tell the OLD MAN.  He
	closes the door.

	INT NITE: DON'S OFFICE (SPRING 1946)

	HAGEN alone in the office.  He is drinking.  He looks up at
	the sound of cars; the CAPOREGIMES are arriving.  Then he
	hears footsteps.

	The door opens; and in a robe, with slippers, DON CORLEONE
	slowly enters the room.  He walks directly to his stuffed
	armchair, sits down.  His face is stern, as he looks into
	HAGEN's eyes.

				DON CORLEONE
		Give me a drop of anisette.

	HAGEN rises, and pours a glass for the OLD MAN.

				DON CORLEONE
		My wife was weeping before she fell
		asleep, outside my window I saw my
		caporegimes coming to the house, and it is
		midnight.  So, Consigliere of mine,
		I think you should tell your Don
		what everyone knows.

				HAGEN
			  (quietly)
		I didn't tell Mama anything.  I was
		about to come up and wake you and
		tell you.  Just now.

				DON CORLEONE
		But you needed a drink first.

				HAGEN
		Yes.

				DON CORLEONE
		Now you've had your drink.

	Pause.

				HAGEN
		They shot Sonny on the Causeway.
			  (pause)
		He's dead.

	DON CORLEONE blinks.  One feels that just for a second he
	loses all physical strength; he clasps his hands in front of
	him on the top of the desk and looks into HAGEN's eyes.

				DON CORLEONE
		I want no inquiries made.  No acts
		of vengeance.
			  (pause)
		Consigliere, arrange a meeting with
		the heads of the five
		families...this war stops now.

	He rises and unsteadily leaves the room, turns...

				DON CORLEONE
		Call Bonasera...he will do me a
		service.

	And leaves.  HAGEN moves to the phone; dials...

				HAGEN
		This is Tom Hagen; I'm calling for
		Don Corleone, at his request.

				BONASERA (O.S.)
		Yes, I understand.  I'm listening.

				HAGEN
		You owe the Don a service.  He has
		no doubt that you will repay it.

	EXT DAY: BANK BUILDING (SPRING 1946)

	Day in Manhattan.  An impressive Bank Building in the
	financial center of New York.  Many limousines are parked,
	uniformed and plain-clothed CHAUFFEURS waiting quietly.

	INT DAY: BOARD ROOM (SPRING 1946)

	The Board Room of a bank, daylight shines in the windows.

	CARLO TRAMONTI, an impressive, handsome middle-aged man,
	sits quietly, smoking a Di Napoli cigar, OUR VIEW moves to a
	MAN sitting to his left, and a little to the rear, and
	settles on JOSEPH ZALUCHI, a moon-faced amiable-looking man;
	as the view continues, around the table, we HEAR:

				DON CORLEONE (O.S.)
		I want to thank you all for coming.
		I consider it a service done to me
		personally and I am in the debt of
		each and every one of you.
		Especially those of you who have
		traveled from such distances as
		California, St. Louis, Kansas City;
		and New Orleans...

	The VIEW PASSES to FRANK FALCONE and ANTHONY MOLINARI, both
	younger than any of the others; then on to DOMENICK PANZA,
	short and squat sitting in a wheelchair; then around the
	table to DON VINCENENZO FORLENZA, who is whispering to his
	JEWISH ASSISTANT; the VIEW PASSES on to ANTHONY STRACCI, an
	older man, sipping from a drink and smoking a cigar; OTTILIO
	CUNEO, in his middle sixties with a jolly round face; then
	DON PHILLIP TATTAGLIA, a delicate older man with dyed hair
	and a pencil mustache; and finally, EMILIO BARZINI, in his
	early sixties, a man to 'respect'; whom we had seen at
	CONNIE's Wedding.

				DON CORLEONE
		Ah well, let's get down to business.
		We are all honorable men here, we
		don't have to give assurances as if
		we were lawyers.
			  (he sits, gazes out
			  at them, and sighs)
		How did things ever go so far?
		Well, no matter.  A lot of
		foolishness has come to pass.  It
		was so unfortunate, so unnecessary.

	The VIEW examines the room once again, as the DON speaks.  A
	large, clicking board is changing numbers at various times,
	and two tapes, showing the fluctuations of the Market during
	the day's trading, are projected above.

	DON CORLEONE pauses; and TOM HAGEN hands him a cold drink.

				DON CORLEONE
		Tattaglia has lost a son; I have
		lost a son.  We are quits.  Let
		there be a peace...
			  (he gestures
			  expressively,
			  submissively, with
			  his hands)
		That is all I want...

				BARZINI
		Don Corleone is too modest.  He had
		the judges and politicians in his
		pocket and he refused to share them.
		His refusal is not the act of a
		friend.  He takes the bread out of
		the mouths of our families.  Times
		have changed, it's not like the old
		days where everyone can go his own
		way.  If Don Corleone had all the
		judges and politicians in New York,
		then he must share them or let
		others use them.  Certainly he can
		present a bill for such services,
		we're not Communists, after all.
		But he has to let us draw water
		from the well.  It's that simple.

				DON CORLEONE
		My friends, I didn't refuse out of
		malice.  You all know me.  When
		have I ever refused an accommodation?
		But why, this time?  Because I
		think this drug business will
		destroy us in the years to come.
		It's not like whiskey or gambling
		or even women which most people
		want and is forbidden them by the
		pezzonovante of the Church and the
		Government.  But drugs?  No.  Even
		policemen, who help us in gambling
		and other things would refuse to
		help us in drugs.  But...I am
		willing to do whatever all of you
		think is necessary.

				DON ZALUCHI
		I don't believe in drugs.  For
		years I paid my people extra so
		they wouldn't do that kind of
		business...$200 a week.  But it
		didn't matter.  Somebody comes to
		them and says, "I have powders, if
		you put up three, four thousand
		dollar investment, we can make
		fifty thousand distributing."  Who
		can resist such a profit?
		There's no way to control it, as a
		business...to keep it respectable.
			  (rapping the table)
		I don't want it near schools!  I
		don't want it sold to children.
		That is an infamita.
			  (thinking)
		In my city I would try to keep the
		traffic in the dark people, the
		colored.  They are the best
		customers, the least troublesome,
		and they are animals anyway.  They
		have no respect for their wives or
		their families or themselves.  Let
		them lose their souls with drugs.
		But something has to be done, we
		can't have everybody running around
		doing just what they please, like a
		bunch of anarchists.

				BARZINI
		Then, are we agreed; the traffic in
		drugs will be permitted, but
		controlled; and Don Corleone agrees
		to give it protection in the East.

	DON CORLEONE nods.

				BARZINI
		That's the whole matter then, we
		have the peace, and let me pay my
		respects to Don Corleone, whom we
		have all known over the years as a
		man of his word.
			  (noticing TATTAGLIA
			  is uneasy)
		Don Phillip?

				TATTAGLIA
		I agree to everything here, I'm
		willing to forget my own misfortune.
		But I must hear strict assurance
		from Corleone.  When time goes by
		and his position becomes stronger,
		will he attempt any individual
		vengeance?

	They all look at the DON; especially HAGEN, who feels that
	DON CORLEONE has given a great deal, and must have something
	else in mind.  Slowly the DON rises.

				DON CORLEONE
		I forego my vengeance for my dead
		son, for the common good.  But I
		have selfish reasons.  My youngest
		son had to flee, accused of
		Sollozzo's murder, and I must now
		make arrangements so that he can
		come home with safety, cleared of
		all those false charges.  That is
		my affair, and I will make those
		arrangements.
			  (with strength)
		But I am a superstitious man...and
		so if some unlucky accident should
		befall my youngest son, if some
		police officer should accidentally
		shoot him, or if he should hang
		himself in his cell, or if my son
		is struck by a bolt of lightning,
		then I will blame some of the
		people here.  That, I could never
		forgive, but...aside from that, let
		me swear by the souls of my
		Grandchildren that I will never be
		the one to break the peace we have
		made.

	EXT NITE: DON'S LIMO (SPRING 1946)

	The DON's black limousine.  He sits quietly in the padded
	rear seat; TOM HAGEN next to him.

	It is night.  Lights flash by them every so often.

				HAGEN
		When I meet with Tattaglia's
		people; should I insist that all
		his drug middle-men be clean?

				DON CORLEONE
		Mention it, don't insist.  Barzini
		is a man who will know that without
		being told.

				HAGEN
		You mean Tattaglia.

				DON CORLEONE
			  (shaking his head)
		Barzini.

				HAGEN
			  (a revelation)
		He was the one behind Sollozzo?

				DON CORLEONE
		Tattaglia is a pimp.  He could
		never have outfought Santino.  But
		I wasn't sure until this day.  No,
		it was Barzini all along.

	The black limousine speeds away from us in the night.

	------------------------------------------FADE OUT------

	FADE IN:

	EXT DAY: ESTABLISHING SICILY SHOT

	A CLOSE VIEW OF MICHAEL, moving as he walks, sullen and
	downcast, the left side of his face healed, but left
	grotesque and misshapen.

	GRADUALLY, THE VIEW LOOSENS, he wears a warm navy Pea
	jacket, and walks with his hands in his pockets.

	THE VIEW LOOSENS FURTHER, revealing a Sicilian SHEPHERD on
	either side of him, each carrying a shotgun slung over his
	shoulder, CALO, a squat and husky young man with a simple
	honest quality, and FABRIZZIO, slender and handsome, likable,
	and with a pleasing build.  Each of the SHEPHERDS carries a
	knapsack.

	The THREE YOUNG MEN continue over the Sicilian landscape,
	overlooking an impressive view of land and sea.

	EXT DAY: SICILY ROAD

	The THREE move through a flock of wind-blown sheep, and make
	their way to a dusty rural road.  We HEAR a rinky horn
	sound, as a pre-war Italian automobile makes its way to them.
	An OLD MAN peeks from the window, waving to MICHAEL.  The
	car pulls in front of them and stops.  MICHAEL nods
	respectfully.

				MICHAEL
		Don Tommassino.

				DON TOMMASSINO
		Michael, why must you do this.  We
		have been lucky so far, all these
		months you've been here we've kept
		your name a secret.  It is from
		love for your father that I've
		asked you never to go more than an
		hour from the Villa.

				MICHAEL
		Calo and Fabrizzio are with me;
		nothing will happen.

				DON TOMMASSINO
		You must understand that your
		Father's enemies have friends in
		Palermo.

				MICHAEL
		I know.

				DON TOMMASSINO
		Where are you going?

				MICHAEL
		Corleone.

				DON TOMMASSINO
		There is nothing there.  Not anymore.

				MICHAEL
		I was told that my Grandfather was
		murdered on its main street; and
		his murderers came to kill my
		father there when he was twelve
		years old.

				DON TOMMASSINO
		Long ago.  Now there is nothing:
		the men killed each other in family
		vendettas...the others escaped to
		America.

				MICHAEL
		Don Tommassino...I should see this
		place.

	DON TOMMASSINO thinks a moment, then concedes.

				DON TOMMASSINO
		That is your birthright...but
		Michael, use this car.

				MICHAEL
		No...I would like to walk to
		Corleone.

	The OLD MAN sighs, and then returns to his car.

				DON TOMMASSINO
		Be careful Michael, don't let them
		know your name.

	The old car sputters off; MICHAEL watches, and then continues
	on his journey.

	EXT DAY: COUNTRYSIDE

	The THREE pass through abundant areas of flowers and fruit
	trees, in bloom and bursting with life.

	EXT DAY: VILLAGE

	They continue in the empty streets of a little town; the
	post-war poverty is evident in the skinny dogs; and the
	empty streets.  Occasionally, a military vehicle, the only
	gasoline-powered vehicle on the road, will pass.  And there
	are many POLICE evident, most of them carrying machine guns.

	The THREE pass under an enormous banner slung over the main
	road "VOTA COMMUNISTA".

	EXT DAY: COUNTRY ROAD

	They continue through dusty country roads, where occasionally
	a donkey pulling a cart, or a lone horseman will pass them.

	EXT DAY: FIELD

	Out in a field, in the distance, they come upon a procession
	of peasants and activists, perhaps two hundred strong,
	marching, and singing, and in the lead, are five or six men
	carrying billowing red banners.

	EXT DAY: GROVE

	They are in an orange grove; on the other side of the trees
	is a deep, tall field of wild flowers.

	The Shepherds unsling their guns and knapsacks, and take out
	loaves of bread, some wine, sausage and cheese.

	MICHAEL rests against a tree, and uses his handkerchief.

				FABRIZZIO
		You tell us about America.

				MICHAEL
		How do you know I come from America?

				FABRIZZIO
		We hear.  We were told you were a
		Pezzonovanta...big shot.

				MICHAEL
		Only the son of a Pezzonovanta.

				FABRIZZIO
		Hey America!  Is she as rich as
		they say?

				MICHAEL
		Yes.

				FABRIZZIO
		Take me to America!  You need a
		good lupara in America?
			  (pats his shotgun)
		You take me, I'll be the best man
		you got.  "Oh say, can you
		seeee...By da star early light..."

	MICHAEL laughs.

	EXT DAY: ANOTHER ROAD

	The TRIO continues down a dirt road, as an American Military
	convoy speeds by; FABRIZZIO waves, and calls out to each of
	the U.S. drivers, as they move by.

				FABRIZZIO
		America.
		Hey America!
		Take me with you!
		Hey, take me to America G.I.!

	EXT DAY: CORLEONE HILL

	They continue their long hike, high on a promontory; until
	they hesitate, and look down.

				CALO
		Corleone.

	They can see a grim Sicilian village, almost devoid of people.

	EXT DAY: CORLEONE STREET

	MICHAEL and his bodyguards move through the empty streets of
	the village.  They walk behind him, and spread to either
	side about fifteen feet away from him.

	They move down ancient steps, past an old stone fountain.
	MICHAEL hesitates, cups his hands and drinks some water.
	They go on.

	They move up a very narrow old street.  MICHAEL looks at the
	doorways that they pass.

	MOVING VIEW: Each door has a plaque, with a ribbon or flower.

	CALO sees MICHAEL looking.

				CALO
		The names of the dead.

	MICHAEL hesitates in the center of the main street.  He looks.

	The street is empty, barren.  Occasionally, an old woman
	will pass.

	MICHAEL turns his head.

	The other side of the street: empty and deathly.

	A HIGH VIEW of MICHAEL standing in the center of the old
	street, the shepherds a respectful distance away.

	-------------------------------------FADE OUT-----------

	EXT DAY: BARONIAL ESTATE

	A green ribboned field of a baronial Estate.  Further ahead
	is a villa so Roman it looks as though it had just been
	discovered in the ruins of Pompeii.  There is a group of
	young village GIRLS accompanied by two stocky MATRONS,
	dressed in black.  They have been gathering the pink sulla,
	purple wisteria, and mixing them with orange and lemon
	blossoms.  They are singing, off in the distance as they work.

	MICHAEL, CALO and FABRIZZIO are silent as they watch this
	Fantasy-like scene.

				FABRIZZIO
			  (calling out to them)
		Hey, beautiful girls!

				MICHAEL
			  (sternly)
		Shhhhh.

	He settles down to watch.

	The GIRLS are dressed in cheap gaily painted frocks that
	cling to their bodies.  They are still in their teens, but
	developed and womanly.

	They are moving along the fields, picking blossoms, not
	aware of the three men watching them from the orange grove.
	Three or four of the girls begin chasing one of them
	playfully, in the direction of the grove.

	The GIRL being chased holds a bunch of purple grapes in her
	left hand and with the right, picks more grapes, and throws
	them back at her pursuers laughing.

	They come closer and closer.  Just short of the grove, she
	pauses, startled, her large, oval shaped eyes catching the
	view of the THREE MEN.  She stands there on her toes about
	to run.

	MICHAEL sees her; now face to face.  He looks.

	Her face.  Incredibly beautiful with olive skin, black hair
	and a rich mouth.

				FABRIZZIO
			  (murmuring)
		Jesus Christ, take my soul.  I'm
		dying.

	Quickly, she turns, and runs away.

	MICHAEL stands up never taking his eyes from her.  We hold
	on him for a long while; and eventually hear the SHEPHERDS
	laughing.  Then he turns to them.

				FABRIZZIO
		You got hit by the thunderbolt, eh?

	CALO pats him on the shoulder.

				CALO
		Easy man.

				MICHAEL
		What are you talking about?

				FABRIZZIO
		You can't hide it when you're hit
		by the thunderbolt.

	EXT DAY: BARONIAL VILLAGE

	The little village built attendant to the Baronial Estate,
	is decked with the flowers the girls had been picking.

	MICHAEL, followed by the bodyguards, moves into the central
	square, and onto the balcony of a little cafe.

	The proprietor of the cafe, VITELLI, is a short burly man;
	he greets them cheerfully, and sets a dish of chickpeas at
	their table.

				FABRIZZIO
		You know all the girls in this
		town, eh?  We saw some beauties
		coming down the road.  One in
		particular got our friend hit with
		the Thunderbolt...
			  (he indicates MICHAEL)

	VITELLI gives a big knowing laugh, and looks at MICHAEL with
	new interest.

				VITELLI
		You had better bring a few bottles
		home with you, my friend; you'll
		need help sleeping tonight.
			  (he laughs)

				FABRIZZIO
		This one could seduce the devil.  A
		body! and eyes as big and black as
		olives.

				VITELLI
			  (laughing with
			  them...pouring more wine)
		I know about what you mean!

				FABRIZZIO
		This was a beauty.  Right, Calo?

				VITELLI
			  (laughing)
		Beautiful all over, eh?

				FABRIZZIO
		And hair.  Black and curly, like a
		doll.  And such a mouth.

	VITELLI does not laugh quite so much.

				VITELLI
		Yes, we have beautiful girls here...
		but virtuous.

	VITELLI is no longer drinking with them.

				MICHAEL
		She wore a red dress, and a red
		ribbon in her hair.  She looks more
		Greek than Italian.  Do you know a
		beauty like that?

	As MICHAEL describes her, VITELLI laughs less and less,
	until he wears a scowl.

				VITELLI
		No.

	Then he curtly leaves him, and walks into the back room.

				FABRIZZIO
		God in Heaven, I think I
		understand...

	He goes into the back room after the innkeeper.  Then he
	returns.

				FABRIZZIO
		Let's get out of here; he's boiling
		up his blood to do us mischief.
		It's his daughter.

	They start to leave; but MICHAEL doesn't move.

				CALO
		Come quickly.

				MICHAEL
		Innkeeper.  More wine!

				FABRIZZIO
			  (whispered)
		The old bastard mentioned two sons
		he only has to whistle up.

	MICHAEL turns to FABRIZZIO with his cold authority.

				MICHAEL
		Tell him to come to me.

	The two BODYGUARDS shoulder their luparas, and disappear.  In
	a moment they return with the red-faced angry VITELLI
	between them.

				MICHAEL
			  (quietly)
		I understand I've offended you by
		talking about your daughter.  I
		offer you my apologies, I'm a
		stranger in this country, I don't
		know the customs very well.  Let me
		say this, I meant no disrespect to
		you or her...

	CALO and FABRIZZIO are impressed.

				VITELLI
			  (shrugs)
		Who are you and what do you want
		from my daughter?

				MICHAEL
		I am an American hiding in Sicily
		from the police of my country.  My
		name is Michael.  You can inform
		the police and make your fortune
		but then your daughter would lose a
		father rather than gain a husband.
		In any case, I want to meet your
		daughter.  With your permission and
		under the supervision of your
		family.  With all decorum.  With
		all respect.  I am an honorable man.

	CALO and FABRIZZIO are stupefied; VITELLI pauses, and then
	asks:

				VITELLI
		Are you a friend of the friends?

				MICHAEL
		When the proper time comes, I'll
		tell you everything that a wife's
		father should know.

				FABRIZZIO
		It's the real Thunderbolt, then.

				VITELLI
			  (formally)
		Come Sunday morning:  My name is
		Vitelli and my house is up there on
		the hill, above the village.

				MICHAEL
		Your daughter's name?

				VITELLI
		Appolonia.

	-------------------------------------FADE OUT-----------

	EXT DAY: TOMMASSINO COURTYARD

	MUSIC comes up; as MICHAEL, dressed in new clothes from
	Palermo, and carrying a stack of wrapped gifts, gets into an
	Alfa Romeo.  CALO and FABRIZZIO each dressed in their Sunday
	best, are in the rear seat, huddled together, with their
	luparas on their shoulders.

	DON TOMMASSINO waves them off, as the little car drives off,
	rocky and bouncing on the dirt road.

	The Sunday churchbells ring.

	--------------------------------------DISSOLVE----------

	EXT DAY: VITELLI HOUSE

	MICHAEL is presented to each of the Vitelli relatives, by
	the yard of their little hilltop house; the BROTHERS; the
	MOTHER, who is given a gift; several UNCLES and AUNTS.
	Finally APPOLONIA enters, dressed beautifully in appropriate
	Sunday clothing.  Now he presents the wrapped gift to
	APPOLONIA.  She looks at her MOTHER, who with a nod gives
	her permission to open it.  She unwraps it.  Her eyes light
	at the sight of a heavy gold chain; to be worn as a necklace.

	She looks at him.

				APPOLONIA
		Grazie.

	--------------------------------------DISSOLVE----------

	EXT DAY: VITELLI CAFE

	Now the little Alfa drives into the village near VITELLI's
	cafe.

	MICHAEL is, as ever, accompanied with his two BODYGUARDS,
	though they are all dressed differently.

	They go up to the cafe...and sit with VITELLI, who is
	talking and talking.

	MICHAEL looks at APPOLONIA; who sits, respectfully quiet.
	She wears the gold necklace around her neck.

	--------------------------------------DISSOLVE----------

	EXT DAY: HILLTOP NEAR VITELLI HOME

	MICHAEL and APPOLONIA are walking through a hilltop path,
	seemingly alone, although a respectful distance apart.

	As the VIEW PANS with them, we notice that her MOTHER and a
	half dozen AUNTS are twenty paces behind them, and ten paces
	further behind are CALO and FABRIZZIO, their luparas on
	their shoulders.

	Further up the hill, APPOLONIA stumbles on a loose stone,
	and falls briefly onto MICHAEL's arm.  She modestly regains
	her balance, and they continue walking.

	Behind them, her MOTHER giggles to herself.

	--------------------------------------DISSOLVE----------

	EXT DAY: VITELLI VILLAGE CHURCH

	Church bells in an ancient belfry ring out.  Music, old and
	dissonant, plays.

	There is a bridal procession in the street of the village;
	the same in feeling and texture as it might have been five
	hundred years ago.

	Donkeys and other animals have been decorated with abundant
	flowers; children carrying candles and wearing white
	confirmation gowns walk in the procession, followed by
	countless townspeople, members of the clergy, even the police.

	We present the entire bridal procession and ceremony with
	all the ritual and pageantry, as it has always been, in
	Sicily.

	APPOLONIA is radiant as the Bride; MICHAEL is handsome
	despite the grotesque jaw and occasional white handkerchief.

	--------------------------------------DISSOLVE----------

	EXT NITE: VITELLI VILLAGE SQUARE

	CALO and FABRIZZIO dance wildly through the night of the
	great wedding celebration.  It is held in the Village
	Square; under the watchful eyes of SHEPHERDS above on the
	tops of buildings, carrying luparas.

	--------------------------------------DISSOLVE----------

	INT NITE: MICHAEL'S ROOM IN VILLA

	MICHAEL opens the shutters in his darkened room; moonlight
	fills the room.

	He turns, and there, in her wedding slip, is APPOLONIA.  A
	little frightened; but lovely.

	He moves to her; and for a moment just stands before her,
	looking at her incredible face; her lovely hair and body.

	Slowly and tenderly he kisses her.  Her tiny hands come up
	to his face; touch his cheek and embrace him.

	She lets her bridal slip fall to the floor.

	--------------------------------------FADE OUT----------

	INT DAY: MICHAEL'S ROOM AT VILLA

	Morning.  MICHAEL sits on the window ledge, gazing into the
	room.

	APPOLONIA is asleep; she is naked, and only partially
	covered by the bedsheets.

	He looks at her for a long time in the early morning light.

	EXT DAY: TOMMASSINO COURTYARD

	HIGH ANGLE ON DON TOMMASSINO'S VILLA

	We HEAR girlish laughter; the little Alfa is driving
	erratically, knocking down an occasional wall, and almost
	hitting the inner court wall.

	APPOLONIA is laughing, driving.  MICHAEL pretends to be
	frightened, as he teaches her to drive.

	Outside the walls, we notice SHEPHERDS with luparas, walking
	guard duty.

	The car stops and a laughing MICHAEL gets out.

				MICHAEL
		It's safer to teach you English.

				APPOLONIA
		Monday, Tuesday, Wednesday,
		Thursday, Friday...See, I learned
		it.  Now teach me to drive!

	DON TOMMASSINO enters the Courtyard.  He seems tired and
	concerned.

				MICHAEL
		Ciao, Don Tommassino.

	APPOLONIA kisses him.

				MICHAEL
		Things went badly in Palermo?

				DON TOMMASSINO
		The younger men have no respect.
		Things are changing; I don't know
		what will happen.  Michael, because
		of the wedding, people now know
		your name.

				MICHAEL
		Is that why there are more men on
		the walls?

				DON TOMMASSINO
		Even so, I don't think it is safe
		here anymore.  I've made plans to
		move you to a villa near Siracuse.
		You must go right away.

				MICHAEL
		What is it?

				DON TOMMASSINO
		Bad news from America.  Your
		brother, Santino.  He has been
		killed.

	For a moment, the whole world of New York, Sollozzo, the
	Five Family War, all comes back to MICHAEL.

	EXT DAY: VILLA COURTYARD

	Morning.  MICHAEL leans out of the bedroom window.

	Below, FABRIZZIO is sitting in one of the garden chairs,
	combing his thick hair.

	MICHAEL whistles and FABRIZZIO looks up to his window.

				MICHAEL
		Get the car.  I'll be leaving in
		ten minutes.  Where's Calo?

				FABRIZZIO
		Calo is having a cup of coffee in
		the kitchen.  Is your wife coming
		with you?

				MICHAEL
		No, she's going home to her family.
		She'll join me in a few weeks...

	INT DAY: VILLA KITCHEN

	MICHAEL, dressed, crosses from the hallway, and into the
	kitchen.  CALO is just finishing a bite.  He rises when he
	sees MICHAEL.

				CALO
		Should I get your bag?

				MICHAEL
		No, I'll get it.  Where's Appolonia?

				CALO
			  (smiling)
		She is sitting in the driver's seat
		of the car, dying to step on the
		gas.  She'll be a real American
		woman before she gets to America.

	MICHAEL smiles.

				MICHAEL
		Tell Fabrizzio and wait for me in
		the car.

	He leaves the kitchen, after a quick sip of coffee.

	He looks out from the opening in the doorway.

	EXT DAY: VILLA COURTYARD

	There is the car, with APPOLONIA sitting in the driver's
	seat, playing with the wheel like a child.

	CALO moves to the car, and puts a lunch basket in the rear
	seat.

	Then MICHAEL seems disturbed.

	Over, on the other side of the courtyard, he sees FABRIZZIO
	disappear through the gate.

				MICHAEL
			  (muttering to himself)
		Where the hell is he going?

	MICHAEL goes down the hallway, and outside.

	MICHAEL steps out into the bright sunlight of the outer
	courtyard, causing him to shade his eyes.

	APPOLONIA sees him, and waves, motioning that he should stay
	where he is.

				APPOLONIA
			  (calling out)
		I'll drive to you.

	He smiles affectionately.

	CALO stands beside the car, smiling, with his lupara dangling
	by his side.  There is no sight of FABRIZZIO.  Suddenly the
	smile fades from MICHAEL's face.  He steps forward and holds
	out his hand.

				MICHAEL
		No.  No!

	His shout is drowned in the roar of a tremendous EXPLOSION,
	as she switched on the ignition.

	Part of the wall is caved in, the kitchen door is blown off;
	and there is nothing left of the Alfa, or of Appolonia.

	MICHAEL is thrown against the wall, and knocked unconscious.

	INT DAY: VILLA BEDROOM

	MICHAEL is unconscious in a darkened room.  We hear
	whispering around him, but can't make any of it out.  A soft
	cloth is applied to his face; gradually his eyes open.  DON
	TOMMASSINO is there, close to him.  He looks at them and
	from their grave expressions, he knows his wife is dead.

				MICHAEL
		Fabrizzio.  Let your shepherds know
		that the one who gives me Fabrizzio
		will own the finest pastures in
		Sicily.

	--------------------------------------FADE OUT----------

	FADE IN:

	EXT DAY: MALL (SPRING 1951)

	Easter.

	A HIGH VIEW ON THE CORLEONE MALL in the springtime.  Hordes
	of little CHILDREN including many of the Corleone Children
	and Grandchildren, rush about carrying little Easter baskets,
	searching here and there for candy treasures and hidden
	Easter eggs.

	The DON himself, much older, much smaller in size, wearing
	baggy pants and a plaid shirt and an old hat, moves around
	his garden, tending rows and rows of rich tomato plants.

	Suddenly, he stops and looks.

	MICHAEL stands there, still holding his suitcase.

	Great emotion comes over the DON, who takes a few steps in
	MICHAEL's direction.

	MICHAEL leaves his suitcase and walks to his favorite son
	and embraces him.

				DON CORLEONE
		Be my son...

	INT DAY: THE OLIVE OIL FACTORY

	DON CORLEONE leads MICHAEL through the corridors of the
	building.

				DON CORLEONE
		This old building has seen its day.
		No way to do business...too small,
		too old.

	They enter the DON's glass-panelled office.

				DON CORLEONE
		Have you thought about a wife?  A
		family?

				MICHAEL
			  (pained)
		No.

				DON CORLEONE
		I understand, Michael.  But you
		must make a family, you know.

				MICHAEL
		I want children, I want a family.
		But I don't know when.

				DON CORLEONE
		Accept what's happened, Michael.

				MICHAEL
		I could accept everything that's
		happened; I could accept it, but
		that I never had a choice.  From
		the time I was born, you had laid
		this all out for me.

				DON CORLEONE
		No, I wanted other things for you.

				MICHAEL
		You wanted me to be your son.

				DON CORLEONE
		Yes, but sons who would be
		professors, scientists,
		musicians...and grandchildren who
		could be, who knows, a Governor, a
		President even, nothing's impossible
		here in America.

				MICHAEL
		Then why have I become a man like
		you?

				DON CORLEONE
		You are like me, we refuse to be
		fools, to be puppets dancing on a
		string pulled by other men.  I
		hoped the time for guns and killing
		and massacres was over.  That was
		my misfortune.  That was your
		misfortune.  I was hunted on the
		streets of Corleone when I was
		twelve years old because of who my
		father was.  I had no choice.

				MICHAEL
		A man has to choose what he will be.
		I believe that.

				DON CORLEONE
		What else do you believe in?

	MICHAEL doesn't answer.

				DON CORLEONE
		Believe in a family.  Can you
		believe in your country?  Those
		Pezzonovante of the State who
		decide what we shall do with our
		lives?  Who declare wars they wish
		us to fight in to protect what they
		own.  Do you put your fate in the
		hands of men whose only talent is
		that they tricked a bloc of people
		to vote for them?  Michael, in five
		years the Corleone family can be
		completely legitimate.  Very
		difficult things have to happen to
		make that possible.  I can't do
		them anymore, but you can, if you
		choose to.

	MICHAEL listens.

				DON CORLEONE
		Believe in a family; believe in a
		Code of Honor, older and higher,
		believe in Roots that go back
		thousands of years into your Race.
		Make a family, Michael, and protect
		it.  These are our affairs, sono cosa
		nostra, Governments only protect
		men who have their own individual
		power.  Be one of those men...you
		have the choice.

	--------------------------------------FADE OUT----------

	EXT DAY: STOCK FOOTAGE LAS VEGAS (1955)

	A MOVING VIEW, driving up the Las Vegas Strip of 1955.

				FREDO (O.S.)
		There's a new one.  Construction
		going on everywhere.

	MORE VIEWS, showing new hotels and casinos being built; the
	bill marquees read: "MARTIN AND LEWIS", "PATTI PAGE", etc.

				FREDO (O.S.)
		That's one of the family's new ones.
		Not bad, eh?

	EXT DAY: FLAMINGO (1955)

	The car pulls up at the Flamingo Hotel.

	Inside the car: MICHAEL, FREDO, TOM HAGEN and a new man,
	NERI, quiet and sinister.

				MICHAEL
		Why didn't Moe Greene meet us at the
		airport?

				FREDO
		He had business at the hotel, but
		he'll drop in for dinner.

	From the expression on MICHAEL's face we know this is a
	discourtesy.

	INT DAY: FLAMINGO HOTEL SUITE (1955)

	A whole entourage precedes FREDO and his V.I.P. party of
	MICHAEL, HAGEN and NERI.  Great fuss is made.  They are
	being shown into the hotel's 'special' suite.

				FREDO
		You look wonderful, kid; really
		wonderful.  That doctor did some
		job on your face.

				MICHAEL
		You look good, too.

	They enter the suite.

				FREDO
		Nice, eh?

	FREDO is as excited as a kid, snapping orders at the
	bellboys, waiters and maids.

				FREDO
			  (hurrying into the bedroom)
		Kid, take a look-see.

	MICHAEL gives a look to HAGEN, and continues into the bedroom.

	There is an enormous circular bed on a huge platform,
	mirrors to each side.  FREDO points upward.

	A VIEW into a large CEILING mirror.

				FREDO
		Ever seen anything like that before?

				MICHAEL
			  (dryly)
		No.

	INT NITE: FLAMINGO SUITE BEDROOM (1955)

	MICHAEL is alone in the bedroom.  He is just finishing
	dressing; he puts on his jacket.  From the window, with the
	lights blinking, we can tell it's late at night.  MICHAEL
	passes into the other room.

	He stops, looks.  He is disturbed.

	INT NITE: FLAMINGO SUITE (1955)

	A magnificent, circular table has been set up in his suite;
	a lavish table setting for eight.  Standing by the table are
	HAGEN, JOHNNY FONTANE, looking wonderful, a little heavier,
	beautifully dressed; FREDO, a dandy, and TWO LAS VEGAS GIRLS.
	NERI stands quietly by the door.

				FREDO
		Mike!  The party's starting!

				MICHAEL
		Come here a minute, Fredo.

	FREDO goes to him, a big smile all over his face.

				MICHAEL
		Who are those girls?

				FREDO
			  (jokingly)
		That's for you to find out.

				MICHAEL
		Give them some money and send them
		home.

				FREDO
		Mike!

				MICHAEL
		Get rid of them...

	INT NITE: FLAMINGO SUITE (1955)

	They are seated around the lavish table in Michael's suite.
	MICHAEL is speaking to JOHNNY.

				MICHAEL
		Johnny, the Corleone family is
		thinking of selling out all our
		interests in the Olive Oil business
		and settling here.  Moe Greene will
		sell us his interest so it can be
		wholly owned by friends of the
		family.

	FREDDIE seems anxious.

				FREDO
		Mike, you sure about Moe selling?
		He never mentioned it to me and he
		loves the business.

				MICHAEL
		I'll make him an offer he can't
		refuse.

	MICHAEL turns to JOHNNY.

				MICHAEL
		Johnny, the Don wants you to help
		us get started.  We figure
		entertainment will be the big
		factor in drawing gamblers.  We
		hope you'll sign a contract to
		appear five times a year for maybe
		a week long engagement.
		We hope your friends in the movies
		will do the same.  We count on you
		to convince them.

				JOHNNY
		Sure, I'll do anything for my
		Godfather.  You know that, Mike.

	There is a knock on the door.  NERI rises, looks at MICHAEL,
	who nods.  NERI opens the door, and MOE GREENE enters,
	followed by TWO BODYGUARDS.  He is a handsome hood, dressed
	in the Hollywood style.  His BODYGUARDS are more West Coast
	style.

				MOE
		Mike, good to see you.  Got
		everything you want?

				MICHAEL
		Thanks.

				MOE
		The chef cooked for you special;
		the dancers will kick your tongue
		out and your credit is good!
			  (to his BODYGUARDS)
		Draw chips for all these people so
		they can play on the house.

				MICHAEL
		Is my credit good enough to buy you
		out?

	MOE laughs.

				MOE
		Buy me out?...

				MICHAEL
		The hotel, the casino.  The Corleone
		family wants to buy you out.

	GREENE stops laughing; the room becomes tense.  NERI eyes
	the BODYGUARDS.

				MOE
			  (furious)
		The Corleone family wants to buy me
		out.  I buy you out.  You don't buy
		me out.

				MICHAEL
		Your casino loses money.  Maybe we
		can do better.

				MOE
		You think I scam?

				MICHAEL
			  (the worst insult)
		You're unlucky.

				MOE
		You goddamn dagos.  I do you a
		favor and take Freddie in when
		you're having a bad time, and then
		you try to push me out.

				MICHAEL
		You took Freddie in because the
		Corleone family bankrolled your
		casino.  You and the Corleone
		family are evened out.  This is for
		business; name your price.

				MOE
		The Corleone family don't have that
		kind of muscle anymore.  The
		Godfather is sick.  You're getting
		chased out of New York by Barzini
		and the other families, and you
		think you can find easier pickings
		here.  I've talked to Barzini; I
		can make a deal with him and keep
		my hotel!

				MICHAEL
			  (quietly, deadly)
		Is that why you thought you could
		slap Freddie around in public?

				FREDO
			  (his face turns red)
		Ah Mike, that was nothing.  Moe
		didn't mean anything.  He flies off
		the handle sometimes; but me and
		him are good friends.  Right, Moe?

				MOE
		Yeah sure.  Sometimes I gotta kick
		asses to make this place run right.
		Freddie and I had a little argument
		and I had to straighten him out.

				MICHAEL
		You straightened my brother out?

				MOE
		Hell, he was banging cocktail
		waitresses two at a time.  Players
		couldn't get a drink.

	MICHAEL rises from his chair, and says in a tone of dismissal:

				MICHAEL
		I have to go back to New York
		tomorrow.  Think of your price.

				MOE
		You son of a bitch, you think you
		can brush me off like that?  I made
		my bones when you were going out
		with cheerleaders.

				FREDO
			  (frightened)
		Tom, you're the Consigliere; you
		can talk to the Don and advise him.

				MICHAEL
		The Don has semi-retired.  I'm
		running the Family business now.
		So anything you have to say, say it
		to me.

	Nobody answers.  MICHAEL nods to NERI, who opens the door.
	MOE exits angrily.

				MICHAEL
		Freddie, you're my older brother.
		I love you.  But don't ever take
		sides with anybody against the
		Family again.

	EXT DAY: N.Y. AIRPORT (1955)

	KAY sits in the back of a limousine parked by the Newark
	AIRPORT.  ROCCO LAMPONE is leaning against it.

	She has a little three year old boy; MICHAEL's son, who
	plays with a cardboard bird on a string.

	Two other cars are stationed discreetly, with men we have
	learned to tell are bodyguards.

	MICHAEL, HAGEN and NERI exit the airport with TWO NEGRO
	PORTERS carrying luggage.

	NERI sees something, and taps MICHAEL on the shoulder.

	MICHAEL turns, and sees KAY.

	LAMPONE opens the car door; KAY steps out with the BOY, and
	MICHAEL embraces her, and kisses his son.  Automatically,
	the luggage is put in.  NERI replaces LAMPONE as the driver;
	and LAMPONE joins the other men.  HAGEN gets into one of the
	other cars.

	And the limo drives off, preceded and followed by the other
	sedans.

	INT DAY: LIMO (1955)

	The little BOY looks out the window as they drive.

				MICHAEL
		I have to see my father and his
		people when we get back to the Mall.

				KAY
		Oh Michael.

				MICHAEL
		We'll go to the show tomorrow
		night--we can change the tickets.

				KAY
		Don't you want dinner first?

				MICHAEL
		No, you eat...don't wait up for me.

				KAY
		Wake me up when you come to bed?

	The little BOY flies his cardboard bird out of the speeding
	limousine window.

	EXT DAY: MALL (1955)

	The limousine arrives at the Mall.  We are inside.

				KAY
		Your sister wants to ask you
		something.

				MICHAEL
		Let HER ask.

	NERI opens the door.  KAY wants to talk just a little more.

				KAY
		She's afraid to.  Michael...

	MICHAEL nods to NERI; who gives them their privacy a moment
	longer.

				KAY
		Why are you so cold to her and
		Carlo?  They live with us on the
		Mall now, but you never get close
		to them.

				MICHAEL
		I'm busy.

				KAY
		Connie and Carlo want you to be
		godfather to their little boy.

	NERI opens the door; MICHAEL starts to get out; KAY too.

	He smiles at her, tired, and a little sad.

				KAY
		Will you?

				MICHAEL
		Let me think about it, O.K.?

	She smiles; MICHAEL goes with NERI to the Main House; KAY
	and the little BOY move to the house that was Sonny's.

	INT DAY: DON'S OFFICE (1955)

	VIEW ON DON CORLEONE, much older, much smaller in size.  He
	wears baggy pants, and a warm plaid shirt.  He sits in a
	chair, gazing out through the window, into the garden.

				TESSIO (O.S.)
		Barzini's people chisel my territory
		and we do nothing about it.  Pretty
		soon there won't be one place in
		Brooklyn I can hang my hat.

				MICHAEL (O.S.)
		Just be patient.

				TESSIO
		I'm not asking you for help, Mike.
		Just take off the handcuffs.

				MICHAEL (O.S.)
		Be patient.

				CLEMENZA (O.S.)
		We gotta fight sometime.  Let us at
		least recruit our regimes to full
		strength.

				MICHAEL (O.S.)
		No, I don't want to give Barzini an
		excuse to start fighting.

				TESSIO (O.S.)
		Mike, you're wrong.

				CLEMENZA (O.S.)
		Don Corleone...Don Corleone.

	The OLD MAN looks up.  CLEMENZA stands before him in the Den.
	Beside him is an anxious TESSIO.  NERI stands by the door;
	HAGEN is seated; MICHAEL sits behind the big desk.

				CLEMENZA
		You said there would come a day
		when Tessio and me could form our
		own Families.  Only with your
		benediction, of course.  I ask
		permission...

				DON CORLEONE
		My son is head of the Family now.
		If you have his permission, you
		have my good will.

				MICHAEL
		In six months you can break off
		from the Corleone Family and go on
		your own.  Carlo, I'm counting on
		you to make the move to Nevada;
		you'll be my right-hand man out
		there.  Tom Hagen is no longer the
		Consigliere.

	Everyone is a bit surprised; look to see HAGEN's reaction.
	He remains inexpressive.

				MICHAEL
		He's going to be our lawyer in
		Vegas.  Nobody goes to him with any
		other business as of now, this
		minute.  No reflection on Tom;
		that's the way I want it.  Besides,
		if I ever need any advice, who's a
		better Consigliere than my father.

				CLEMENZA
		Then in a six month time we're on
		our own; is that it?

				MICHAEL
		Maybe less...

				TESSIO
		Let us fill up our Regimes.

				MICHAEL
		No.  I want things very calm for
		another six months.

				TESSIO
		Forgive me, Godfather, let our
		years of friendship be my excuse.
		How can you hope for success there
		without your strength here to back
		you up?  The two go hand in hand.
		And with you gone from here the
		Barzini and the Tattaglias will be
		too strong for us.

				CLEMENZA
		And I don't like Barzini.  I say
		the Corleone Family has to move
		from strength, not weakness.  We
		should build our Regimes and take
		back our lost territories in Staten
		Island, at least.

				DON CORLEONE
		Do you have faith in my judgement?

				CLEMENZA
		Yes, Godfather...

				DON CORLEONE
		Then do what Michael says...

				MICHAEL
		All I can say is that things are
		being resolved that are more
		effective than a thousand buttonmen
		on the streets.  Understood?

	There are uneasy looks all around.

				CARLO
		Understood.  I just wish I was
		doing more to help out.

				MICHAEL
		I'll come to you when I need you.

	He looks at CLEMENZA, TESSIO and HAGEN.  They all nod,
	reluctantly.

				MICHAEL
		All right, then it's resolved.

	NERI knows the meeting is over, he opens the Den's door.

	CLEMENZA and TESSIO pay their respects to the DON and leave,
	then CARLO.  NERI watches CARLO as he walks down the
	corridor, casting a nervous look back at the sinister man.

	Then NERI closes the door.

	MICHAEL relaxes.

				HAGEN
		Mike, why are you cutting me out of
		the action?

				MICHAEL
		Tom, we're going to be legitimate
		all the way, and you're the legal
		man.  What could be more important
		than that.

				HAGEN
		I'm not talking about that.  I'm
		talking about Rocco Lampone building
		a secret regime.  Why does Neri
		report directly to you, rather than
		through me or a caporegime?

				DON CORLEONE
		I told you that it wouldn't escape
		his eye.

				MICHAEL
		How did you find out?

				HAGEN
		Bookkeepers know everything.
		Rocco's men are all a little too
		good for the jobs they're supposed
		to be doing.  They get a little
		more money than the job's worth.
			  (pause)
		Lampone's a good man; he's operating
		perfectly.

				MICHAEL
		Not so perfectly if you noticed.

				HAGEN
		Mike, why am I out?

				MICHAEL
		You're not a wartime Consigliere.
		Things may get tough with the move
		we're trying.

				HAGEN
		OK, but then I agree with Tessio.
		You're going about it all wrong;
		you're making the move out of
		weakness... Barzini's a wolf, and
		if he tears you apart, the other
		families won't come running to help
		the Corleones...

				DON CORLEONE
		Tom, I never thought you were a bad
		Consigliere, I thought Santino a
		bad Don, rest in peace.  He had a
		good heart but he wasn't the right
		man to head the family when I had
		my misfortune.  Michael has all my
		confidence, as you do.  For reasons
		which you can't know, you must have
		no part in what will happen.

				HAGEN
		Maybe I can help.

				MICHAEL
			  (coldly)
		You're out, Tom.

	TOM pauses, thinks...and then he nods in acquiescence.  TOM
	leaves.

	MICHAEL looks at NERI.

				MICHAEL
		I'm going to talk to my father.

	NERI nods, and then leaves.  The DON opens the doors,
	breathes in the air, and steps outside.

	EXT DAY: THE GARDEN (1955)

				DON CORLEONE
		I see you have your Luca Brasi.

				MICHAEL
		I'll need him.

				DON CORLEONE
		There are men in this world who
		demand to be killed.  They argue in
		gambling games; they jump out of
		their cars in a rage if someone so
		much as scratches their fender.
		These people wander through the
		streets calling out "Kill me, kill
		me."  Luca Brasi was like that.
		And since he wasn't scared of
		death, and in fact, looked for
		it...I made him my weapon.  Because
		I was the only person in the world
		that he truly hoped would not kill
		him.  I think you have done the
		same with this man.

	They walk through the DON's vegetable garden.  Tomatoes,
	peppers, carefully tended, and covered with a silky netting.
	MICHAEL follows; the DON turns and looks at him.  Then
	stoops over to right a tomato plant that had been pushed over.

				DON CORLEONE
		Barzini will move against you first.

				MICHAEL
		How?

				DON CORLEONE
		He will get in touch with you
		through someone you absolutely
		trust.  That person will arrange a
		meeting, guarantee your safety...

	He rises, and looks at Michael...

				DON CORLEONE
		...and at that meeting you will be
		assassinated.

	The DON walks on further.

				DON CORLEONE
		Your wife and children...you're
		happy with them?

				MICHAEL
		Yes.

				DON CORLEONE
		Good.

	MICHAEL wants to express something...hesitates, then:

				MICHAEL
		I've always respected you...

	A long silence.  The DON smiles at MICHAEL.

				DON CORLEONE
		And I...you.

	EXT DAY: CHURCH (1955)

	KAY and MAMA walking from the black car that has just left
	them off.

				KAY
		How is your husband feeling?

				MAMA
		He's not the same since they shot
		him.  He lets Michael do all the
		work.  He just plays the fool with
		his garden, his peppers, his
		tomatoes, as if he was some peasant
		still.  But men are like that...

	She steps toward the Church.

				MAMA
		You come in, too.

	KAY shakes her head.

				MAMA
		The Priest ain't gonna bite you
		cause you're not Catholic.
			  (whispered)
		He's in the back drinkin' his wine.

	KAY laughs and follows MAMA up the steps of the Church.
	They enter.

	INT DAY: CHURCH (1955)

	Inside the Church, KAY watches as MAMA blesses herself from
	the holy water.

				MAMA
		You can.

	Tentatively, KAY dips her fingers into the water, and
	blesses herself.  Then SHE follows MAMA down the aisle, in
	awe at the high ceiling, the art, the windows, and finally
	the Altar.

	MAMA stops by the impressive tiers of candles.  There is a
	large coin box for those who wish to pay for lighting
	candles.  MAMA fumbles in her purse for change; KAY gives
	her some.

	MAMA drops the coins in the box, one by one; then takes the
	taper, and in a pattern known only to her, and with great
	dignity, she closes her eyes, says a prayer, and then lights
	twenty candles.

	She finishes, and bows her head.

	EXT DAY: BONASERA'S FUNERAL HOME

	Very few people in the streets.  TOTAL SILENCE.  But black
	flower cars as far as the eye can see, for blocks and blocks.
	An expression of respect, of honor and fear that is enormous.
	Certainly no more could be done for a President or a King.

	Each car carries an elaborate floral decoration.  We show
	these in detail; and the flowered messages: "A Benefactor to
	Mankind", "He Knew and Pitied"..."Our Don Our Leader"..."The
	Sacred Heart"...

	EXT DAY: MALL (1955)

	HIGH ANGLE ON THE CORLEONE MALL

	Silence.

	The flower cars, funeral limousines, and private cars fill
	all the areas attendant to the Corleone residence.

	Hundreds of people fill the Mall, reminiscent in size of the
	wedding of Connie and Carlo; of course, now the mood is
	somber and respectful.

	MICHAEL, MAMA, FREDO and HAGEN stand by the flowered platform
	which holds the ornate coffin.  We cannot see the remains of
	Don Corleone.

	BONASERA is nearby, ready to do service to the bereaved
	family.  One by one the mourners come by, weeping, or merely
	with grave expressions; pay their respects and continue on.

	The VIEW ALTERS,

	and we see that the line is endless.  JOHNNY FONTANE, tears
	openly falling, takes his turn.

	Children are taken by the hand, and lifted for their last
	look at the great man.

	CLEMENZA whispers into the ear of LAMPONE.  LAMPONE
	immediately arranges for the members of the Five New York
	Families to pay their respects.

	First CUNEO, then STRACHI and then ZALUCHI.  Then PHILIP
	TATTAGLIA, who merely passes by the Coffin.

	Then BARZINI in a black homburg, standing a long time.

	MICHAEL watches the scene.

	BARZINI crosses himself and passes on, immediately rejoined
	by his men.

	As BARZINI leaves, it seems as though everyone is fawning on
	him; perhaps asking for favors: But at any rate, it is clear
	from the doors opened for him, the cigars lit for him, that
	he is the new Capo di Capi--the place formerly held by Don
	Corleone.

	MICHAEL watches silently.

	BARZINI is searching for somebody with his eyes.  First
	CLEMENZA.  Then TESSIO.

	CONNIE rushes into MICHAEL's arms, tears in her eyes.  He
	embraces and comforts her.

	Everywhere MICHAEL goes, NERI is a few feet away--watching
	all who come close to him.

	EXT DAY: MALL (LATER)

	Later on the Mall; some people have left, although there are
	still hundreds of mourners.

	A young GIRL approaches TESSIO.  She's about 18.

				GIRL
		Do you remember me?

				TESSIO
		No...

				GIRL
		We danced together at Connie's
		wedding.

	TESSIO makes a gesture, which is to say 'How you've grown',
	and they move through the crowd, looking for Michael.  He
	finds him.

				TESSIO
		Mike, could I have a minute?

	MIKE nods; and they move to a private place.  NERI is close
	by.

				TESSIO
		Barzini wants to arrange a meeting.
		Says we can straighten any of our
		problems out.

				MICHAEL
		He talked to you?

				TESSIO
			  (nods)
		I can arrange security.

	MICHAEL looks at him.

	EXT DAY: CEMETERY (1955)

	The Cemetery.  Late day.

	The hundreds of cars, limousines and flower cars line the
	stone wall that surrounds this Italian-Catholic cemetery in
	Queens Village.

	Hundreds of people stand in a cluster; others watch; take
	pictures, etc.

	MICHAEL stands with his family, his MOTHER...and TOM HAGEN.

				MICHAEL
			  (softly)
		Christ, Tom; I needed more time
		with him.  I really needed him.

				HAGEN
		Did he give you his politicians?

				MICHAEL
		Not all...I needed another four
		months and I would have had them
		all.
			  (he looks at TOM)
		I guess you've figured it all out?

				HAGEN
		How will they come at you?

				MICHAEL
		I know now.
			  (a passion wells up
			  inside of MICHAEL)
		I'll make them call me Don.

				HAGEN
		Have you agreed on a meeting?

				MICHAEL
			  (nods)
		A week from tonight.  In Brooklyn
		on Tessio's ground, where I'll be
		safe.

	HAGEN looks at him; understands.

				MICHAEL
		But after the Baptism.  I've
		decided to stand as godfather to
		Connie's baby.

	They look up.

	The coffin is lowered into an excavation, behind which
	stands an enormous stone monument; it is of a weeping angel,
	with the bold inscription: CORLEONE.

	---------------------------------------FADE OUT---------

	FADE IN:

	INT DAY: NERI'S APT. (1955)

	ALBERT NERI moves around in his small Corona Apartment; he
	pulls a small trunk from under his bed.  He opens it, and we
	see in it, neatly folded, a New York City Policeman's
	uniform.  He takes it out piece by piece, almost reverently.
	Then the badge, and the identification card; with his
	picture on it.  Slowly, in the solitude of his room, he
	begins to dress.

	INT DAY: MICHAEL'S BEDROOM (1955)

	MICHAEL and KAY are getting dressed for the christening in
	their room.  MICHAEL looks very well; very calm; KAY is
	beginning to take on a matronly look.

	INT DAY: MOTEL ROOM (1955)

	In a Long Island motel.

	ROCCO LAMPONE carefully disassembles a revolver; oils it,
	checks it, and puts it back together.

	EXT DAY: CLEMENZA'S HOUSE (1955)

	PETER CLEMENZA about to get in his Lincoln.  He hesitates,
	takes a rag and cleans some dirt off of the fender, and then
	gets in, drives off.

	EXT DAY: CHURCH (1955)

	The Church.

	Various relatives and friends are beginning to gather at the
	Church.  They laugh and talk.  A MONSIGNOR is officiating.
	Not all of the participants have arrived yet.

	CONNIE is there, with a beaming CARLO.  She holds the
	infant; showing him off to interested people.

	EXT DAY: U.N. PLAZA (1955)

	NERI walks down the sidewalk in the neighborhood of the UN
	Building.  He is dressed as, and has the bearing of, a
	policeman.  He carries a huge flashlight.

	EXT DAY: MOTEL BALCONY (1955)

	LAMPONE steps out onto the little balcony of a Sea-Resort
	Motel; We can see the bright, neon lit sign advertising
	"ROOMS FACING THE SEA--VACANCY".

	INT DAY: CHURCH

	The Church.

	CONNIE holds the baby; the MONSIGNOR is speaking; KAY and
	MICHAEL stand side by side around the urn.

				PRIEST
			  (to MICHAEL)
		Do you pledge to guide and protect
		this child if he is left fatherless?
		Do you promise to shield him
		against the wickedness of the world?

				MICHAEL
		Yes, I promise.

	EXT DAY: FIFTH AVE.

	NERI continues up the 55th St. and Fifth Avenue area.  He
	continues until he is in front of Rockefeller Center.  On
	his side of the street, he spots a limousine waiting directly
	across from the main entrance of the building.  Slowly he
	approaches the limo, and taps on its fender with his
	nightstick.

	The DRIVER looks up in surprise.

	NERI points to the "No Parking" sign.

	The DRIVER turns his head away.

				NERI
		OK, wise guy, you wanna summons, or
		you wanna move?

				DRIVER
			  (obviously a hood)
		You better check with your precinct.

				NERI
		Move it!

	The DRIVER takes a ten dollar bill, folds it deliberately,
	and hands it out the window, trying to put it under NERI's
	jacket.

	NERI backs up, letting the bill fall onto the street.  Then
	he crooks a finger at the DRIVER.

				NERI
		Let me see your license and
		registration.

	EXT DAY: MOTEL BALCONY

	LAMPONE on the motel balcony spots a Cadillac pulling up.
	It parks.  A young, pretty GIRL gets out.  Quickly, he
	returns into the room.

	INT DAY: HOTEL STAIRS (1955)

	CLEMENZA is climbing the back stairs of a large hotel.  He
	rounds the corner, puffs a little, and then continues upward.

	INT DAY: CHURCH

	The Church.  Close on the PRIEST's fingers as he gently
	applies oil to the infant's ears and nostrils.

				PRIEST
		Ephetha...be opened...So you may
		perceive the fragrance of God's
		sweetness.

	EXT DAY: ROCKEFELLER CENTER (1955)

	The DRIVER of the limousine in front of Rockefeller Center
	is arguing with NERI.

	Now the DRIVER looks up.

	WHAT HE SEES:

	TWO MEN in topcoats exit the building, through the revolving
	glass doors.

	NERI opens up fire, trapping BARZINI in the shattering glass
	doors.  The doors still rotate, moving the dead body of
	BARZINI within them.

	INT DAY: CHURCH

	In the Church--the VIEW on MICHAEL.  The PRIEST hands him
	the infant.

				PRIEST
		Do you renounce Satan?

				MICHAEL
		I do renounce him.

				PRIEST
		And all his works?

				MICHAEL
		I do renounce them.

	INT DAY: MOTEL MURDER (1955)

	LAMPONE, backed up by two other MEN in his regime, runs down
	the iron-rail steps, and kicks in the door on Room 7F.
	PHILIP TATTAGLIA, old and wizened and naked, leaps up; a
	semi-nude young GIRL leans up.

	They are riddled with gunfire.

	INT DAY: HOTEL STAIRS (1955)

	CLEMENZA, huffing and puffing, climbs the back stairs, with
	his package.

	INT DAY: CHURCH

	The PRIEST pours water over the forehead of the infant
	MICHAEL holds.

				PRIEST
		Do you wish to be baptized?

				MICHAEL
		I do wish to be baptized.

	INT DAY: HOTEL ELEVATOR MURDER (1955)

	CLEMENZA, out of breath, climbs the final few steps.

	He walks through some glass doors, and moves to an ornate
	elevator waiting shaft.

	The lights indicate the elevator has arrived.

	The doors open, and we see a surprised CUNEO standing with
	the dapper MOE GREENE.

	CLEMENZA fires into the small elevator with a shotgun.

	The PRIEST hands a lighted candle to MICHAEL.

				PRIEST
		I christen you Michael Francis Rizzi.

	Flash bulbs go off.  Everyone is all smiles, and crowds around
	MICHAEL, KAY, CONNIE...and CARLO.

	--------------------------------------FADE OUT----------

	EXT DAY: CHURCH (1955)

	The christening party outside the Church.

	Four or five limousines have been waiting; now pull up to
	receive MAMA, CONNIE and the baby; and the others.

	Everyone is very happy; only MICHAEL seems aloof and grave.

	As the fuss is going on, a car pulls up.  LAMPONE gets out
	and works his way to MICHAEL.  He whispers in his ear.  This
	is the news MICHAEL has been waiting for.

	CONNIE holds the baby up to MICHAEL.

				CONNIE
		Kiss your Godfather.

	The infant turns its head, and MICHAEL uses that as an
	excuse to back away.

				MICHAEL
		Carlo...we've had a change in the
		plans.  Mama, Connie, Kay and the
		kids will have to take the trip out
		to Vegas without us.

				CONNIE
		Oh Mike, it's our first vacation
		together.

				CARLO
			  (anxious to please)
		Jesus, Connie...Sure, Mike...

				MICHAEL
		Go back to your house and wait for
		me...

	He kisses KAY.

				MICHAEL
			  (to KAY)
		I'll just be a couple of days...

	People are guided to the correct limousines; they start to
	drive off.

	INT DAY: DON'S KITCHEN

	TESSIO sits in the Kitchen of the Main House on the Mall.

	HAGEN enters.

				HAGEN
		You'd better make your call to
		Barzini; Michael's ready.

	TESSIO nods; moves to the telephone and dials a number.

				TESSIO
		We're on our way to Brooklyn.

	He hangs up and smiles.

				TESSIO
		I hope Mike can get us a good deal
		tonight.

				HAGEN
			  (gravely)
		I'm sure he will.

	EXT DAY: MALL (1955)

	The TWO MEN walk out onto the Mall, toward a car.  On their
	way they are stopped by TWO BODYGUARDS.

				BUTTON MAN
		The boss says he'll come in a
		separate car.  He says for you two
		to go on ahead.

				TESSIO
			  (frowning)
		Hell, he can't do that.  It screws
		up all my arrangements.

	THREE MORE BODYGUARDS appear around him.

				HAGEN
			  (gently)
		I can't go with you either, Tessio.

	He flashes at the men surrounding him; for a moment he
	panics, and then he accepts it.

				TESSIO
			  (after the pause)
		Tell Mike it was business...I
		always liked him.

				HAGEN
		He understands that.

	TESSIO looks at the men, and then pauses.

				TESSIO
			  (softly)
		Tom, can you get me off the hook?
		For old times' sake?

				HAGEN
		I can't.

	HAGEN turns, and walks away from the group.  Then about
	twenty paces away, he stops, and looks back.

	TESSIO is led into a waiting car.

	HAGEN looks away, and walks off.

	INT DAY: CARLO'S LIVING ROOM (1955)

	CARLO RIZZI is alone in his house, smoking, waiting rather
	nervously.  He moves to the window and looks out.

	WHAT HE SEES:

	EXT DAY: MALL (1955)

	MICHAEL, still dressed in a dark suit; followed by NERI,
	LAMPONE and CLEMENZA, then HAGEN.

	They move toward us.

	Excitedly, CARLO moves to the front door; opens it.

	He wears a broad smile.

				CARLO
		Godfather!

				MICHAEL
		You have to answer for Santino.

	The smile on CARLO's face slowly fades, then, in a foolish
	attempt for safety, he slams the door in their faces and
	backs into the living room.

	INT DAY: CARLO'S LIVING ROOM (1955)

	The door opens, and the grim party enters.

				MICHAEL
		You fingered Sonny for the Barzini
		people.  That little farce you
		played out with my sister.  Did
		Barzini kid you that would fool a
		Corleone?

				CARLO
			  (dignity)
		I swear I'm innocent.  I swear on
		the head of my children, I'm
		innocent.  Mike, don't do this to
		me, please Mike, don't do this to me!

				MICHAEL
			  (quietly)
		Barzini is dead.  So is Philip
		Tattaglia, so are Strachi, Cuneo
		and Moe Greene...I want to square
		all the family accounts tonight.
		So don't tell me you're innocent;
		admit what you did.

	CARLO is silent; he wants to talk but is terrified.

				MICHAEL
			  (almost kindly)
		Don't be frightened.  Do you think
		I'd make my sister a widow?  Do you
		think I'd make your children
		fatherless?  After all, I'm
		Godfather to your son.  No, your
		punishment is that you're out of
		the family business.  I'm putting
		you on a plane to Vegas--and I want
		you to stay there.  I'll send
		Connie an allowance, that's all.
		But don't keep saying you're
		innocent; it insults my intelligence
		and makes me angry.  Who approached
		you, Tattaglia or Barzini?

				CARLO
			  (sees his way out)
		Barzini.

				MICHAEL
			  (softly)
		Good, good.  Leave now; there's a
		car waiting to take you to the
		airport.

	CARLO moves to the door; opens it.  There is a car waiting;
	with a group of MEN around it.

	He looks back at MICHAEL, who reassures him.

				MICHAEL
		I'll call your wife and tell her
		what flight you're on.

	EXT DAY: MALL

	CARLO moves out to the Mall; the BUTTONMEN are putting his
	things in the trunk.

	ONE opens the front door for him.

	SOMEONE is sitting in the rear seat, though we cannot see who.

	CARLO gets into the car; out of nervousness, he looks back
	to see the other man.

	It is CLEMENZA, who nods cordially.

	The motor starts, and as the car pulls away, CLEMENZA
	suddenly throws the garrote around CARLO's neck.  He chokes
	and leaps up like a fish on a line, kicking his feet.

	The garrote is pulled tighter; CARLO's face turns color.

	His thrashing feet kick right through the front windshield.

	Then the body goes slack.

	CLEMENZA makes a foul face, and opens the window as the car
	drives off.

	EXT DAY: CARLO'S STEPS (1955)

	MICHAEL and his party.  They watch.

	Then he turns and walks off, and they follow.

	---------------------------------------FADE OUT---------

	FADE IN:

	INT NITE: MICHAEL'S LIMO EN ROUTE (1955)

	MICHAEL sits alone in the back of his car; NERI is driving.

	They do not speak for a long time; it is night--car lights
	flash by.

	NERI turns back.

				NERI
		You know I would never question
		anything you say.

				MICHAEL
			  (smiles)
		Speak your mind.

				NERI
		I'll do this for you; you know I
		should.

				MICHAEL
		No.  This I have to do.

	EXT NITE: PIZZA STREET (1955)

	MICHAEL's car pulls up in a quiet neighborhood, near an
	Italian Pizzeria.  NERI opens the door.

				MICHAEL
		Sit in the car.

	INT NITE: PIZZA PLACE (1955)

	He walks alone into the restaurant.  A MAN is tossing pizza
	dough in the air.

				MICHAEL
		Where's the boss?

				MAN
		In the back.  Hey Frank, someone
		wants you.

	A MAN comes out of the shadows, with a strong Italian accent.

				MAN
		What is it?

	He stops, frozen in fear.  It is FABRIZZIO.

	VIEW ON MICHAEL.  Gunfire from under his coat.  FABRIZZIO is
	cut down.  MICHAEL throws the gun down; turns and exits.

	EXT DAY: MALL (1955)

	HIGH ANGLE ON THE CORLEONE MALL

	Several moving vans are parked in the Mall; one feels that
	these are the final days; the families are moving out; signs
	indicating that the property is for sale are evident.

	A black limousine pulls up, and before it has even stopped,
	the rear door flies open, and CONNIE attempts to run out,
	restrained by MAMA.  She manages to break free and runs
	across the Mall into Michael's house.

	INT DAY: DON'S LIVING ROOM (1955)

	Inside the Corleone house.  Big boxes have been packed;
	furniture prepared for shipping.

				CONNIE
		Michael!

	She hurries into the living room, where she comes upon
	MICHAEL and KAY.

				KAY
			  (comforting)
		Connie...

	But CONNIE avoids her, and moves directly to MICHAEL.  NERI
	is watchful.

				CONNIE
		You lousy bastard; you killed my
		husband...

				KAY
		Connie...

				CONNIE
		You waited until our father died
		and nobody could stop you and you
		killed him, you killed him!  You
		blamed him about Sonny, you always
		did, everybody did.  But you never
		thought about me, never gave a damn
		about me.
			  (crying)
		What am I going to do now, what am
		I going to do.

	TWO of Michael's BODYGUARDS move closer, ready for orders
	from him.  But he stands there, waiting for his sister to
	finish.

				KAY
		Connie, how could you say such
		things?

				CONNIE
		Why do you think he kept Carlo on
		the Mall?  All the time he knew he
		was going to kill my husband.  But
		he didn't dare while my father was
		alive.  And then he stood Godfather
		to our child.  That coldhearted
		bastard.
			  (to KAY)
		And do you know how many men he had
		killed with Carlo?  Just read the
		papers.  That's your husband.

	She tries to spit into MICHAEL's face; but in her hysteria
	she has no saliva.

				MICHAEL
		Get her home and get a doctor.

	The TWO BODYGUARDS immediately take her arms and move her,
	gently but firmly.

	KAY is shocked; never taking her look of amazement from
	MICHAEL.  He feels her look.

				MICHAEL
		She's hysterical.

	But KAY won't let him avoid her eyes.

				KAY
		Michael, it's not true.  Please
		tell me.

				MICHAEL
		Don't ask me.

				KAY
		Tell me!

				MICHAEL
		All right, this one time I'll let
		you ask about my affairs, one last
		time.

				KAY
		Is it true?

	She looks directly into his eyes, he returns the look, so
	directly that we know he will tell the truth.

				MICHAEL
			  (after a very long pause)
		No.

	KAY is relieved; she throws her arms around him, and hugs
	him.  Then she kisses him.

				KAY
			  (through her tears)
		We both need a drink.

	INT DAY: DON'S KITCHEN (1955)

	She moves back into the kitchen and begins to prepare the
	drinks.  From her vantage point, as she smilingly makes the
	drinks, she sees CLEMENZA, NERI and ROCCO LAMPONE enter the
	house with their BODYGUARDS.

	She watches with curiosity, as MICHAEL stands to receive
	them.  He stands arrogantly at ease, weight resting on one
	foot slightly behind the other.  One hand on his hip, like a
	Roman Emperor.  The CAPOREGIMES stand before him.

	CLEMENZA takes MICHAEL's hand, kissing it.

				CLEMENZA
		Don Corleone...

	The smile fades from KAY's face, as she looks at what her
	husband has become.

	INT DAY: CHURCH (1955)

	KAY wears a shawl over her head.  She drops many coins in
	the coin box, and lifts a burning taper, and one by one, in
	a pattern known only to herself, lights thirty candles.

					THE END




================================================ FILE: samples/go/decent/data/godfather2.html ================================================

The Godfather: Part two














				THE GODFATHER

				 Part Two

				Screenplay by

				Mario Puzo

				    and

			 Francis Ford Coppola




















SECOND DRAFT

September 24, 1973





FADE IN:

The Paramount Pictures logo is presented over a simple black
background, as a single trumpet plays the familiar theme of
a waltz.  White lettering fades in:

		     Mario Puzo's THE GODFATHER

There is a pause, as the trumpet concludes, and there is the
additional title: - Part Two -

INT. DON CORLEONE'S OLD OFFICE - CLOSE VIEW ON MICHAEL
CORLEONE - DAY

standing impassively, like a young Prince, recently crowned
King.

CLOSE VIEW ON Michael's hand.  ROCCO LAMPONE kisses his hand.
Then it is taken away.  We can SEE only the empty desk and
chair of Michael's father, Vito Corleone.  We HEAR, over
this, very faintly a funeral dirge played in the distance,
as THE VIEW MOVES SLOWLY CLOSER to the empty desk and chair.

								DISSOLVE TO:

EXT. A SICILIAN LANDSCAPE - FULL VIEW - DAY

We can barely make out the funeral procession passing over
the burnt-brown of a dry river bed.  The figures move
slowly, seemingly from out of hundreds of years of the past.

The MUSICIANS walking unsteadily on the rocky bed, their
instruments harsh and blaring.

They are followed by six young peasant men, carrying the
crude wooden coffin on their shoulders.  Then the widow, a
strong large woman, dressed in black, and not accepting the
arms of those walking with her.

Behind her, not more than twenty relatives, few children and
paisani continue alone behind the coffin.

Suddenly, we HEAR the shots of the lupara, and the musicians
stop their playing.  The entire procession scatters in odd
directions along the rocky river bed.

The young men struggle with the burden of the heavy coffin,
throwing it out of balance and nearly crashing to the ground.
We hear a woman SCREAMING:

				WOMAN
			(Sicilian)
		They've killed young Paolo!  They've
		killed the boy Paolo!

EXT. SICILIAN LANDSCAPE - MED. VIEW - DAY

across the slain body of a fourteen year old boy, lying on
the parched ground.  In the distance we see four or five of
the mourning women, the wind blowing their black dresses and
veils, running up to the body of the boy.  They begin to
wail, and cry out in anguished Sicilian, as the widow, the
mother of the murdered boy, holds her child in her arms, his
fresh blood wetting her strong hands.

EXT. BARONIAL ESTATE - TIGHT MOVING VIEW - DAY

A boy, eight or nine, with wide, frightened eyes, being
pulled quickly by the hand.  This is VITO ANDOLINI, who is
to become The Godfather.

The VIEW ALTERS revealing that he is being pulled along by
his Mother, the Widow, across a field leading to the
ornamental gates of a Baronial Estate of some forgotten Noble.

At various positions near the gates are men with shotguns,
or lupara.  The gates are opened; and the Widow and her boy
are shown before DON FRANCESCO, a man in his sixties.  He
wears his trousers with suspenders, and an open white shirt
sloppily tucked in over his enormous belly.  He wears a hat
to protect him from the white-hot sun, and proudly displays
a gold watch and chain over his vest.

He sits in a chair, near a group of his men in the garden,
listening to the Widow, who stands before him with her only
son.

				WIDOW
			(Sicilian)
		Don Francesco.  You murdered my
		husband, because he would not bend.
		And his oldest son Paolo, because
		he swore revenge.  But Vitone is
		only nine, and dumb-witted.  He
		never speaks.

				DON FRANCESCO
			(Sicilian)
		I'm not afraid of his words.

				WIDOW
			(Sicilian)
		He is weak.

				DON FRANCESCO
			(Sicilian)
		He will grow strong.

				WIDOW
			(Sicilian)
		The child cannot harm you.

				DON FRANCESCO
			(Sicilian)
		He will be a man, and then he will
		come for revenge.

As she pleads, the Widow moves closer to the Don, until she
has practically thrown herself to her knees before him.

				WIDOW
			(Sicilian)
		I beg you, Don Francesco, spare my
		only son.  He is all I have.  In
		the name of the Holy Spirit, I
		swear he will never be a danger to
		you...

Suddenly, she reaches under her skirt, where she has hidden
a kitchen knife.

				WIDOW
			(continuing)
		But I will kill you myself!
			(she lunges at the
			Mafia chieftain)
		Vitone, go!

The boy runs as fast as he can out through the gates.  Then
there is a lupara blast.  He turns, and sees his Mother
flung a distance of five feet from the short range of the
terrible blast of the shotgun.  Then he sees the men turn
their attention to him.  One fires at him; but the boy is
quick, and disappears into a grove of olive trees.

EXT. STREETS OF CORLEONE - NIGHT

Two men roam the deserted streets of Corleone, carrying
lupare.  Every so often, they stop, and one shouts in a
loud, almost singsong voice, like a fish peddler.  Their
names are MOSCA and STROLLO.

				MOSCA
			(Sicilian)
		Our Friend promises misery to
		anyone who harbors the boy Vito
		Andolini.
			(he turns and shouts
			in the other direction)
		Our Friend promises misery to
		anyone who harbors the boy Vito
		Andolini.

INT. A HOUSE - NIGHT

A family quietly eats their dinner.  The father is the local
policeman, as indicated by his uniform jacket and gun,
hanging nearby.

				STROLLO
			(Sicilian, O.S.)
		Our Friend will be hard with any
		family who gives help to Vito
		Andolini.

One of the children looks up, about to speak.  But the
father sternly indicates that nothing must be said.  They go
on with their dinner.

EXT. THE STREETS OF CORLEONE - FULL VIEW - NIGHT

The men continue walking up and throughout the streets, far
in the distance.

				MOSCA
			(Sicilian O.S.)
		...misery to any family who harbors
		the boy, Vito...

INT. A BARN - NIGHT

Four little girls watch with wide eyes as their mother and
father bind Vito tightly in swaddled cloth, and then lift
him up to the side of a mule; counter-balancing a heavy load
of firewood.  The father looks at the boy's almost stoically
calm little face.

				FATHER
			(Sicilian)
		Vito...We pray for you.

He pulls the fabric over the boy's face.

				MOSCA
			(Sicilian O.S.)
		...Andolini...

				STROLLO
			(Sicilian O.S.)
		Our Friend promises misery to any
		family...

EXT. THE CHURCH PLAZA - NIGHT

The men continue on their night-walk, up to the plaza of the
church.

				STROLLO
			(Sicilian)
		...who harbors the boy Vitone
		Andolini.

The figure of a single man on a mule passes them.

				MOSCA
			(Sicilian)
		Let no one give help to the boy
		Vito Andolini...

The man on the mule makes his way out of the village and
disappears into the distance.

We begin to hear, very quietly, the Waltz repeated once again.

EXT. STEAMSHIP - CLOSE VIEW ON VITO - DAY

huddled in blankets, on the deck of the ship in Steerage.
He does not say a word.  The Waltz grows louder as the VIEW
ALTERS, revealing the hundreds of immigrant families huddled
together with all their earthly possessions on their way to
America.

Then, suddenly, the Waltz stops.

THE NEW YORK HARBOR - DAY

SILENCE.  We glide past the Statue of Liberty.

VIEW on the IMMIGRANTS standing on shipboard silently;
looking.  Vito is standing with them, his eyes wide.

CAMERA MOVES IN on the statue, then MOVING PAST, on to the
beautiful buildings of Ellis Island.

EXT. ELLIS ISLAND - DAY

A tugboat pulls a barge brimming with immigrants into the
Ellis Island harbor.  Uniformed officials of the Immigration
Service lead them up toward the main building.

INT. ELLIS PROCESSING HALL - DAY

The hundreds of immigrant families sit on rows of benches in
the great hall.  Various painted lines lead to the steps and
processing rooms above.

There is the babble of many interviews going on
simultaneously, uncertainly, in different languages.

Vito is bundled in an old coat, with a large tag pinned on
it: "Vitone Andolini -- Corleone, Sicilia."

He stands, moves up in the line, when several other immigrant
boys, older than he, rush up and push him back in the line.
Weak from the trip, he falls to the floor.  The boys laugh,
derisive in a language he cannot understand.  He struggles
to his feet, lifting his makeshift bags; staring at them in
an icy hatred.

INT. PROCESSING ROOM - DAY

Three or four interviews are crowded into the small room;
they are conducted in English.  From the expression on
Vito's face, and from the fragments of the English, we
realize that he doesn't understand a word of it.

				OFFICIAL
			(English)
		What is your name?

The man waits, impatiently.

				OFFICIAL
		Your name?

Vito doesn't answer.  The Official pulls the tag pinned onto
his coat and copies it down on his form, using a typewriter.

				OFFICIAL
			(speaking as he types)
		Vito...Corleone.  Step up, over
		there.

He hands the form to another official.

CLOSE VIEW on the form.  The name has been entered as Vito
Corleone.

INT. MEDICAL EXAM - DAY

Vito is stripped to the waist, as other immigrants wait.

The DOCTOR is just finishing his examination.  He shakes his
head, and then writes on the medical form.

				DOCTOR
		Can you understand me?

Vito stares blankly.

				DOCTOR
		You understand?  Smallpox.  Smallpox.

He doesn't understand.  The doctor turns to the Immigration
Official.

				DOCTOR
		Quarantine...six months.

UNDERGROUND PASSAGEWAY - MOVING VIEW - DAY

Officials move a group of immigrant men, including Vito, to
the quarantine section of the Island.

INT. QUARANTINE HALLWAY - DAY

The official stops at each doorway, and reads off a name.

				OFFICIAL
		Salvatore Ormenta.

The man moves into the room, and the group proceeds.

				OFFICIAL
		Vito Corleone.

No one responds.  The guard moves to the boy, reads his new
name tag.  And then, not unkindly:

				GUARD
		That's you.

He opens the door, and Vito enters the room.

EXT. THE STATUE OF LIBERTY - DAY

The VIEW slowly begins to pull back, revealing this to be
the view from inside the quarantine cell, where Vito stands
on his bench, looking out to the statue through the barred
window.

Then he turns, and sits in the corner.  He is silent for a
long time.

Then, in a sweet, pure voice, he sings to himself in Sicilian.

							DISSOLVE TO:

INT. CATHOLIC CHURCH - MOVING CLOSE SHOT - DAY

A nine year old boy, dressed immaculately in white, with a
large white silk bow tied to his shoulder, moving slowly
down the aisle of the church with a group of other children
dressed in white.  He has dark black hair, and his face is
unmistakably similar to young Vito's.  He moves slowly, his
hands clasped around a golden missal.  We HEAR only the pure
voice of Vito in Sicilian, his sad song reaching out from
the past, as ANTHONY CORLEONE, his Grandson, moves on the
way to his First Holy Communion more than fifty years later.

FULL VIEW

The little children move in procession down to the Altar,
where the PRIEST raises the Host, and performs the Communion
Mass in Latin.

				PRIEST
		Ecce Agnus Dei, ecce qui tollit
		peccata mundi.

MOVING VIEW ON THE PRIEST

and Altar boys, as he moves along the row of kneeling
children, blessing them, and administering their first
Communion.

CLOSE MOVING VIEW

as the innocent faces receive the Host; finally, the Priest
comes to Anthony.

				PRIEST
		Corpus Christi.

				ANTHONY
		Amen.

EXT. LAKE TAHOE ESTATE - DAY

The lawns of this great estate on the shore of Lake Tahoe
are covered with guests of a wonderful party to honor the
First Holy Communion of Anthony Corleone, the son of Mr. and
Mrs. Michael Corleone.  A full dance orchestra plays music
of the times on a pavilion bandstand built especially for
the occasion.  Speedboats roar through the water, pulling
youthful waterskiers; and the pool and private harbor are
filled with laughing, swimming guests.  It is Fall of 1958.

MED. VIEW

Anthony, in his Communion suit sits alone at the table,
looking like a lonely young Prince.

				KAY (O.S.)
		Smile, Anthony.  Smile.

He does, and a flash goes off.

				PHOTOGRAPHER (O.S.)
		Now, one with the whole family.

				KAY (O.S.)
		Mr. Corleone can't right now...

KAY CORLEONE enters from the side, leading her four year old
daughter, MARY, and MAMA CORLEONE to pose with Anthony.

				KAY (O.S.)
		...but we'll get one with the ladies.

				PHOTOGRAPHER
		All together now, c'mon, Anthony...
		CHEESE and
			(flash)

				KAY
		Thank you.

She smiles as she leaves the photographer, and then lets out
a weary sigh to Mama, as she touches the slightly protruding
belly.

				KAY
		Do you think it'll show in the
		picture?

				MAMA
		Two months never shows.  Two months
		look like you had a big lunch.

				VOICE (O.S.)
		Oh, Mrs. Corleone.

A slender, aristocratic WOMAN in her late forties is waving
to KAY.

				MRS. BARRETT
		Hello, Mrs. Corleone.  I'm Fran
		Barrett, our place is just down the
		lake.  This is my husband, Marshall.

				KAY
		I'm so happy you could come.

				MR. BARRETT
		The place is transformed.  We've
		been watching workmen come and go
		all summer.

				MRS. BARRETT
		Where is Mr. Corleone?

				KAY
		A business meeting ran late...but
		he promised he wouldn't be long.

Kay puts her arm around little Anthony's shoulder.

				KAY
		This is our son Anthony Vito
		Corleone.  Today he made his First
		Holy Communion.

EXT. TAHOE GATE AND KENNELS - DAY

A confusion of cars; arriving and parking.  The squad of
parking attendants are supplemented by a whole team of the
local Police, working as high-class parking valets.

A very beautiful, statuesque woman, though slightly drunk,
DEANNA DUNN, slams the door of a powder blue Mercedes and
hurries barefoot through the great stone gate.

				DEANNA
		I will not shut my mouth, and keep
		your Goddamn hands off of me!

She is followed by a harried, FREDDIE CORLEONE, dressed with
flash in the Hollywood style, and carrying her shoes in his
hands.

				FREDO
		Honey!  Wait a minute; let's go for
		a drive.

				DEANNA
		I just had a drive; besides, I want
		to see my brother-in-law Michael.

				FREDO
			(trying to get her to
			put her shoes on)
		Yeah, but I don't want him to see
		you.

Deanna pauses reflectively a moment, allowing Fredo to get
her shoes on.

				DEANNA
		What beats me, is how you guys
		could be brothers.  You musta been
		your Mother's rotten egg.

She kicks off the shoes, giggling, and runs toward a waiter.

				DEANNA
			(lifting a glass of champagne)
		Young man, young man...thank you,
		young man.

				WAITER
			(impressed)
		Excuse me, but aren't you...

				DEANNA
		Yes, you saw me in the movies, Good
		Humor man, and yes, I had more off
		than my shoes!

				FREDO
		Goddamn bitch.

				DEANNA
		Relax, Freddie honey.  Come dance
		with me.

She extends her hand to him.

				FREDO
		Listen, Michael's got a lot of nice
		people here.  Friends of Kay's.
		He'll never forgive me if you ruin
		his party.

				DEANNA
		I hate to see you cringe in front
		of him.  How come you're so scared
		of your own kid brother?

				FREDO
		He's the head of the family.

Disgusted, she turns around, and heads toward the music.

				DEANNA
		Don't follow me!

EXT. TAHOE LAWN AND TABLES - MED. SHOT - DAY

Rushing through the tables, waving an arm jangling with gold
jewelry, and carrying several gift-wrapped packages, is a
hardened and aging CONNIE CORLEONE.  She is followed by a
blond, and wrinkled-handsome escort named MERLE.

				CONNIE
		Mama...Mama!  Here I am!

She throws her arms around her Mother, who returns the
affection somewhat reproachfully.

				MAMA
		Constanzia.  We expected you last
		week; we sent the car to pick you
		up at the airport last week.

				CONNIE
		I know, it was chaos; but anyway,
		here I am one week late.
			(lifting a shiny
			green package out of
			Merle's arms)
		This is for my Mama.  You remember
		Merle?

				MAMA
			(not giving him a
			chance to greet her)
		Yes, thank you.

				CONNIE
		How are the kids?

				MAMA
		Well, thank you, they asked for you
		all week.

				CONNIE
		I got surprises for everybody!

				MAMA
			(glancing at the wrapping)
		Bought at the airport.

				CONNIE
			(gazing about)
		This is swell.  Where's Michael?
		I've got things to get straight
		with him and I can't wait on line.

				MAMA
		You go see your children first, and
		then you wait to see your brother
		like everybody else.

EXT. THE BOATHOUSE - DAY

A porch-like foyer of the boathouse, where a group of five
or six men wait, some nervously.  Some sit, and some pace.

MED. CLOSE VIEW

on one of these men, FRANKIE PENTANGELI, approaching his
sixties, with gray hair (the little of it left).  He's a bit
scruffy, this morning's shave of his white beard is not
perfect, and he seems tired.  He is accompanied by an
associate-bodyguard, WILLY CICCI; thin and dark, and also
dressed up for the occasion.  Frankie tries to get the
attention of one of the waiters; a college-groomed young man
in white sports jacket and black bow-tie.

				PENTANGELI
		Hey, kid!  You got any red wine?

				WAITER
			(offering the tray)
		Only champagne and cocktails.

				PENTANGELI
		Forget it...

Finally, he sees someone he recognizes, Fredo, and shouts
out in a husky voice:

				PENTANGELI
		Fredo!  Sonuvabitch.  You look great.

Fredo squints in his direction; finally recognizes him.

				FREDO
		Who's that?  Pentangeli?  Frankie
		"Five-Angels"...thought you were
		never coming West.

				PENTANGELI
			(affectionately)
		Gotta check up on my boys.  Hey,
		what's with the food?  Some kid in
		a white jacket brings me a ritz
		cracker with some chopped liver.
		'Canapes,' he says.  I say, 'Can a
		peas, my ass, that's a ritz cracker
		with chopped liver.' Go get me a
		salami sandwich and a glass of wine
		or I'll send you and your white
		jacket to the dry cleaners!

They get a good laugh at this fresh breath of New York.

				FREDO
		Gee, Frankie, it's good to see you.
		Reminds me of old times.

				PENTANGELI
		You remember Willy Cicci, don't
		you, Freddie?  We was all together
		with the old man Clemenza in
		Brooklyn... before...uh...

				FREDO
		We were all upset about that.

				PENTANGELI
		That's what I'm here to talk to
		your brother about.  What's with
		him, I got to get a letter of
		introduction to have a 'sitdown'?

				FREDO
			(throwing his arm
			around him)
		C'mon, I see what I can do.

EXT. TAHOE PAVILION - MED. VIEW - DAY

The orchestra wears white summer sportcoats and black tuxedo
slacks as they play a tango behind monogrammed music stands.
A professional dance team, probably imported from Vegas,
dance the tango for the excited guests.

INT. TAHOE BOATHOUSE - DAY

A large and very beautiful room overlooking the lake.  It is
dominated by an enormous bar, behind which stands ALBERT
NERI, discreetly in the background.

MICHAEL CORLEONE sits on a large sofa, his back to us.
Standing to one side is a tired and somewhat uneasy TOM
HAGEN.  Standing before Michael is SANDRA CORLEONE, Sonny's
widow; her daughter, one of the twins, FRANCESCA CORLEONE,
and a handsome young man of twenty, GARDNER SHAW.

				SANDRA
		Michael, this is Gardner Shaw.
		Francesca and he have been seeing
		each other for six months now.
		Gardner, this is Francie's Uncle
		Michael.

				GARDNER
			(a little nervous)
		I've heard a lot about you, Mr.
		Corleone.

				MICHAEL (O.S.)
		Sit down.  Francie.

The couple sit themselves on the sofa opposite Michael.

				SANDRA
		They would like to set an engagement
		date, and...

				MICHAEL
		Let them speak for themselves.

VIEW ON MICHAEL, calm, thoughtful.  One can tell that he has
special affection for his niece.

				FRANCESCA
		We love each other, Uncle Michael.
		And, we want to be married.  I came
		to ask for your blessing.

There is a loud KNOCKING on the door; then Fredo's voice.

				FREDO (O.S.)
		Hey, Mike...guess who's here?

Neri goes to answer it, cracks the door open.

				NERI
		Not now, Freddie...

				FREDO
		Tell Mike Frankie 'Five-Angels' is
		here.

				NERI
		Not now...

Neri closes the door, and Michael looks at the nervous young
man.

				MICHAEL
		Francesca is my oldest brother's
		daughter.  He died many years ago,
		and ever since I've felt much more
		of a father than an uncle.  I love
		her very much.  I'm pleased and
		impressed that you had the thought
		to come to me before going on with
		your plans.  It shows me that
		you're a considerate man, and will
		be good to her.  What are you
		studying in college?

				GARDNER
		My major is Fine Arts, sir.

				MICHAEL
		How will Fine Arts support your new
		wife?

				GARDNER
		It's embarrassing to say, sir, but
		I'm a major stockholder in the
		family corporation.

				MICHAEL
			(smiling)
		Never be embarrassed by your wealth.
		This recent contempt for money is
		still another trick of the rich to
		keep the poor without it.
			(warmly)
		Of course I give you my blessing.
		Let's set the wedding soon...it
		will be my pleasure to give the
		bride away.

They all smile, and rise.

				MICHAEL
			(continuing)
		...and take a few courses in
		Business Administration just to be
		on the safe side!

They laugh; Michael moves toward them.  Francesca throws her
arms around him, and kisses her favorite uncle.  The flushed
young man shakes his hand heartily.

				FRANCESCA
		Thank you, Uncle Michael.

They all take their leave; Michael turns to Hagen.

				MICHAEL
		Make her dowry impressive.  He
		comes from a family who still
		thinks an Italian bride goes
		barefoot.

EXT. TAHOE SWIMMING POOLS AND HARBOR - DAY

Francesca and Gardner are greeted by her twin sister and
their young friends, who squeal and embrace at the good news.
Someone throws someone in the pool, and life is good.

MED. CLOSE

Francesca kisses her Aunt Kay.

				FRANCESCA
		Uncle Michael is the greatest man
		ever!

VIEW on Kay - happy for her niece.

INT. TAHOE BOATHOUSE - DAY

Michael sits in the darkened boathouse.  Tom Hagen paces.
Michael is looking at photographs.  Neri stands over him.

CLOSE ON MICHAEL

studying the pictures.

				NERI (O.S.)
		His name is Fred Vincent.  He owns
		a small pizza parlor in Buffalo...

CLOSE ON THE PICTURES

Snapshots of a middle-aged man, handsome, Italian.  There is
something familiar about him.

				NERI (O.S.)
			(continuing)
		...American wife and two small kids.
		We traced him and found that he's
		in the country illegally, from
		Sicily...

Michael looks at another picture.  The same man.  Only
younger, and dressed in Sicilian shepherd's clothing.  We
remember him as FABRIZZIO...Michael's traitorous bodyguard
in Sicily.

				NERI (O.S.)
		...came over around 1956.  Sponsored
		by the Barzini Family.

Michael puts the pictures down.

				MICHAEL
		It's him.  Fabrizzio.
			(almost to himself)
		Revenge is a dish that tastes best
		when it's cold.

				NERI
		How do you want me to handle it?

Michael glances at Hagen, who has been waiting in the room.

				MICHAEL
		Later.  Tom?

Hagen brings him a folder; then, as Michael glances through
it:

				HAGEN
		I've cleared it through the
		Senator's chief aide, a man named
		Turnbull.  Turnbull's a heavy
		gambler, and into us for over a
		hundred grand, so I figure his
		information is reliable.

Neri moves to the bar, to prepare Michael a drink.

				HAGEN
		The Senator can be set up; but he
		thinks of himself as a clean
		politician.  So it's got to be on
		terms he can live with: campaign
		contribution, donation to a
		charitable cause that he controls,
		things like that.  If he gets even
		the inkling that you think you're
		buying him, he'll freeze up.
		Nevada's a funny state, they like
		things both ways here... All right.
		Turnbull says the Senator will be
		here at two-thirty, and he's been
		primed.  He knows you'll want to
		meet with him alone, and he knows
		it's about the Tropicana's license.
		At any rate, he expects to be
		introduced around to some of the
		influential people here today, and
		generally treated as an ordinary
		guest.  Just go light on him,
		Mikey, sometimes the biggest crooks
		don't like to think of themselves
		as crooks...

Michael glances at Hagen, as though that last remark was
unnecessary.

				HAGEN
		I'm sorry; of course, you know that.

				MICHAEL
		Two-thirty.  That gives me time to
		see my boy.

				HAGEN
		Connie's outside.

Michael doesn't want to see her.

				HAGEN
		I promised; she said it was urgent.

Michael nods.

				MICHAEL
		All right.  Apologize to Pentangeli.

Neri opens the door; Hagen exits, and Connie steps in
impatiently, followed by Merle.

				MICHAEL
		I said I would see my sister, alone.

				MERLE
		I think this concerns me too.
			(taking a cigarette
			from the dispenser)
		You don't, do you?

Connie steps forward, kisses Michael on the cheek.

				CONNIE
		How are you, honey?  You've met
		Merle, haven't you.  He was with me
		in Vegas.

				MICHAEL
		I saw him with you.

				CONNIE
		We're going to Europe next week.  I
		want to get passage booked on the
		Queen.

				MICHAEL
		Why do you come to me?  Why don't
		you go to a travel agent?

				MERLE
		We're going to get married first.

Michael is silent.  Then he rises, and moves to the window
overlooking the lake.

				MICHAEL
		The ink on your divorce isn't dry.
		Your children see you on weekends;
		your oldest boy, Michael Francis...
		was in some trouble with the Reno
		police over some petty theft that
		you don't even know about.

				CONNIE
		Michael...

				MICHAEL
		You fly around the world with lazy
		young men who don't have any love
		for you, and use you like a whore.

				CONNIE
		You're not my father!

				MICHAEL
		Then why do you come to me?

				CONNIE
		Because I need MONEY!

				MICHAEL
			(softly)
		Connie, I want to be reasonable
		with you.  You have a house here,
		with us.  You can live here with
		your kids...and you won't be
		deprived of anything.  I don't know
		much about Merle; I don't know what
		he does for a living; what he lives
		on.  Why don't you tell him marriage
		is really out of the question; and
		that you can't see him any more.
		He'll understand.  But if you
		disobey me, and marry this pimp...it
		would disappoint me.

				CONNIE
		It was my father's money; and I'm
		entitled to what I need.  Where is
		Tom Hagen?

She turns angrily, leaving Michael standing face to face
with Merle.

				MICHAEL
		Are you finished?

				MERLE
		I think so.

				MICHAEL
		Then out.

Merle puts out his cigarette and leaves, quickly.

EXT. TAHOE PAVILION - FULL VIEW - DAY

The orchestra has struck up a "Paul Jones," where two
concentric circles of young people march in opposite
directions, until the music stops.  Then they take whomever
is opposite them as their new dance partner.

VIEW ON THE HARBOR AREA

Francesca and her twin, Gardner and their elite young
friends roar out of the private harbor, to get up on the
water skis.  We notice ROCCO LAMPONE move along a path
leading to a separate and more private boathouse.  A small
covered craft approaches, ties off, and a group of three men
step on to the pathway, shake hands with Lampone - and
follow him to the large boathouse where Michael conducts his
business.

CLOSE VIEW

Pentangeli has led Mama up to the dance floor, and is having
some difficulty with the orchestra.

				PENTANGELI
		I can't believe that out of thirty
		professional musicians, not one of
		you is Italian!
			(as the musicians laugh)
		C'mon, give us a tarantella.

He waves his hands, conducting, and singing.  The piano
starts a vamp, the drums uncertainly join in.  A clarinet
starts to play "Pop Goes the Weasel," and soon the rest of
the orchestra is playing that.  They look to Pentangeli for
approval.  Disgusted, he goes back to his table, eating a
handful of canapes.

INT. THE BOATHOUSE - DAY

Rocco ushers an older Italian, bundled up against the cold
and wet of his boatride, to Michael.

The man shows respect to Michael, who quickly indicates that
Neri should get him a drink.

				MICHAEL
		Rocco, his friends must be hungry.
		See what you can do, but I'd like
		to keep them away from the guests.

The older man, JOHNNY 'BLUE BOY' OLA, gestures to his
bodyguards, and they follow Lampone.

				MICHAEL
		You know my lawyer, Tom Hagen.
		Johnny Ola.

				OLA
		Sure, I remember Tom from the old
		days.

Tom shakes hands with Ola, remembering him, and his
importance.

				MICHAEL
		Tom isn't going to sit in with us,
		Johnny.  He only handles specific
		areas of the family business.  Tom?

				HAGEN
		Sure, Mikey.

He gathers up some of his papers, as the three men remain
silent, waiting for him to go before they talk.  It's clear
Tom doesn't want to be excluded.

				HAGEN
		If you need anything, just...

				MICHAEL
		Just tell Rocco I'm waiting.

Hagen nods and leaves.  As soon as the door closes:

				OLA
		I just left our friend in Miami.

				MICHAEL
		How is his health?

				OLA
		Not good.

				MICHAEL
		Is there anything I can do; anything
		I can send?

				OLA
		He appreciates your concern,
		Michael, and your respect.

There's a KNOCK on the door; a moment, and then Rocco
quietly enters and takes his place without disturbing the
conversation.

				OLA
		The hotel's registered owners are
		one Jacob Lawrence, and Sidney
		Barclay, both Beverly Hills
		attorneys.  In reality it's split
		between the Old Lakeville Road
		Group from Cleveland, and our
		friend in Miami.  He takes care of
		others outside the country, you
		know who I mean.  Meyer Klingman
		runs the store, and does all right,
		but I've been instructed to tell
		you, that if you move him out, our
		friend in Miami will go along with
		you.

				MICHAEL
		He's very kind, tell him it's
		appreciated.  I'm sure it will be
		profitable all the way around.

				OLA
		He always makes money for his
		partners.  One by one, our old
		friends are gone.  Death, natural
		or not, prison, deported.  Our
		friend in Miami is the only one
		left, because he always made money
		for his partners.

The door opens suddenly, and standing there in his white
Communion suit, is Michael's boy Anthony.  A moment later,
Kay appears, and takes the boy's hand.

				KAY
		Anthony, Daddy's busy.

				MICHAEL
			(rising)
		This is my boy, and my wife.  Mr.
		John Ola of Miami.

				KAY
		I'm sorry, Michael.  Senator
		Geary's here, and Mr. and Mrs.
		Barrett wanted to thank you before
		they left.  Won't you join us, Mr.
		Ola?

				MICHAEL
		Mr. Ola's just leaving, Kay.
		Please tell the Senator I won't be
		a minute.

Pause; she stands there a moment.

				MICHAEL
			(continuing)
		Kay.

				KAY
		Yes, Michael.

EXT. THE BOATHOUSE - DAY

Kay closes the door.  It seems as though Michael has violated
some sort of promise to her by having this man here today.
She looks up toward the first boathouse.

WHAT SHE SEES:

The covered launch, and Ola's three bodyguards, eating while
they wait.

MED. VIEW

Anthony runs away from her, heading toward the house.

				KAY
		Anthony!
			(she runs after him)
		Anthony, where are you going?

Moodily, the boy stops, turns, and walks back to his table
of honor without answering her.

EXT. TAHOE TABLES AND PAVILION - VIEW ON THE PAVILION - DAY

The orchestra has taken its break; now two couples in formal
dress are performing the Quartet from Rigoletto.

VIEW ON HAGEN

sitting by himself, a little down, having a drink.  He's
waiting for Michael to re-summon him.  SANDRA, Sonny's
widow, sits opposite him.

				HAGEN
		Where's my wife?

				SANDRA
		With Mama, putting the baby to
		sleep.  Francesca's very happy.
		Michael was kind to her.  She
		idolizes him.
			(pause; she looks at
			a despondent Hagen)
		The children are all out in the
		speedboat.  I'm going to my house.

Sandra gets up, still an attractive woman, and walks alone
to the back path that leads to her home on the estate.

VIEW ON THE PAVILION

The returned orchestra strikes a big, show-biz chord,
intended to command the guests' attention.

The orchestra LEADER raises his hands for silence, and makes
an announcement over the P.A. system.

				MAESTRO
		Ladies and gentlemen, a most
		distinguished guest would like to
		say a few words: Senator and Mrs.
		Pat Geary of the state of Nevada!

A big hand, as the smiling SENATOR introduces his WIFE by
holding her arm up to the crowd, and then proceeds alone to
the bandstand.

MED. VIEW

Michael stands with Kay and Mrs. Geary.  The Senator's
presence seems to be a statement of political and social
status.

A little distance away, his beautiful son Anthony sits
quietly, in an unmistakably morose mood.

INT. TAHOE - SANDRA'S HOUSE - DAY

We HEAR the applause and whistles echoing in the distance.
Sandra stands in her bedroom, looking at the door.  We SEE a
photograph of SONNY, and also one of their wedding.

A moment goes by, and then Tom Hagen enters, closing the
door behind him.

We begin to HEAR Senator Geary's amplified voice resounding
over the lake.  Hagen moves to Sandra.  She takes him in her
arms, comforting, holding his head against her full breast.

				HAGEN
			(quietly)
		He doesn't want my help any more.
		He doesn't need it.

				SANDRA
		We don't know that's true, he never
		said that.

				HAGEN
		I can feel it in the way he talks
		to me.

He moves to the dresser; pours himself a drink.

				HAGEN
		Just now when Johnny Ola showed up,
		he asked me to leave them alone.
		Ola is Hyman Roth's Sicilian
		contact.  I was on the inside of
		ten, twenty meetings with him.  But
		today Mike asked me to leave, like
		an outsider.

				SANDRA
		Talk to him.  Tell him how you feel.

				HAGEN
		It's as though he blames me for the
		ground the family lost when I was
		Consigliere to Sonny.

Sandra pulls Hagen to her, and kisses him passionately on
the mouth.

				HAGEN
		I love Michael, I want to help him,
		be close to him.  I don't want to
		end up a third string lawyer making
		property settlements for the hotels.

Sandra knows he needs her.  Slowly she begins to undress.

				SANDRA
		We have a little time now.

EXT. THE PAVILION - VIEW ON SENATOR GEARY - DAY

				SENATOR GEARY
		...my thanks, and the thanks of the
		young people of the State of
		Nevada, for this most impressive
		endowment...
			(he holds a check in
			his hand)
		...made to the University in the
		name of Anthony Vito Corleone.
		Thank you, Mr. and Mrs. Michael
		Corleone.

Applause.  Senator Geary returns the microphone to the
Maestro who adds:

				MAESTRO
		And now, the Nevada Boys' Choir
		have prepared a special thank you
		for Mr. Michael Corleone.

He turns to a small Choir Master, who leads the Boys' Choir
in a choral arrangement of "MR. WONDERFUL."

VIEW ON SENATOR GEARY

shaking hands with Michael, as Press Photographers snap
pictures, showing the check; showing a special award of
Gratitude from the State; Mrs. Corleone and Mrs. Geary; all
together; Michael and his son; Senator Geary and Michael's
son; and on and on.  In the midst of this:

				SENATOR GEARY
		Where can we meet alone?

Michael indicates the boathouse a distance away, where Neri
seems to be waiting for them.  Then Michael leans to Rocco:

				MICHAEL
		Find Hagen.

Rocco sets off; as more pictures are taken, and the:

BOYS' CHOIR

sings its lovely arrangement of "Mr. Wonderful."

INT. TAHOE BOATHOUSE - MED. VIEW - DAY

Michael, the Senator, Neri and Rocco make a group in the
dark, large room.

				MICHAEL
		It was very kind of you to come to
		my home on this occasion, Senator.
		My wife has been very concerned
		with making a good impression on
		the people who are our neighbors,
		and your appearance here has made
		her very happy.  If I can ever
		perform a service for you, you only
		have to ask.

The door opens, and Hagen sheepishly makes his way in.

				MICHAEL
		My lawyer, Tom Hagen.  He arranged
		this all through your man Turnbull.

				SENATOR GEARY
		I thought we would meet alone.

				MICHAEL
		I trust these men with my life.
		They are my right arms; I cannot
		insult them by sending them away.

				SENATOR GEARY
			(taking out some medication)
		Some water.

He addresses that to Neri, who resentfully goes to fetch the
Senator a glass of water.

				SENATOR GEARY
		Alright, Corleone.  I'm going to be
		very frank with you.  Maybe more
		frank than any man in my position
		has ever spoken to you before.

Michael nods, indicating that he should do so.

				SENATOR GEARY
		The Corleone family controls two
		major hotels in Vegas; one in Reno.
		The licenses were grandfathered in,
		so you had no difficulties with the
		Gaming Commission.  But I have the
		idea from sources...
			(takes the water from
			Neri and swallows his pills)
		...that you're planning to move in
		on the Tropicana.  In another week
		or so you'll move Klingman out,
		which leaves you with only one
		technicality.  The license, which
		is now in Klingman's name.

				MICHAEL
		Turnbull is a good man.

				SENATOR GEARY
		Let's forget the bullshit, I don't
		want to stay here any longer than I
		have to.  You can have the license
		for two hundred and fifty thousand
		in cash, plus a monthly fee equal
		to five percent of the gross...

Michael is taken aback; he looks at Hagen.

				SENATOR GEARY
		...of all three Corleone hotels.

Hagen is frustrated; all his information was wrong.

				MICHAEL
		Senator Geary, I speak to you as a
		businessman who has made a large
		investment in your state.  I have
		made that state my home; plan to
		raise my children here.  The
		license fee from the Gambling
		Commission costs one thousand
		dollars; why would I ever consider
		paying more?

				SENATOR GEARY
		I'm going to squeeze you, Corleone,
		because I don't like you; I don't
		like the kind of man you are.  I
		despise your masquerade, and the
		dishonest way you pose yourself and
		your fucking family.

VIEW ON HAGEN

glances at Michael.

VIEW ON MICHAEL

makes no outward reaction.

				MICHAEL
			(quietly)
		We're all part of the same
		hypocrisy, Senator.  But never
		think it applies to my family.

				SENATOR GEARY
		All right, then let me say you'll
		pay me because it's in your
		interests to pay me.

VIEW ON GEARY

rising.

				SENATOR GEARY
		I'll expect your answer, with
		payment, by tomorrow morning.  Only
		don't contact me...from now on,
		deal only through Turnbull.

He is almost out the door.

				MICHAEL
		Senator...
			(cold and calm)
		...you can have my answer now if
		you'd like.

Geary turns back.

				MICHAEL
		My offer is this.  Nothing...not
		even the thousand dollars for the
		Gaming Commission, which I'd
		appreciate if you would put up
		personally.

Geary returns Michael's hard look; then laughs and leaves.
Slowly Michael turns to Hagen.

VIEW ON HAGEN

embarrassed at being so off the mark.

				MICHAEL
		It's all right, Tom, we'll talk
		later.  Tell Frankie Pentangeli I'd
		like him to have dinner at my
		family table before we do business.

EXT. THE PAVILION - NIGHT

Now the light has faltered, and the young waiters have put
up the night lights.  The tables are all properly set for
dinner, with candles on each one.

The orchestra is playing quiet, unobtrusive dinner music,
and many of the guests have begun to help themselves to the
impressive buffet, under a party tent.

MED. VIEW

Michael sits at a large table with Kay, his son Anthony,
Mama, Hagen and TERESA, Connie and Merle, Fredo and Deanna,
and Frankie Pentangeli.

				MAMA
		Cent' Anne.

This, the table of honor, all raise their glasses and repeat
the toast.

				DEANNA
		What's "Cent' Anne"?

				FREDO
		A hundred years...it's a toast.

				CONNIE
		It means we should all live happily
		for one hundred years.  The family.
		If my Father were alive, it'd be
		true.

				MAMA
		Connie.

				CONNIE
		Merle, have you met my sister-in-
		law Deanna?

				DEANNA
		What a pleasure, Merle.
			(shaking hands)


				MAMA
			(Sicilian)
		Those two are perfect for each other.

				MERLE
		What's that mean?

				CONNIE
		Mama!

				PENTANGELI
			(Sicilian)
		Michael, in all respect, I didn't
		come three thousand miles for dinner.

				MICHAEL
			(Sicilian)
		I know.

				PENTANGELI
			(Sicilian)
		When do we talk?

				MICHAEL
			(Sicilian)
		After dinner.

By now, the conversation has become exclusively Sicilian,
with Merle and Deanna looking from side to side as if at a
tennis match.  Finally, Kay, to be polite:

				KAY
		Anthony, you were talking to Mr.
		Pentangeli?

				ANTHONY
		His name is "Five-Angels."

				PENTANGELI
		Yeah, the kid and me talked Sicilian.
		A one-way conversation!

INT. TAHOE BOATHOUSE - NIGHT

Pentangeli is angry; but because it is Michael he is talking
to, he keeps his voice low and represses his desire to shout.

				PENTANGELI
		Sure, Pete Clemenza died of a heart
		attack, but the Rosato Brothers
		gave it to him.

				MICHAEL
		We were all heartbroken at the
		news; but that wasn't cause to
		start a war.

				PENTANGELI
		Okay, now it's my family in
		Brooklyn; and I wanna keep up
		Clemenza's loyalty to you.  But how
		can I run my family with you
		challenging my every move?  You're
		too far from the street, Mike, the
		only way to reason with the Rosato
		Brothers is to whack 'em and whack
		'em fast.

				MICHAEL
		You were unfair with them.

				PENTANGELI
		Says who?

				MICHAEL
		Clemenza promised Rosato three
		territories in the Bronx after he
		died, and then you took over and
		welched.

				PENTANGELI
		Clemenza promised them nothing, he
		hated the sonsuvbitches.

				MICHAEL
		They feel cheated.

				PENTANGELI
		Michael, you're sitting up here in
		the Sierra Mountains with champagne
		cocktails making judgment on the
		way I run my family.

				MICHAEL
			(suddenly in Sicilian)
		Your family still carries the name
		Corleone, and you will run it like
		a Corleone!

				PENTANGELI
			(Sicilian)
		And while I feed my family in New
		York, you put the knife in my back
		in Miami.

				MICHAEL
			(firm)
		Frankie, you're a good old man, and
		you've been loyal to my Father for
		years...so I hope you can explain
		what you mean.

				PENTANGELI
		The Rosatos are running crazy;
		taking hostages, spitting in my
		face, because they're backed by the
		Jew in Miami.

				MICHAEL
		I know.  That's why I want you to
		be fair with them.

				PENTANGELI
		How can you be fair with animals?
		They recruit niggers and spicks;
		they do violence in their own
		Grandmother's neighborhoods.  And
		everything is dope and whores; the
		gambling is left to last.  Let me
		run my family without you on my
		back.  I want them taken care of.

				MICHAEL
		No.  There are things that I have
		planned with Hyman Roth.  I don't
		want them disturbed.

				PENTANGELI
		You give your loyalty to a Jew over
		your own blood.

				MICHAEL
		Frankie, you know my father
		respected Roth, did business with
		him.

				PENTANGELI
		He did business...but he never
		trusted him.

Pentangeli takes his hat, and leaves.

				NERI
		Should he go?

				MICHAEL
		The old man had too much vino
		rosso, or he'd never talk openly
		that way.  Let him go back to New
		York; I've already made my plans.
			(he checks his watch)
		It's late; I've spent so little
		time at the party.

EXT. THE LAWNS AND TABLES - FULL VIEW - NIGHT

By now the sun has fallen and the lawns of the Corleone
estate are lit by moonlight.  Beautifully dressed couples
dance as the orchestra plays a foxtrot of the late fifties.

VIEW ON THE DANCE FLOOR

Deanna has been dancing with Fredo; she has gotten drunk and
is teasing her husband by flirting with other men on the
floor.

				DEANNA
		I wanta dance...whatsa matter with
		that?

				FREDO
		Dancing is alright; you're falling
		on the floor.

				DEANNA
		Whatsamatter, you don't want me to
		dance with him 'cause he's a man!

				FREDO
		Deanna, I'm going to belt you right
		in the mouth!

				DEANNA
		These Eye-ties are really crazy
		when it comes to their wives.

By now guests are starting to notice the disturbance;
Michael is with Kay, and some friends; Rocco catches his eye.

				DEANNA (O.S.)
		Jesus, never marry a WOP, they
		treat their wives like shit.

VIEW on Kay, listening, embarrassed by her flashy sister-in-
law.

VIEW ON FREDO AND DEANNA

Rocco passes by Fredo and whispers:

				ROCCO
		Freddie, Mike says take care of it,
		or I have to.

				DEANNA
		He's a friend of your brother!

Without another word, Rocco grabs firm hold of her and
whisks her out of the crowd.

				DEANNA
		"Shuffle off to Buffa...
		Shuffle off to Buffa...
		Shuffle off to Buffalooooo..."

Freddie mops his forehead, and moves to Michael.

				FREDO
		Hey Mike, what can I say?

				MICHAEL
		Forget it, just go take care of her.

EXT. THE HARBOR DECK - NIGHT

A large group of Tahoe teenagers join the Corleone youngsters
sitting around a large fire out by the harbor.  Gardner and
Francie, sitting arm in arm.

EXT. TABLE OF HONOR - MED. VIEW - NIGHT

Little Anthony, in his white suit, sitting alone.

EXT. MAIN GATE - NIGHT

A taxi pulls up, and is signaled over to the gate by a
policeman carrying a torch flashlight.

Connie and Merle enter; Merle tips the cop, and the cab
drives off.

EXT. DANCE FLOOR AND PAVILION - MOVING TWO SHOT - NIGHT

Kay and Michael dancing in the moonlight.

				MICHAEL
		How's the baby?

				KAY
		Sleeping inside me.

				MICHAEL
		Does it feel like a boy?

				KAY
		Yes, Michael, it does.

				MICHAEL
		I'm sorry about some of the people
		I had to see today.  It was bad
		timing... but it couldn't be helped.

				KAY
		It made me think of what you told
		me once.  In five years, the
		Corleone family will be completely
		legitimate.  That was seven years
		ago.

He has no answer for her; except that he loves and values
her, and holds her tight, as they dance amid their guests,
all dressed elegantly for the social event of the season.

The VIEW LOOSENS to show the entire, night-lit party.

							DISSOLVE TO:

EXT. LAS VEGAS CHAPEL - MED. CLOSE VIEW - NIGHT

A Cadillac limousine waits for some people inside the tacky,
Las Vegas marriage mill.

INT. THE CHAPEL - NIGHT

Some quiet, informally dressed couples wait in the rear of
the chapel; some talking, others sitting nervously.

A single organ plays some standard wedding music.

The VIEW PANS up to the altar, where Connie and Merle, in
the same clothing they wore to the Tahoe party, are being
married by a Justice of the Peace.

The Justice goes through the bored, simple ceremony, and we
begin to HEAR an echo of the waltz Connie danced with her
father, when she was married all those years ago in Long
Island.

EXT. THE TROPICANA - LAS VEGAS - MED. VIEW - DAY

A dark car pulls up to the glitter of the neon facade.
Albert Neri, alone, leaves it to the parking valets, and
moves quickly through the automatic doors, into the main
casino.  We still hear the CORLEONE WALTZ.

INT. THE TROPICANA - DAY

Albert Neri enters the room; glances around a moment, and
then heads toward the crap table, where a short, middle-aged
man, KLINGMAN, stands by the pit boss.  Several security
guards of the casino, are at their posts.

				NERI
		Are you Klingman?

				KLINGMAN
		Who's asking?

				NERI
		Where can we talk?

				KLINGMAN
		Right here.

				NERI
		I represent the interests of the
		Corleone family.  We make the
		invitation to you to tie up your
		affairs and be out of the hotel by
		Monday morning.

				KLINGMAN
		Who do you think you're talking to?

				NERI
		You said you were Klingman.

				KLINGMAN
		You don't come in here, talk to an
		owner in Las Vegas like that.

				NERI
		You missed my point; you are no
		longer an owner.

				KLINGMAN
		Get out of my hotel.

Neri stands in front of him, smiling.

				KLINGMAN
		Boys, get him out of here.

Quickly, Neri moves toward Klingman, and slaps him hard
several times in the face, knocking off his glasses... Red-
faced, Klingman gets down on his knees to pick them up once
again.  Glasses on, he looks to his guards.

WHAT HE SEES

They stand, motionless.

VIEW ON KLINGMAN

Humiliated, Klingman moves across the casino floor, past a
few interested gamblers, and his own people.  Neri slowly
follows.

INT. SHOWROOM - MED. VIEW - DAY

A typical, Lido de Paris type of show is in rehearsal, as
Klingman backs into the showroom.

HIS VIEW

Neri keeps coming.

VIEW ON KLINGMAN

realizes that no one will help him.  He finally capitulates.

				KLINGMAN
		All right!  All right, I'll be out.

Neri keeps moving, then heads past the terrified man, sits
down at a table, and looks up at the stage.

				NERI
			(to the staring performers)
		Keep it going.

EXT. A STREET IN BUFFALO, NEW YORK - NIGHT

The neon lights that spell out "FRED'S PIZZERIA" go out;
after a moment a man in an overcoat steps out, and turns to
lock the door of his restaurant.  The Corleone Waltz
continues over this.  He turns.

MED. CLOSE VIEW

FRED VINCENT, whom we remember as the Sicilian Fabrizzio.
He moves toward his parked car.  Gets in.

MED. LONG VIEW

The starter turns, and the automobile blows in a great
explosion.

							DISSOLVE TO:

EXT. THE TAHOE ESTATE - NIGHT

The waltz continues over the VIEW of the empty, but still
illuminated pavilion.  There is the debris of the great
party spread over the grounds, which a silent crew of
workmen are at work cleaning up.

MED. VIEW

Michael walks alone, followed by two of the family dogs,
Irish Setters.

He walks to the water line, and looks out across the lake.
He picks up a stick, and throws it for the dogs; who go
scampering after it.

We notice that a respectful distance away, there are
bodyguards watching every move he makes.

CLOSE VIEW ON MICHAEL

looking across the lake.  There is much on his mind.  The
SOUND of the waltz, begins to segue into the echoed music
and laughter of an old Italian Music Hall from the past.

							DISSOLVE TO:

INT. NEW YORK THEATRE - 1915 - NIGHT

VITO CORLEONE is a shy young man of 23, holding his hat in
his hand, being led down the crowded aisle of this Italian
Vaudeville theatre by an energetic and fulfilled GENCO
ABBANDANDO, his friend in America.  This entire sequence is
played in Sicilian.

				GENCO
		Come on, you've got to see her!

VIEW ON THE STAGE

A tattered melodrama is in progress in Neapolitan.  The sets
are two-dimensional, and flap whenever there's an entrance
or exit.

The hero, PEPPINO, is weeping as he sings about how he's
left his Mother in Italy, while he is in this new country,
America.

VIEW ON THE AUDIENCE

All poor, Italo-Americans.  Genco finds a few seats, and
leads Vito to them, stepping on a few shoes in the process.
They have barely come to their seats, when an excited Genco
nudges Vito, and points to the stage.  People shout that
they should sit down.

VIEW ON THE STAGE

There is a knock on the door, and a young girl enters,
delivering a letter to Peppino in his tenement in America.
The girl is pretty; and obviously the object of Genco's
affection.  The letter brings bad news.  Peppino's Mother is
dead.  He weeps, and sings the final song, which most of the
audience knows: SENZA MAMMA.

VIEW ON THE AUDIENCE

Genco is enthralled with the young actress.  The people in
the audience are singing along with Peppino.

VIEW ON THE STAGE

The actress, object of Genco's affection, makes a dramatic
exit.

VIEW ON THE AUDIENCE

Genco pulls on Vito's jacket, indicating that now that his
love is offstage, they should leave.  Vito rises with him,
and they make their way all the way down the aisle.

INT. BACKSTAGE THEATRE - MOVING VIEW - NIGHT

Genco hurrying down the backstage corridor, hoping to catch
a glimpse of the actress.  He is followed by Vito.  Suddenly,
Genco stops short.

GENCO'S VIEW

A heavy-set, fierce looking Italian wearing an expensive
light-colored suit and a cream colored fedora.  This is
FANUCCI.  He is discussing a business matter with the
theatre IMPRESARIO; a large, strong looking man, who is
sweating nonetheless.  He doesn't seem to be giving in to
Fanucci.  He holds a locked strongbox.

VIEW ON VITO

watching.  The two men argue in Italian.

MED. VIEW

The young ACTRESS crosses into the area, unaware of the
difficulties.  The impresario sees her, and frightened,
motions that she should keep away.

				IMPRESARIO
		Carla!

But Fanucci grabs her easily by her slender wrist, and with
lightning speed, produces a knife which he holds against her
cheek.  The impresario wrings his hands in agony.

				IMPRESARIO
			(Sicilian)
		No...please, not my daughter.

Whereupon he begins to unlock the box which holds the
receipts for the night's box-office.

VIEW ON GENCO AND VITO

hiding, watching.  At first, Genco is enraged, as though he
would rush up to help his enamorata.

				GENCO
			(Sicilian)
		The Black Hand.

Then he backs away.  Vito looks at him shocked and
disappointed in this cowardly behavior.  Genco shakes his
head, and points, as though to say that where Fanucci is
concerned, there is nothing to be done.

				GENCO
			(Sicilian)
			 (whispered)
		Let's get out of here.

VIEW ON FANUCCI

has released the girl.  Her father pulls her away from him,
and slaps her for no reason; then he pays Fanucci.

				FANUCCI
			(Sicilian)
		Because you protested, it will cost
		a hundred more.

EXT. NEW YORK ALLEY - NIGHT

Genco and Vito; Genco leans against the wall, breathlessly,
as though he's had a near escape.

				GENCO
			(Sicilian)
		I know what you are thinking,
		Vitone, but you don't understand
		yet how things are.  Fanucci is of
		the Black Hand.  Everyone in the
		neighborhood pays him, even my
		father.

				VITO
			(Sicilian)
		He's an Italian?

				GENCO
			(Sicilian)
		A pig of a Neapolitan.
			(spits)


				VITO
			(Sicilian)
		Why?  Why does he bother other
		Italians?

				GENCO
			(Sicilian)
		Because he knows them; he knows
		they have no one to protect them.
		Vitone?  What do you think of my
		angel?

				VITO
			(Sicilian)
		Beautiful.

				GENCO
			(Sicilian)
		Beautiful.

				VITO
		For you, she is beautiful.  For me,
		there is only my wife!

				GENCO
		I know.  That's why I brought you
		with me!

Genco embraces his good friend, and they laughingly walk
down the alley.

The stage door opens, and Fanucci exits, a sinister figure
in white, moving down the alley just in front of them, into
the night.

The two friends hold their breath, until he disappears.

EXT. NEW YORK STREETS - MOVING VIEW - DAY

Vito moves through the street, carrying groceries that he is
to deliver.

It is cold, and so vendors are huddled around fires they
have lit in old cans and drums.

He turns up an alleyway, and then stops.

VIEW UP THE ALLEY

With great strength, Fanucci lifts one of his young attackers up into the
air and throws him down hard to the concrete; but another,
holding onto his back, manages to produce a switchblade
knife and awkwardly reaching around from behind the moving
man, slits Fanucci's throat from one side to the other.

Fanucci groans like some great hurt animal.  Blood pours
from the deep, smile-like slit in his throat.

He throws the young man off his back.

VIEW ON VITO

stepping back in the alley.

VIEW ON FANUCCI

He takes off his white fedora, and runs down the alley
toward Vito, catching the flowing blood in his hat.

The young attackers scurry off in various directions.

INT. ABBANDANDO GROCERY STORE - DAY

A tiny shop featuring imported food: trays of cured meats,
prosciutto, copagole, mortadella lies on the counter covered
with netting to keep away the thousands of flies.

Olive oil is sold in bulk, as well as wine, cheese and bacala.

Genco works here for his father, and is busy slicing paper
thin prosciutto for a customer, by hand.  Vito works in the
back as a stock clerk.

Finished with his customer, Genco moves to his friend.

				GENCO
			(Sicilian)
		I bet you can't guess what happened?

				VITO
			(Sicilian)
		What?

				GENCO
			(Sicilian)
		Some guys from Ninth Avenue jumped
		Fanucci today; slit his throat from
		ear to ear.

				VITO
			(Sicilian)
		No, I didn't know.  Is he dead?

				GENCO
			(Sicilian)
		Nah.  Those guys aren't murderers.
		They wanted to scare him, that's
		all.  Make him look bad.

				VITO
			(Sicilian)
		In Sicily, when you attack a man,
		you had better finish him.

				GENCO
			(Sicilian)
		I wish they had.  He takes fifty
		dollars a week from my father's
		cash drawer.  But you can't kill a
		man like Fanucci.

				VITO
			(Sicilian)
		Why?

				GENCO
			(Sicilian)
		Because he's what we say...
		"connected"... You wait, see what
		happens to those guys from Ninth
		Avenue.

A customer enters; and Genco moves away to serve him.

CLOSE VIEW ON VITO

recalling what he had seen and thought.

EXT. NEW YORK ALLEYWAY - MED. VIEW - NIGHT

A young man, one of those who had tried to kill Fanucci,
runs down an alleyway, breathlessly.  Then he stops, and
looks behind himself.  Whoever was following him is gone.
He turns and walks ahead.  Then the mammoth, white-suited
figure of Fanucci leaps down before him from the fire-escape.
He grins at the young man, and then raises his neck, showing
the gruesome wound that marks his throat.

He takes out his pistol and fires point-blank at his attacker.

INT. TINY TENEMENT - FULL VIEW - NIGHT

The very small, railroad type flat where Vito lives with his
new family.

It is late at night, and he is exhausted.

He returns home; where his young wife, CARMELLA, goes
through the silent ritual of preparing a simple meal for him.
He sits and eats quietly.

INT. TENEMENT ROOM - NIGHT

Vito and Carmella enter the darkened bedroom, and approach a
metal crib.  Vito reaches down and takes the small hand of
the baby between his thick peasant fingers.  Carmella waits
a respectful distance behind him.

INT. ABBANDANDO GROCERY - DAY

The shop bell RINGS; SIGNOR ABBANDANDO turns to see a
smiling Fanucci tipping his hat, like an old customer.

				FANUCCI
		Buon giorno.

Immediately, Vito turns back to his work, and Signor
Abbandando moves to Fanucci with a sigh.

Vito notices the two men talking quietly at one side of the
store, while he goes about his work.  Genco works his way
closer to his friend.

				GENCO
			(Sicilian)
		What did I tell you.  The one who
		cut him was found in an alley.  And
		the family of the others paid
		Fanucci all their savings to make
		him forswear his vengeance.

				VITO
			(Sicilian)
			 (surprised)
		And he agreed?

				GENCO
			(Sicilian)
		He took the money.  Now he wants
		double from everybody in the
		neighborhood, including Papa.

Vito watches the heated, but inevitable transaction.

				VITO
			(Sicilian)
			 (almost to himself)
		A real mafioso doesn't sell his
		vengeance.

MED. VIEW

Signor Abbandando seems to be arguing with Fanucci, and
every so often they turn and relate to where Vito is working.
Then Fanucci leaves, the little bell RINGING; and Signor
Abbandando reluctantly moves to Vito.

				SIG. ABBANDANDO
			(Sicilian)
		Vitone.  How is your son?

				VITO
			(Sicilian)
		We are all well.

It is clear that he has something difficult to tell the
young man.

				SIG. ABBANDANDO
		Vitone...I...Fanucci has a nephew.

Vito looks at him a while, as the old man struggles to tell
him.

				VITO
			(Sicilian)
		And you must give him my job.

The old man nods, regretfully.

				VITO
			(Sicilian)
		You have been kind to me since I
		was a boy; taken care of me, and
		been as a father.  I will always be
		grateful to you.  Thank you.

Vito takes off his apron, and leaves, passing the youth who
loiters by the counter.

EXT. THE STREET - DAY

making his way from the store.

				SIG. ABBANDANDO
			(Sicilian o.s.)
		Vitone!

He turns, and Abbandando has followed him out of the shop,
holding a basket of some groceries.

				SIG. ABBANDANDO
		Here...for your family.

				VITO
		No...please understand...I cannot
		accept.

INT. VITO'S TENEMENT - MED. VIEW - NIGHT

Vito and his wife sit quietly at the table; the two are
quiet and sad.

Suddenly, we HEAR a noise, and Vito is astonished to see a
young man, PETER CLEMENZA, leaning out of the window on the
other side of the air shaft which separates their apartments.

				CLEMENZA
		Hey Paisan!  Hold this for me until
		I ask for it.  Hurry up!

Automatically Vito reaches over to the empty space at the
air shaft, and takes the bundle of rags.  Clemenza's round
face is strained and urgent, obviously in some kind of
trouble.  Suddenly, he closes the window and there is
activity that we cannot see in the other apartment.

Vito looks to his wife, and then closes the window and
window dressing and takes the bundle into a private part of
his kitchen and begins to unwrap it.

WHAT HE SEES:

Five oily guns.  He immediately wraps them again, and
carries them to a private closet, and hides them, and returns
to his wife.  He sits down back at the table; and she knows
not to ask him what has happened.

EXT. NEW YORK STREETS - DAY

Vito is walking through the crowded streets with a group of
workmen; they all wear work clothes, and paper hats on their
heads.

Vito looks to his left, and realizes that Clemenza is
walking silently with him; by contrast, Clemenza dresses well.

				CLEMENZA
			(Sicilian)
			 (casually)
		Do you have my goods still?

Vito nods.

				CLEMENZA
			(Sicilian)
		Did you look inside?

Vito, his face impassive, shakes his head 'no.'

				VITO
			(Sicilian)
		I'm not interested in things that
		don't concern me.

INT. DOWNTOWN ITALIAN SOCIAL CLUB - DAY

Vito and Clemenza drinking wine; they've become friends.

				CLEMENZA
			(Sicilian)
		I have a friend who has a fine rug.
		Maybe your wife would like it.

				VITO
			(Sicilian)
		We have no money for a rug.

				CLEMENZA
			(Sicilian)
		No.  He would give it away.  I know
		how to repay a consideration.

Vito thinks, then nods.

				VITO
			(Sicilian)
		She would like it.

INT. HALLWAY WEALTHY APARTMENT BUILDING - DAY

The two men proceed up the hallway; Vito is impressed with
the opulence.

				VITO
			(Sicilian)
		Your friend lives in a fine building.

				CLEMENZA
			(Sicilian)
		Oh yes, the very best.

Clemenza knocks on the door as though he is well known here;
then rings.  No answer.

				CLEMENZA
			(Sicilian)
		Ah, he's not at home.  Oh, well, he
		wouldn't mind.

Quickly and expertly he takes out a tool and pries open the
door.

INT. WEALTHY APARTMENT - FULL VIEW - DAY

Vito looks in awe at the luxurious apartment, which features
a fabulous rich red wool rug.

Clemenza immediately moves some of the furniture away, and
drops to the floor.

				CLEMENZA
		A little help.

Vito joins him, and the two begin rolling the rug.  We HEAR
a BUZZER RING.  Clemenza immediately drops his side of the
roll, and moves to the window.  He pulls a gun from his
jacket.

VIEW ON Vito watching.  He moves so he can see out the window.

THEIR VIEW

A Policeman stands at the exterior door, waiting.  He rings
the buzzer again.

VIEW ON CLEMENZA

cocking his gun.  Vito realizes that if the Policeman should
pursue it any further he is a dead man.  The Policeman gives
up and leaves.

Clemenza puts away his gun.

INT. VITO'S TENEMENT HALLWAY - DAY

The two men run up the steps, laughing, carrying the fine rug.

INT. VITO'S TENEMENT - DAY

They are on their knees cutting the rug to fit the small
room.  Carmella watches, holding the baby SANTINO.

MED. CLOSE ON CLEMENZA

Like a professional, cutting quickly, with the proper tools.
He sings as he works.

							DISSOLVE TO:

EXT. DOWNTOWN WAREHOUSE - NIGHT

Clemenza knocks on the steel door of this downtown building.
Vito waits with him, holding some packages; and another
youth, TESSIO, tall and thin and deadly waits with them.

The door is lifted, and they are greeted by a bright,
middle-aged Italian named AUGUSTINO who leads them into a
machine shop.

INT. MACHINE SHOP - NIGHT

				CLEMENZA
			(Sicilian)
		Good, you waited for us.

Clemenza looks up on a higher level.

HIS VIEW

There is a nine year old boy, operating a drill press.

MED. VIEW

				TESSIO
			(Sicilian)
		Who is he?

				AUGUSTINO
			(Sicilian)
		My son, Carmine...it's all right.

The men then quickly open the packages they've brought;
revealing guns, including a more sophisticated machine weapon.

Augustino takes them and expertly begins to clean and
prepare them.

				CLEMENZA
			(Sicilian)
			 (to Vito)
		Paisan Augustino was a gunsmith in
		the Italian army.  We do each other
		favors.

				AUGUSTINO
			(Sicilian)
			 (while he works)
		My boy is studying the flute.  He
		plays very well.  He helps me at
		night so we can buy him a silver
		flute someday.  Now he has one made
		of wood.  Carmine...play...play for
		my friends.

VIEW ON THE BOY

wide-eyed... he shuts off the press; and takes out a shabby
wooden flute.  And begins to play a simple and pure melody.

CLOSE ON VITO

listening.

CLOSE ON AUGUSTINO

proudly smiling, as he prepares the machine gun.

CLOSE ON TESSIO

listening, smiling.

FULL VIEW

The men listening, as the boy's father prepares their guns.

EXT. WAREHOUSE AREA - NIGHT

Tessio and Clemenza quickly load racks of cheap dresses.

Vito sits behind the wheel of the truck.  He seems reserved,
and we get the impression that he is studying every move his
two friends are making.

INT. TENEMENT STAIRS - DAY

Clemenza runs up a flight of stairs with an armful of
dresses.  He knocks on a door, and a pretty HOUSEWIFE answers.

				CLEMENZA
			(Sicilian)
		Lady, I got a bargain on these
		dresses.  Five dollars each.  You
		gotta pay at least fifteen, maybe
		twenty in a store.  Look at them,
		first class.

He holds the dresses up and the woman seems interested.  She
handles a couple of them and stands aside so Clemenza can
enter her apartment.

				WOMAN
			(Sicilian)
		I don't know which one I like best.

She holds the dresses against her body, Clemenza approving
of each one; and then she goes to her purse and takes out
five singles and gives them to him.

				CLEMENZA
			(Sicilian)
		You'd look beautiful in all of
		these.  You should buy at least two.

				WOMAN
			(Sicilian)
		Are you kidding?  My husband will
		kill me if he knows I paid five
		dollars for one dress.

She holds one up, then another.  She is torn.  Clemenza
shakes his head and straightens the dress on her body.  His
hand brushes her arm; she looks at him smiling.

				CLEMENZA
			(Sicilian)
		You can have two for five.

She smiles back.

EXT. TENEMENT BUILDING - DAY

Clemenza jumps down the stairs, and out to the middle of the
street, where Vito and Tessio are waiting in the car with
some of the stock.

				TESSIO
			(Sicilian)
		What took so long?

				CLEMENZA
			(Sicilian)
		She couldn't decide.

Tessio and Clemenza each take more armsful of dresses and
divide the neighborhood.

				CLEMENZA
			(Sicilian)
		Vito, take the rest of the stock
		over to Dandine's warehouse; he'll
		move it to a wholesaler.

The three part.  Vito drives the truck off.

MOVING VIEW

Vito drives the truck through the downtown streets; he turns
a corner and stops for a light.

Suddenly, to his left, he sees the formidable figure of
Fanucci.

He grabs young Corleone by the shoulder.

CLOSE VIEW ON FANUCCI

frightening, revealing the large circular scar, now healed.

				FANUCCI
			(Sicilian)
		Ahhh, young fellow.  People tell me
		you're rich, you and your two
		friends.  Yet, you don't show
		enough respect to send a few
		dresses to my home.  You know I
		have three daughters.

Vito says nothing.  Fanucci thumbs through the stock.

				FANUCCI
			(Sicilian)
		This is my neighborhood.  You and
		your friends have to show me a
		little respect, ah?  This truck you
		hijacked was in my neighborhood.
		You should let me wet my beak a
		little.

Fanucci takes a few of the dresses.

				FANUCCI
			(Sicilian)
		I understand each of you cleared
		around six hundred dollars.  I
		expect two hundred dollars for my
		protection and I'll forget the
		insult.  After all, young people
		don't know the courtesies due a man
		like myself.

Vito smiles at him and nods.

				FANUCCI
			(Sicilian)
		Otherwise the police will come to
		see you and your wife and children
		will be dishonored and destitute.
		Of course, if my information as to
		your gains is incorrect, I'll dip
		my beak just a little.  Just a
		little, but no less than one
		hundred dollars, and don't try to
		deceive me, eh paisan?

				VITO
			(Sicilian)
			 (quietly)
		My two friends have my share of the
		money.  I'll have to speak to them
		after we deliver these to the
		wholesaler.

				FANUCCI
			(Sicilian)
		You tell your friends I expect them
		to let me wet my beak in the same
		manner.  Don't be afraid to tell
		them.  Clemenza and I know each
		other well, he understands these
		things.  Let yourself be guided by
		him.  He has more experience in
		these matters.

				VITO
			(Sicilian)
			 (shrugging innocently)
		You must understand, this is all
		new to me...

				FANUCCI
			(Sicilian)
		I understand...

				VITO
			(Sicilian)
		But thank you for speaking to me as
		a Godfather.

				FANUCCI
			(Sicilian)
			 (impressed)
		You're a good fellow.

He takes Vito's hands and clasps them in his own.

				FANUCCI
			(Sicilian)
		You have respect.  A fine thing in
		the young.  Next time, speak to me
		first, eh?  Perhaps I can help you
		make your plans.

Fanucci turns with the dresses draped over his arms, waving
to Vito.

Vito throws the truck in gear, and drives off.

CLOSE VIEW ON VITO

We know that throughout this encounter he has seethed with
an icy rage.

INT. VITO'S APARTMENT - MED. VIEW - NIGHT

His wife serves a dinner for her husband and his two friends.
They discuss Fanucci as they eat.

				TESSIO
			(Sicilian)
		Do you think he'd be satisfied with
		the two hundred dollars?  I think
		he would.

				CLEMENZA
			(Sicilian)
		That scar-faced bastard will find
		out what we got from the wholesaler.
		He won't take a dime less than
		three hundred dollars.

				TESSIO
			(Sicilian)
		What if we don't pay?

				CLEMENZA
			(Sicilian)
			 (gestures, it's hopeless)
		You know his friends...real animals.
		And his connections with the police.
		Sure he'd like us to tell him our
		plans so he can set us up for the
		cops and earn their gratitude.
		Then they would owe him a favor;
		that's how he operates.  We'll have
		to pay.  Three hundred, are we
		agreed?

				TESSIO
			(Sicilian)
		What can we do?

Clemenza doesn't even bother checking for Vito's opinion.

				CLEMENZA
			(Sicilian)
		They say Fanucci has a license from
		Maranzalla himself to work this
		neighborhood.

				VITO
			(Sicilian)
		If you like, why not give me fifty
		dollars each to pay Fanucci.  I
		guarantee he will accept that
		amount from me.

				TESSIO
			(Sicilian)
		When Fanucci says two hundred he
		means two hundred.  You can't talk
		with him.

				VITO
			(Sicilian)
		I'll reason with him.  Leave
		everything in my hands.  I'll
		settle this problem to your
		satisfaction.

Tessio and Clemenza regard him suspiciously.

				VITO
			(Sicilian)
		I never lie to people I've accepted
		as my friends.  Speak to Fanucci
		yourself tomorrow.  Let him ask you
		for the money, but don't pay it,
		and don't in any way quarrel with
		him.  Tell him you have to get the
		money and will send me as your
		messenger.  Let him understand that
		you're willing to pay what he asks,
		don't bargain.  I'll go to his
		house, and quarrel with him.  He
		likes me; enjoys explaining how
		things are here.  He uses ten
		sentences when he only needs one,
		so while he talks, I'll kill him.

Clemenza, irritated, makes a large belch, and washes his
food down with wine.

				CLEMENZA
			(Sicilian)
		Vitone!
			(to Tessio)
		Our driver has drunk too much wine.

				TESSIO
			(Sicilian)
			 (laughs at himself)
		He's going to kill Fanucci.

				CLEMENZA
			(Sicilian)
			 (stern)
		Then, after that, what?  Joe
		'Little Knife' Pisani; Willie
		Bufalino, maybe, Mr. Maranzalla
		himself, c'mon!

				VITO
			(Sicilian)
		Fanucci is not connected; he is
		alone.

				CLEMENZA
			(Sicilian)
			 (sarcastically)
		What?  You read it in the papers?

				VITO
			(Sicilian)
		This man informs to the police;
		this man allows his vengeance to be
		bought off... No, he is alone.

				TESSIO
			(Sicilian)
		If you're wrong...

				VITO
			(Sicilian)
		If I'm wrong, they will kill me.

Both Clemenza and Tessio are impressed with their young
friend; his willingness to risk his life on his perception
of the situation.

EXT. NEW YORK STREET - MED. VIEW - NIGHT

A ten piece Italian street band plays in front of the church
to commemorate the first night of the Festa di San Gennaro.
People swarm in crowds, delighted by the music, as out of
the church four men carry the statue of San Gennaro down to
the street.

MOVING VIEW

Clemenza moves along the booths that have been set up along
the street: sausage cooking on an open fire; pork livers and
sweetbreads.  He stops for a sandwich, and makes an irritated
gesture when the vendor expects to be paid.  He crosses to a
church-sponsored booth with a great Wheel of Fortune, and
slaps a dollar on a number.  Standing next to him is Vito;
they embrace.

				CLEMENZA
			(Sicilian)
			 (quietly)
		All three daughters are at church;
		he is alone.  It's important that
		you let his neighbors see you leave.
		Tessio has broken the latch on the
		skylight of his building.

The wheel stops; they both lose.

				CLEMENZA
			(English)
		See, Brother Carmello, even the
		church makes numbers.

				PRIEST
			(English)
		It's only the way we collect that's
		different.

Vito has left while Clemenza jokes with the Priest.

EXT. FESTA STREET - NIGHT

Vito passes the booths of food, crossing toward a small and
dark club.

INT. SOCIAL CLUB - NIGHT

We can still HEAR the crowds and music of the festa.  Vito
enters; the club is empty, except for the large white figure
sitting alone at a small table.  Fanucci barely acknowledges
Vito as he joins him.

Without a word, Vito counts out two hundred dollars on the
table.  Fanucci looks, then takes off his fedora and puts it
on the table over the money.

				FANUCCI
			(Sicilian)
		I think there's only two hundred
		dollars under my hat.
			(he peeks)
		I'm right.  Only two hundred dollars.

				VITO
			(Sicilian)
		I'm a little short.  I've been out
		of work.  Let me owe you the money
		for a few weeks.

				FANUCCI
			(Sicilian)
		Ah, you're a sharp young fellow.
		How is it I've never noticed you
		before?
			(he takes the two
			hundred and pours
			some wine for Vito)
		You're too quiet for your own
		interest.  I could find some work
		for you to do that would be very
		profitable.
			(he rises)
		No hard feelings, eh?  If I can
		ever do you a service let me know.
		You've done a good job for yourself
		tonight.

EXT. FESTA STREET - MED. VIEW - NIGHT

By now the musicians have left, but still families are
walking the street, and stopping at the booths.

Fanucci stands there a moment; he is known by everyone, and
considers himself highly loved.

Then Fanucci begins the walk through the festa, on his way
home.

EXT. ROOFTOPS - MOVING VIEW - NIGHT

Vito silently moves along the rooftop; paralleling Fanucci's
walk.

We HEAR the sounds of the festa, and every so often catch a
glimpse of the patterned lights, and the crowds below.

EXT. FESTA STREETS - MOVING VIEW ON FANUCCI - NIGHT

walking through the crowded streets.  The statue of San
Gennaro is arranged in some midnight religious ceremony.

The VIEW LIFTS UP, to the rooftops.

EXT. THE ROOFTOPS - NIGHT

Vito makes the leap that separates two buildings; then
crosses toward the large skylight in the center of the
building.

EXT. THE STREETS - NIGHT

The procession in the streets is preceded by ten altar boys;
and the glittering Monstrance, something of an altar carried
out into the streets.

The priest begins this nocturnal service, as the crowds in
the street kneel down in prayer.

INT. FANUCCI'S BUILDING - NIGHT

Fanucci unlocks the door to his building; we can HEAR the
services in the background.

EXT. THE ROOFTOP - NIGHT

Vito tries the trap door on the roof; it is stuck firmly
shut; despite Clemenza's instructions.  He struggles with
it, but no luck.

From the distance, the Choir begins to sing in Latin.  Vito moves
around the skylight, to an identical trap, tries this one;
it opens.

EXT. THE MONSTRANCE - MED. VIEW ON THE PRIEST - NIGHT

performing the services in Latin.  The ten altar boys are in
attendance.

EXT. THE ROOFTOP - NIGHT

Vito reaches down into the trap, and pulls out the newly
oiled gun that has been left for him.  He slides down into
the building.

INT. FANUCCI'S HALLWAY - DOWN ANGLE - NIGHT

Fanucci proceeds up the staircase with loud, heavy steps.
An OLD WOMAN on one of the flights sees him, and immediately
moves to her apartment.

				FANUCCI
			(Sicilian)
		What's the matter, Signora?  You
		don't say 'good evening'?

				WOMAN
			(Sicilian)
		'Good evening,' Signor Fanucci.

She quickly disappears behind her door.  Fanucci laughs,
continues up, singing to himself.  The MASS outside is
always in evidence.

INT. HALLWAY - NIGHT

Vito climbs down from the attic, and finds Fanucci's rear
door open.  He slips in, and makes his way past the open
windows, out of which pour the music and chanting of the
Mass.  Slowly and quietly he pulls them down, shut.

He moves quietly to a glass door, and peeks out.

WHAT HE SEES:

Three young women, Fanucci's DAUGHTERS, laughing and talking.

VIEW ON VITO

A slip up.  Tessio had said they were out.  He steps outside
to the alley where he can look into the apartment.

ANOTHER VIEW

Fanucci opens the door of his apartment, and enters.

CLOSE VIEW ON VITO

He begins to wrap the gun.

VIEW ON THE DAUGHTERS

Their father greets them with a kiss; and a little religious
gift he has bought for each.

CLOSE ON THE GUN

wrapped in this primitive method of a silencer.  The VIEW
TILTS to Vito, caught in the dilemma of having to kill all
or none of them.  Then something catches his eye.

WHAT HE SEES:

A small gray alley cat is attracted to the young man, comes
up to him and rubs itself against him.  Vito rubs the
animal, speaking softly in Sicilian, then, gaining its
confidence, lifts it up and carefully lets it into Fanucci's
apartment.

He steps back, holding the gun.  We HEAR some Italian
shouted in the house; a loud sound from the cat, and some of
the thumping footsteps of Fanucci.

VIEW ON VITO

holding the wrapped gun, waiting.

WHAT HE SEES:

The white blob of Fanucci opening the door and cursing in
Italian as he throws the cat out.

VIEW ON VITO

squeezing the trigger; the muffled, but still LOUD BLAST
resounding in the building.

VIEW ON FANUCCI

He holds onto the door frame, trying to stand erect, trying
to reach for his gun.  The force of his struggle has torn
the buttons off his jacket and made it swing loose.  His gun
is exposed but so is a spidery vein on the white shirtfront
of his stomach.  Carefully, as if plunging a needle into
this vein, Vito Corleone fires a second bullet.

Fanucci falls to his knees, propping the door open, giving a
terrible groan.  We begin to hear the VOICES of girls inside
the apartment.

Vito quickly opens his wallet, removes the two hundred,
quickly fires one last bullet into Fanucci's sweaty cheek.
Now the towel the gun was wrapped with catches fire,
literally on Vito's hand; quickly he throws it to the
ground, stamps it out...and disappears upward.

EXT. THE ROOFTOPS - MED. VIEW - NIGHT

Vito moves like a cat along the rooftops; we HEAR the
conclusion of the Mass down below.

CLOSE ON VITO

Pausing; we can SEE the great spectacle of lights and
candles on the streets below.

He empties the gun of bullets and smashes the barrel against
the side of the roof ledge.  He reverses it in his hand, and
breaks the butt into two separate halves against the chimney.
He smashes it again, and the pistol breaks into barrel and
handle, two separate pieces.

He then moves along the rooftop, dropping the two separate
fragments into various air shafts.

EXT. THE STREET PROCESSION - MOVING VIEW - NIGHT

The Priest, having completed the ceremony, follows as the
Monstrance is carried off through the streets, as the Choir
sings.

EXT. THE ROOFTOPS - MOVING VIEW - NIGHT

Vito is a dark figure, moving with agility across the
rooftops.

INT. FANUCCI'S VESTIBULE - MED. VIEW - NIGHT

The corpse that was Fanucci, stained with blood.

EXT. PROCESSION - CLOSE VIEW - NIGHT

The statue of San Gennaro, followed by the altar boys.

EXT. CORLEONE TENEMENT - MED. VIEW - NIGHT

Vito's wife, her baby and several friends and neighbors sit
happily on the front stoop of their tenement.  Some of the
men drink wine poured out of a pitcher; we can still HEAR
the music and night sounds of the Festa.

A neighbor is singing a Neapolitan song.

Quietly, without a word, and with only a momentary glance
from his wife, Vito joins the little group; takes a glass of
wine, and listens to the song.

CLOSE VIEW ON VITO

listening to the song.  He reaches out and takes the small
hand of his son.

				VITO
			(Sicilian)
		Santino, your papa loves you.

							DISSOLVE TO:

INT. ANTHONY'S TAHOE ROOM - NIGHT

The room is large, lit from the outside by a bright evening.
We can see the outline of many toys on the shelves built
along the wall.  We see the dark figure of Michael Corleone
enter the room and approach the bed where his son Anthony
lies curled in messy blankets.  Michael quietly arranges his
small hands and feet and covers the little boy.  Suddenly,
Anthony turns, his eyes open.  He is staring, perfectly
awake, at his father.

				MICHAEL
		Can't you sleep?

No answer.

				MICHAEL
		Are you alright?

				ANTHONY
		Yes.

				MICHAEL
		Did you like your party?

				ANTHONY
		I got lots of presents.

				MICHAEL
		Do you like them?

				ANTHONY
		I didn't know the people who gave
		them to me.

				MICHAEL
		They were friends.

He kisses his boy, and then turns.

				ANTHONY
		Did you see my present for you?

				MICHAEL
		No, where is it?

				ANTHONY
		On your pillow.

				MICHAEL
		I'm leaving very early tomorrow,
		before you wake up.

				ANTHONY
		I know.  How long will you be gone?

				MICHAEL
		Just a few days.

				ANTHONY
		Will you take me?

				MICHAEL
		I can't.

				ANTHONY
		Why do you have to go?

				MICHAEL
		To do business.

				ANTHONY
		I can help you.

				MICHAEL
		Some day you will.

Michael kisses him again.

INT. MICHAEL-KAY'S BEDROOM - NIGHT

The room is lit from a small night lamp on Michael's side of
the large bed.

Kay is huddled in blankets, asleep.  Michael closes the door
to his room, moves to his side of the bed, and glances down
to the pillow.

VIEW ON THE PILLOW

is a child's drawing of a long limousine, with a man in a
hat sitting in the back seat.

An arrow pointing to him is marked "DAD." Under it, a nine
year old's handwriting says: "Do you like it?  Check YES __
I liked it or NO __ I didn't like it." Michael turns,
looking for a pencil, and moves to the dresser, where he
places a check next to "YES."

He starts to cross back toward his side of the bed, when Kay
turns, almost in her sleep:

				KAY
		Michael?  Why are the drapes open?

His eyes dart back to the curved, beautifully leaded windows
of the room.  The DRAPES are opened.  Then, without a
second's hesitation, he leaps to the floor, still holding
his son's drawing, as a spray of machine gun bullets sweeps
across the windows; glass shattering all over the room.

Kay screams out; rising, still half-asleep.  Michael crawls
toward her, and pulls her down to the floor to him.

Then, for a moment, there is silence, soon filled by the
shouts of men; as flashes of light sweep by the window, as
guards with flashlights come running.

Michael holds Kay to him, knowing they have both survived,
and then gently:

				MICHAEL
		Go with the kids.

EXT. THE TAHOE ESTATE - NIGHT

Suddenly, the great floodlights are turned on, bathing lawns
in an intense blue light.

Groups of ordinarily dressed security men drawn in from all
directions; a state of confusion prevails.  There is no sign
of the attackers.

VIEW BY MICHAEL'S HOUSE

Michael is joined by Rocco Lampone, his gun drawn.

				ROCCO
		They're still on the property.
		Maybe you better stay inside.

				MICHAEL
		Keep them alive.

Six men take up posts by Michael's house.

				ROCCO
		We'll try.

				MICHAEL
		It's important.

He returns inside.

EXT. MAIN GATE AND KENNELS - NIGHT

The character of the summer estate has changed: bright
floodlights illuminate the main points of entry: the main
gate; the waterway; the stone wall that encompasses the
estate on all sides.

Several men with flashlights reinforce the guard at the main
gate.

FULL VIEW

Off in the distance, we see another group of men with
flashlights combing the waterline.  We hear indistinguishable
shouts.

VIEW ON THE KENNELS

The wire gates are opened, and the trained dogs go out
yelping into the outer edge of the estate.

ROOFTOP

One of Rocco's men turns the large floodlight scanning
darkened forest areas, where men could hide.

MOVING VIEW

Men with flashlights and dogs.  Moving through the dark areas.

LOOSE VIEW

A small Corleone launch, with a bright spotlight slowly
cruises the boundaries of the estate.  We SEE the silhouette
of men with guns, quietly waiting and watching.

EXT. MICHAEL'S HOUSE - NIGHT

Some of the bodyguards by the shattered windows of Michael's
bedroom.

The curtains are drawn from inside.

INT. MICHAEL'S HOUSE - NIGHT

Kay, the children, and some women servants have come down
from the various rooms into the central living area, that
can be most easily secured.  The little girl is still
asleep; they make you think of an immigrant family, with
their blankets and frightened faces, all waiting in a
central room.

Michael goes up to Kay, squeezes her hand, and whispers:

				MICHAEL
		It will be all right.  We were lucky.

She says nothing; but her face expresses the anger she feels
over the jeopardy Michael has placed his children in.  She
holds her young daughter in her arms.

The door opens, and Rocco enters.  He quickly realizes he is
holding his gun in plain view in front of the family, and
puts it away.  Michael moves to him, and they talk a distance
away from Kay.

				ROCCO
		Your family all seem to be okay in
		the other houses; your Mother's
		still sleeping.

				MICHAEL
		And?

				ROCCO
		No sign of them yet; but they're
		still on the Estate.

We HEAR loud shouting from outside.

				DEANNA (O.S.)
		Goddamn you!  You're all nuts here,
		I'm not goin' to calm down...

MICHAEL'S VIEW

Through the door, that Rocco opens.

Deanna, in her nightgown, has been frightened by the
gunshots; while Fredo in his bathrobe, tries to get her back
into the house.

				FREDO
		Deanna, will you get back into the
		house!

				DEANNA
		I'm getting out of here I said;
		these guys all have guns!

				MICHAEL
		Fredo, can't you shut that woman up!
			(to Rocco's men)
		Get her in here!

The bodyguards gracefully help Fredo bring the hysterical
Deanna into the safety of the house.

				DEANNA
			(whimpering)
		I don't want to stay here...

				FREDO
		Mike, what can I do, she's a
		hysterical woman...

				KAY
		Leave her alone!  You're talking as
		though she has no right to be
		frightened when there are machine
		guns going off in her backyard.

				MICHAEL
			(to Rocco)
		Have Tom Hagen meet me in the
		Harbor House.

EXT. TAHOE ESTATE - HIGH ANGLE - NIGHT

Michael walks the short distance from his house, to the
boathouse where he conducts his business away from his family.

A small group of bodyguards, carrying machine guns, make the
walk with him from all sides, a respectful distance away.
It gives the appearance of a lonely President moving in his
compound, followed by teams of Secret Service men.

The boathouse is already secured by teams of men, hastily
wakened from their lodge house; a barracks-like structure
where reinforcements are lodged just for this kind of
emergency.

FULL VIEW

In the distance, we can see the teams of men and dogs, with
their lights, guns and shouts, combing every inch of the
estate.

INT. THE BOATHOUSE - EMPTY VIEW - NIGHT

Michael alone in the great room.  He moves to a walk-in
safe, quickly runs through the combination, and opens it.
He takes out an envelope, and puts it into his pocket;
there's a KNOCK on the door, and Hagen enters.  He had been
asleep, and has quickly thrown on a robe.

				MICHAEL
		Sit down, Tom.

EXT. TAHOE BOATHOUSE - NIGHT

From outside the leaded windows, a disoriented Hagen sits
down; Michael starts to talk to him; obviously about
something very serious.

The patrol securing the boathouse, walk past the window.
Michael says something to Tom, who rises, and pulls the
drapes, obscuring OUR VIEW.

INT. TAHOE BOATHOUSE - NIGHT

Michael talks intimately to Tom.

				MICHAEL
		There's a lot I can't tell you, Tom.
		I know that's upset you in the
		past; and you've felt that it was
		because of some lack of trust or
		confidence.  But it is because I do
		trust you that I've kept so much
		secret from you.  It's precisely
		that at this moment, you are the
		only one that I can completely
		trust.  In time, you'll understand
		everything.

				HAGEN
			(nods with this statement)
		But your people... Neri... Rocco;
		you don't think...

				MICHAEL
		No, I have confidence in their
		loyalty... but this is life and
		death, and Tom, you are my brother.

Hagen is very moved.

				HAGEN
		Mikey, I hoped...

				MICHAEL
		No Tom, just listen.  All my people
		are businessmen; their loyalty is
		based on that.  One thing I learned
		from my father is to try to think
		as the people around you think...and
		on that basis, anything is possible.
		Fredo has a good heart, but he is
		weak...and stupid, and stupid
		people are the most dangerous of
		all.  I've kept you out of things,
		Tom, because I've always known that
		your instincts were legitimate, and
		I wanted you to know very little of
		things that would make you an
		accomplice, for your own protection.
		I never blamed you for the setbacks
		the family took under Sonny; I know
		you were in a position of limited
		power, and you did your best to
		advise and caution him.  What I am
		saying is that now, for how long I
		do not know, you will be the Don.
		If what I think has happened is
		true; I will leave tonight, and
		absolutely no one will know how to
		contact me.  And even you are not
		to try to reach me unless it is
		absolutely necessary.  I give you
		complete power: over Neri... Fredo,
		everyone.  I am trusting you with
		the lives of my wife and children,
		and the future of this family,
		solely resting on your judgment and
		talent.

VIEW ON HAGEN

A man who has steadily declined over the last five years,
realizing that total power and responsibility is being
placed on him.

				MICHAEL
			(continuing)
		...But Tom, you must know that I do
		this only because I believe you are
		the only one who is capable of
		taking over for me.

VIEW ON MICHAEL

taking out the envelope.

				MICHAEL
		I've prepared this; have had it for
		over a month.  It won't explain
		everything; but indicates where I
		will be, so in a sense, it is my
		life.
			(he hands the envelope
			to Hagen)
		Also, there are three tasks that
		must be executed immediately.  Pop
		would have given those to Luca --
		You knew Pop as well as anyone, act
		as though you were him.  It
		discusses Kay as well; that will be
		the most difficult.  The men who
		tried to kill me tonight, will
		never leave the estate.

				HAGEN
		Will we...be able to get who
		ordered it out of them?

				MICHAEL
		I don't think so.  Unless I'm very
		wrong...they're already dead.
		Killed by someone inside...very
		frightened that they botched it.
		That's why I am going to disappear
		in a few minutes, and leave
		everything to you.

				HAGEN
		But if you're wrong...

				MICHAEL
		If I'm wrong...

There is a KNOCK on the door.

				MICHAEL
		...I don't think I'm wrong.
			(he indicates the knock)
		Yes.

The door opens; it is Rocco; Michael rises, after making a
knowing glance toward Tom, and moves to talk quietly to a
frightened and agitated Rocco.

EXT. STONE WALL AND STREAM - MOVING VIEW - NIGHT

A group of men with flashlights and guns lead Michael, Tom
and Rocco to the stone bridge spanning the stream which runs
through the estate.

LOW CLOSE VIEW

Michael's dispassionate face, looking down.  THE VIEW MOVES
to Hagen's, and then down to the murky water under the
bridge, where we see the bodies of three strangers, lying in
the moving water; machine-type guns nearby, with their
throats cut.  Light from the many flashlights illuminates
the grotesque scene.

				MICHAEL (O.S.)
		Fish them out.

Several of the men wade down into the stream; Rocco helps,
and even Tom steps down to get a better look at who they
were.  They are total strangers; Rocco examines the type of
guns they used.

When they climb back onto the ground, Michael is gone.
Everyone notices it, but no one says anything.

Hagen stands there, holding the envelope Michael had given
him in his hand.

He realizes that now, he is the DON.

				HAGEN
		Get rid of the bodies.  Tomorrow
		morning I want a report made to the
		local police, and paper, that some
		explosives we keep on the property
		were accidentally ignited.

The men respond; Hagen makes the lonely walk back to the
lighted section of the compound, which now resembles a
prison camp.

							FADE OUT.

FADE IN:

INT. STATE SENATE FLOOR - DAY

The Senate is in session; Senator Geary is on the floor
during a vote.  An aide approaches him, with a slip of paper.

INT. GEARY'S OFFICE - DAY

The Senator steps behind his desk.

				SENATOR GEARY
		All right, Mr. Hagen, you've got
		ten minutes.

He flicks the switch of a small tape recorder.

				SENATOR GEARY
		...and the tape will be running.

				HAGEN
		Actually, I've come with good news;
		the Corleone family has done you a
		favor.

The Senator immediately shuts the tape recorder off.

				SENATOR GEARY
		What the hell are you talking about?

				HAGEN
		We know you're a busy man, with
		plenty of enemies -- we saw the
		opportunity to do you a favor, and
		we did.  No strings.

				SENATOR GEARY
		No strings.

				HAGEN
		You know there's a Senate
		Investigating Committee recently
		set up; we thought it would be
		unfortunate if they were to trace
		anything thought-provoking to your
		name.

				SENATOR GEARY
		No one can trace anything to me; I
		pride myself on that.

				HAGEN
		Do you gamble?

				SENATOR GEARY
		A little; what's so thought-
		provoking about that?

				HAGEN
		Do you owe markers?

				SENATOR GEARY
		Maybe two, three thousand dollars.

Hagen leans forward, and deposits a handful of paper on the
Senator's desk.

				HAGEN
		The Corleone family has paid them
		off for you...as an expression of
		our esteem.

Geary quickly looks through the paid markers.

				SENATOR GEARY
		There's thirty grand worth of paid
		off markers -- I never owed that
		much.

				HAGEN
		Our mistake.  But what does it
		matter; it was our money.
			(rising)
		We don't even expect thanks.

				SENATOR GEARY
		You paid off thirty grand I never
		owed.

				HAGEN
		We'll keep it quiet; the people who
		know are trustworthy...the Committee
		needn't find out.

				SENATOR GEARY
		And what's the price of their not
		finding out.

				HAGEN
		Simple.  Be friendly like us.  Not
		hostile.

				SENATOR GEARY
			(he despises Hagen)
		Thanks...friend.

EXT. TAHOE ESTATE - FULL VIEW - DAY

There are more men on duty than usual; not that there are
guns apparent, but it's clear that the boundaries are being
patrolled.

VIEW BY MICHAEL'S HOUSE

Kay exits her house, followed by her children; she helps
them into her station wagon like any housewife, and drives
along the path leading to the main gate.

She's about to drive through, when one of the men steps in
front of her, raising his hand.

				KAY
			(graciously)
		Yes.

				MAN
		I'm sorry, Mrs. Corleone.  We're
		not to let you through.

				KAY
			(disbelieving)
		I'm going to the market.

				MAN
		If you could just give us a list,
		we'll pick up anything you want.

				KAY
		Whose orders are these?

				MAN
		Mr. Hagen's, ma'am.

We notice Hagen walking to them in the background.

				HAGEN
		Kay.

VIEW THROUGH THE GATE

Hagen approaches the car; Kay gets out so they can talk away
from the children.

				HAGEN
		I wanted to explain this myself...
		I had business in Carson City.

He walks with her a little way from the others; the children
run out of the station wagon, and start to play.

				HAGEN
		It's Michael's request...for your
		safety.  We can send out for
		anything you need.

				KAY
		I'm supposed to stay in my house.

				HAGEN
		Within the compound will be fine.

				KAY
		I was supposed to take the children
		to New England next week.

				HAGEN
		That's off now.

				KAY
		I'm going to see my parents.

				HAGEN
		Kay, Michael didn't tell me a lot;
		and what he did tell me, I can't
		repeat.  But the responsibility for
		you and the kids was the most
		important thing he left me with.

				KAY
		How long does this go on?

				HAGEN
		I don't know.
			(pause)
		I'm sorry, Kay...

				KAY
		Am I a prisoner?

				HAGEN
		That's not the way we look at it.

Angrily, without another word, Kay turns away from him, and
walks to her children, ignoring the running station wagon.

EXT. ITALIAN LUXURY LINER - DAY

The luxury liner making its way across the Atlantic.

INT. ITALIAN LUXURY LINER - MED. VIEW - DAY

The PURSER followed by several white uniformed associates
knocks on the door of something designated the "Leonardo
Suite." He is holding a telegram.

The door opens, and a tanned Merle peeks out of the door.

				PURSER
			(holding up the telegram)
		I'm terribly sorry to disturb you
		but we have received two telegrams.

				MERLE
			(reluctantly)
		Well...come in.

This entourage enters the suite, an impressive and
beautifully spacious luxury suite.  Connie is relaxing.

				CONNIE
		What is it?

				PURSER
		Yes.  One is from our office in New
		York.  The check that you wrote for
		your passage has been returned.

				CONNIE
		Can't be...

				MERLE
		Why don't you wire your bank?

				PURSER
		The other telegram is from your
		bank.  Your account has been closed
		and the company is warned not to
		extend any credit.

				CONNIE
		I'll take care of it in Naples.

				PURSER
		The company hopes so.  But for now,
		we have orders to change your
		accommodations.

And with that, the men in white begin to pack Connie and
Merle's luggage.

				CONNIE
		That son of a bitch!

INT. ITALIAN LUXURY LINER - TINY THIRD CLASS CABIN - NIGHT

Connie and Merle are attempting to sleep in the miniature
cabin in bunk beds.  The little space is crowded with their
trunks and luggage.  Merle can barely hang onto the bunk,
the boat pitches so violently below.

EXT. TRAIN TRACKS - MED. VIEW - DAY

A train speeds across the countryside.

INT. THE TRAIN - DAY

Inside the corridor, a porter advances, and knocks on the
door of a stateroom.  A voice tells him to enter.  OUR VIEW
enters with him as he carries a tray of lunch.  From this
POV we see Michael Corleone sitting in the compartment.

				PORTER
		Mr. Paul?

				MICHAEL
		Yes.

				PORTER
		You ordered lunch?

				MICHAEL
		Put it right there.

The porter does so; as he places the tray down, he catches a
glimpse of a second person in the compartment with Michael.

HIS VIEW

A very fierce, almost maniacal looking man, BUSSETTA.  He
nods that the porter should leave.

				MICHAEL
		Thank you.

The porter takes his advice and leaves quickly, closing the
door behind him.

VIEW THROUGH THE WINDOW

Michael and his mysterious companion have lunch together on
the moving train.

EXT. GULFSTREAM RACE TRACK IN MIAMI - HIGH FULL VIEW - DAY

The empty parking lot of the Gulfstream track, on an off-
race day.

CLOSER VIEW

Michael sits behind the wheel of a nondescript late model
car.  Bussetta sits in the rear.

Another car swings into the lot.  Michael starts his car,
and pulls out of the lot; the second car following.

NEW VIEW

This car pulls out and begins to follow them.  Michael
glances back by adjusting the rear view mirror, and nods to
Bussetta.

Michael's car begins to slow down, allowing the other car to
overtake them.

The overtaking car hesitates a moment, moving side by side
with them.

Michael glances toward the driver.

MICHAEL'S VIEW

We recognize Johnny Ola, who waves a greeting to Michael,
and then continues on to lead him.

EXT. SUBURBAN MIAMI NEIGHBORHOOD - DAY

Ola's car leads Michael's through a middle-class suburban
area of $30,000 to $40,000 homes.  There are small channels
with sporting and fishing boats parked near the houses.
Ola's car pulls up in front of a very simple, tract-type
home.  Michael's car parks nearby.

				MICHAEL
			(Sicilian)
			 (to Bussetta)
		You'll wait in the car.

Ola has gotten out of his car and walks up the little path
to the front door.  Michael waits.

Ola rings the bell, and after a moment, a rather pretty,
middle-aged WOMAN answers, remaining behind the screen door.
Ola says a few things to her and she disappears, leaving the
door open.

Ola comes down the steps, looks at Michael, nodding to him.
Ola then gets into his car and drives off.  Michael walks up
the walkway and enters the little house, closing the door
behind him.

This woman, TERRI ROTH, is in the kitchen, looking out at
Michael.

				TERRI
		I'm just going to make lunch.  How
		about a tuna fish sandwich?

				MICHAEL
		Thank you, Mrs. Roth.

She hurries halfway up the staircase.

				TERRI
		Hyman...HYMAN, your friend is here.
			(turning to Michael)
		Why don't you go right upstairs, Mr.
		Paul?

				MICHAEL
		Fine.

He continues upstairs; she goes into the kitchen.

				TERRI
		I'll give a yell when lunch is ready.

Michael continues up to a small den on the second floor; we
can HEAR the sound of a baseball game coming over the
television.

INT. HYMAN ROTH'S DEN - DAY

Michael enters the den: it's very comfortable, but somewhat
like a senior citizen's retirement home in Florida.

MICHAEL'S VIEW

There, sitting before the television is a small man in his
middle sixties, thin, with a wizened face, looking like a
small-time retired Jewish businessman.  This is HYMAN ROTH.

				ROTH
		Sit down, this is almost over.  You
		follow the baseball games?

				MICHAEL
		Not for a few years.

				ROTH
		I like sporting events -- I really
		enjoy watching them in the afternoon.
		One of the things I love about this
		country.  I loved baseball ever
		since Arnold Rothstein fixed the
		World Series of 1919...I heard you
		had some trouble.

				MICHAEL
		Yes.

				ROTH
		What a mistake; people behaving
		like that, with guns.
			(he shakes his head)
		It was my understanding we left all
		that behind.  But, let me tell you,
		the important thing is that you're
		all right.  Good health is the most
		important thing; more than success;
		more than power; more than money.

				MICHAEL
		The incident of the other night is
		a nuisance that I can take care of.
		I came to you because I want
		nothing to affect our agreement; I
		wanted to clear everything I'm
		going to do with you, just in case.

				ROTH
		You're a considerate young man.

				MICHAEL
		You're a great man, Mr. Roth, I
		have much to learn from you.

				ROTH
			(warmly)
		However I can help you...

				MICHAEL
		The Rosato Brothers have performed
		services for you in the past; I
		understand that they are under your
		protection.

				ROTH
			(simply)
		We do favors for each other...

				MICHAEL
		Technically, they are still under
		the Clemenza wing of the Corleone
		Family, now run by Frankie
		Pentangeli.  After Clemenza died,
		the Rosatos wanted territory of
		their own.  Pentangeli refused, and
		came to me, asking for permission
		to eliminate them.  I, of course,
		knew of their relationship with
		you, and in gratitude for your help
		with the Tropicana matter, turned
		him down.  Pentangeli was furious,
		and paid one hundred and fifty
		thousand dollars to have me killed.
		I was lucky and he was stupid.
		I'll visit him soon.
			(leaning toward the
			old man, sincerely)
		The important thing is that nothing
		jeopardize our plans, yours and
		mine.  This thing of ours, that we
		will build.

The old man touches Michael's hand, warmly.

				ROTH
		Nothing is more important.

				MICHAEL
			(quietly)
		Pentangeli is a dead man; do you
		object?

				ROTH
		It's always bad for business; but
		you have no choice.

				MICHAEL
		Then it's done.  I must choose his
		replacement: it cannot be Rosato.

				ROTH
		Of course you must keep control of
		your family.

He turns to Michael, turning the volume higher on the
television, and moving closer to his young partner.

				ROTH
		Michael, these things are
		unimportant.  Who should be the
		manager of a dime store, Joe or
		Jack?  Unimportant.  You do what
		you think is right.  You're a young
		man, and I'm old and sick.  What we
		do together in the next few months
		will be history, Michael; it has
		never been done before.  We will do
		this historical thing together, and
		even your Father could never dream
		it would be possible.  We are
		bigger than U.S. Steel, you and
		me... because in America, anything
		is possible!
			(pause)
		But soon I will be dead, and it
		will all belong to you.

There is a KNOCK on the door, and Terri Roth pushes the door
open with her hip.

				TERRI
		My goodness, you'll rupture your
		eardrums, Hyman.

She puts the tray down, and turns down the television.

EXT. ROTH'S HOUSE - MED. VIEW - DAY

The sinister Bussetta waits patiently in the rear seat of
the car, outside Roth's modest house.

EXT. DOWNTOWN NEW YORK - MOVING VIEW - DAY

A black Cadillac moves down the street, slowed by the
Festivities of the Festa that is in progress: people milling
around, buying souvenirs at the many stands set up.

Sausage and grilled meats are prepared, just as they were
years ago.  Electric lights are strung from the street
lamps, and brightly colored banners pronounce the "Festa of
the Madonna."

MOVING CLOSE VIEW

Willy Cicci drives, frustrated that he cannot go any faster.
Next to him, Frankie Pentangeli sits, catching a few seconds'
snooze.

MED. VIEW

The black car pulls up; another car that had been following
it parks nearby.

One of Pentangeli's button men gets out of the car, and
steps into a small Italian restaurant; he exits quickly, and
nods affirmatively toward Pentangeli's Cadillac.

The group of them step out quickly, men huddled around
Pentangeli, and enter the restaurant.

INT. THE RESTAURANT - DAY

The restaurant is quite empty, despite the excitement out on
the street.

Pentangeli immediately sits at a table with a tall, dark,
snappily dressed young man, CARMINE ROSATO.

Nearby, on the other side of the room is Rosato's brother,
TONY, seated with a group of their men.

At another table in the restaurant is a table of Pentangeli's
people: they are joined by bodyguards.

				PENTANGELI
		Rosato, where's your brother?

				ROSATO
		Sitting right behind you.

Pentangeli glances behind himself.

				PENTANGELI
		He don't want to talk?

				ROSATO
		We worked it all out beforehand.

				PENTANGELI
		Are we going to eat or what?

				ROSATO
		Sure, on me.  I got Diner's Club.

				PENTANGELI
			(sarcastically)
		Forget it; I'm suddenly without an
		appetite.  You're making big
		trouble, Carmine.

				ROSATO
		You weren't straight with us,
		Frankie, what else could we do?

				PENTANGELI
		We could have talked first, saved a
		lot of running around.

				ROSATO
		You wasn't listening, you didn't
		want to talk.

				PENTANGELI
		Don't I look like I'm listening?

				ROSATO
		We want Brooklyn one hundred
		percent.  No more taxes to you.  We
		want to be only loosely connected
		with your family -- sort of a
		under-family all of our own.  Then
		we can act on all internal matters
		without talking.  Also we want you
		to inform Michael Corleone that we
		can deal directly with him.

				PENTANGELI
		I'm a little hungry, maybe I'll
		order something.  Joe.
			(one of his men)
		Get me some bracciole or something.
		And pay cash.
			(to Rosato)
		And in return for these concessions,
		what do you do for me?

				ROSATO
		We will release the hostages,
		number one.  Number two, we're here
		for you to count on when you need
		us.  We're independent, but we're
		here if you need us.  In general,
		we'll cooperate with you and your
		businesses, and you in turn will
		cooperate with us.  Pari persu.

				PENTANGELI
		Pari Persu; what the fuck is Pari
		persu...?

				ROSATO
		My lawyer went over this beforehand.

				PENTANGELI
		What assurances do I have that
		there will be no more kidnapping,
		no more hits?

				ROSATO
		The same assurance we got from you.

				PENTANGELI
		What if I say shove it?

				ROSATO
		Then Carmine Fucillo and Tony Blue
		DeRosa will need to be fitted for
		slabs.

				PENTANGELI
		You want a war?

				ROSATO
		We got no choice.

				PENTANGELI
		You know if there's a war I'll go
		to the commission and the commission
		will side with me.  That puts me
		and the other New York families
		against you.

				ROSATO
		We got friends in the commission.

				PENTANGELI
			(getting angry)
		I'm talking about Italians!

				ROSATO
		What about Michael Corleone?

				PENTANGELI
		He supports me.

				ROSATO
		Maybe, yes... maybe no.

One of Pentangeli's men approaches with a plate of Italian
food.

Pentangeli stands up, angered by this remark of Rosato's; he
pushes the dish of food out of the surprised Bodyguard's
hands.

				PENTANGELI
		You drove old Pete Clemenza to his
		grave, Carmine; you and your
		brother.  Turning on him; trouble
		in his territories -- you and your
		demands.  I hold you responsible,
		just as though you shot him in the
		head.  And I ain't gonna let that
		go for long!

Pentangeli walks out of the restaurant; there's a little
tension between the bodyguards of the two factions.

				ROSATO (O.S.)
		Hey, Five-Angels...

He gives him the arm.

Frankie's face turns red, like he wants to have it out here
and now; but Willy Cicci calms him down, and they all make
their move out.

EXT. THE RESTAURANT - DAY

Pentangeli gets into the car.

				PENTANGELI
		Nobody I hate calls me Five-Angels
		to my face!

He slams the door.

EXT. PENTANGELI'S LONG BEACH ESTATE - DAY

Part of the old estate of Don Corleone.  By now, the wall
has been torn down, and the other houses sold off.

His car is parked; Pentangeli steps out, still angry over
the confrontation.  As he approaches the house, he notices
something strained about the bodyguards who discreetly guard
his house.  No one seems to want to tell him.

				PENTANGELI
		What's up?

Pentangeli glances over to the front door foyer.

PENTANGELI'S VIEW

The strange and silent Bussetta, the man who now always
travels with Michael.

INT. PENTANGELI'S HOUSE - DAY

Pentangeli enters; he sees his WIFE, standing oddly in the
hallway.

				PENTANGELI
			(Sicilian)
		What's this?

				WIFE
		Michael Corleone.

				PENTANGELI
		One Michael Corleone...Dove?

				WIFE
			(Sicilian)
		He's in your study.

He knows it is very very serious for Michael to be here in
his home.

He automatically moves into his study.

INT. PENTANGELI'S STUDY - DAY

Michael stands quietly in the room.  This was once his
father's study, although it is totally redecorated.
Pentangeli starts sweating, and moves toward the young Don,
and kisses his hand.

				PENTANGELI
		Don Corleone, I wish you let me
		know you was coming.  We could have
		prepared something for you.

				MICHAEL
		I didn't want you to know I was
		coming.  You heard what happened in
		my home?

				PENTANGELI
		Michael, yes, we was all relieved...

				MICHAEL
			(furious)
		In my home!  In the same room where
		my wife was sleeping; where my
		children come in their pajamas, and
		play with their toys.

He's terrified Pentangeli with his anger; then, just as
suddenly, he talks quietly, calmly.

				MICHAEL
		I want you to help me take my
		revenge.

				PENTANGELI
		Michael, anything.  What is it I
		can do for you?

				MICHAEL
		I want you to settle these troubles
		with the Rosato Brothers.

				PENTANGELI
		I was just going to contact you,
		Michael; we just had a 'sit-down' -
		in fact, I just come from there.

				MICHAEL
		I want you to settle on their terms.

				PENTANGELI
		Mike, I don't understand.  Don't
		ask me to do that.

				MICHAEL
		Trust me; do as I ask.

				PENTANGELI
		It would be the beginning of the
		end for my family.  How can I keep
		all my other territories in line if
		I let two wise-guys stand up and
		demand this and that, and then give
		it to them?

				MICHAEL
		Frankie...do you respect me?  Do I
		have your loyalty?

				PENTANGELI
		Always... But sometimes I don't
		understand.  I know I'll never have
		your kind of brains, in big deals.
		But Mike, this is a street thing.
		And Hyman Roth in Miami is behind
		the Rosato Brothers.

				MICHAEL
		I know.

				PENTANGELI
		Then why do you want me to lay down
		to them?

				MICHAEL
			(coldly, but convincing)
		Frankie, Roth tried to have me
		killed.  I'm sure it was him, but I
		don't know yet why.

				PENTANGELI
		Jesus Christ, Michael, then let's
		hit 'em now, while we still got the
		muscle.

				MICHAEL
		This was my father's old study.
		When I was a kid, we had to be
		quiet when we played near here.
		When I was older, I learned many
		things from him here.  I was happy
		that this house never went to
		strangers; first Clemenza took it
		over, and then you.  My father
		taught me, in this room, never to
		act until you know everything
		that's behind things.  Never.  If
		Hyman Roth sees that I interceded
		with you in the Rosato Brothers'
		favor, he'll think his relationship
		with me is still sound.  I'm going
		somewhere to meet him tomorrow.  We
		have friends in some very important
		business that we're making.  Do
		this for me; you make the peace
		with the Rosato Brothers on their
		terms.  Let the word out that I
		forced you; you're not happy with
		it, but acquiesced, just because
		of me.  It will get back to Hyman
		Roth.  Do this, Frankie.  You can
		trust me.

				PENTANGELI
		Sure, Mike.  I'll go along.

				MICHAEL
		Good.

They embrace; Michael kisses him.  He looks at the young
Don, thoughtfully.

INT. TROPICANA HOTEL - CLOSE VIEW - DAY

The money trays are carefully unloaded from the gaming
tables, and put on a cart with others.

The cart, preceded and followed by security guards, is then
wheeled through the casino, into a private, counting room.

INT. COUNTING ROOM - MED. VIEW - DAY

The guards leave the room; the door is locked after them,
leaving only Hagen, Neri and an ACCOUNTANT, a very fat man.
The numbered boxes are opened, and cash and checks are
spread out on the counting table.

The accountant begins with amazing speed and skill, to count
and divide the money.

				NERI
		Fifteen percent skim?

				HAGEN
		Twenty-five this time.

The accountant stops, and looks up to Neri.

				NERI
		It might show.

				HAGEN
		Mike wants it.

Neri nods, and the accountant continues.  Neri opens a door,
allowing a sandy-haired man, a COURIER, into the room.  The
cream is placed into his pouch personally by Neri.

				NERI
		We've never sent this much with one
		courier.

				HAGEN
			(to the courier)
		Your plans are a little different
		this time.  You skip Miami, and go
		straight to Geneva.  It's to be
		deposited to this number.
			(handing him a small envelope)
		And it's got to be there by Monday
		morning, no slip-up.

				COURIER
		I think I was 'picked-up' last trip.
		That hour layover I had at Kennedy.
		I went over and bought a paper...

Neri has finished putting the 'creamed' money into the pouch.

				NERI
		Those were our people.

				COURIER
		Okay, just thought you should know.

He is just about to close and lock the pouch, when Hagen
gestures that he should wait, and adds more stacks of
carefully packaged bills into the pouch.  Then Neri locks
it, and handcuffs it to the courier's arm, looking
inquiringly at Hagen.

				HAGEN
		Let them count.

The courier is shown out through a private door, and then
the first door is opened.  Two accountants come in with the
guards, and the trays are opened, and the counting process
is begun all over again, this time with the State Tally
sheets.

INT. TROPICANA CORRIDOR - MOVING VIEW - DAY

The courier continues on his way; followed by Hagen and Neri.

				NERI
		What's up?

				HAGEN
		No questions.

				NERI
		I got to ask questions, Tom,
		there's three million dollars cash
		in that pouch; Mike is gone and I
		have no word from him.

				HAGEN
		Al, as far as you're concerned, I'm
		the Don.

				NERI
		How do I know you haven't gone into
		business for yourself?

This hurts Tom; but he is a reasonable man, and he knows he
owes Neri some explanation.

				HAGEN
		You've been through a lot with us
		so I'm going to give you the truth.
		Mike knows it was someone within
		the compound that set him up for
		that hit.  So nobody is to know
		where he is, not you, not Rocco,
		not even his brother Fredo.  Sorry,
		Al, I know how you feel about
		Mike...but he still remembers Tessio.

EXT. KEY WEST - NIGHT

Michael is led to a desolate, night-lit private dock.  He is
followed by the ever-present Bussetta, and they are helped
onto a light-weight, luxury cabin cruiser.  The crew cast
off various ropes, and the boat sets out into the night.

							FADE OUT.

FADE IN:

EXT. TAHOE ESTATE - DAY

A seaplane lands nicely by the private Corleone harbor;
Hagen disembarks with his inevitable overloaded briefcase.
He continues down the ramp, past several Buttonmen, dressed
in summer casual attire, and who resemble secret service men
rather than thugs.

His wife THERESA lies on a blanket on the great lawn, with
her youngest children, who run to their father for a kiss.

				THERESA
		Hungry?

				HAGEN
		Just a little.

				THERESA
		I've invited Mama, Sandra and the
		kids for barbecue.

				HAGEN
		What about Kay?

				THERESA
		I couldn't find her.  She's been so
		broody, sticks to herself.

EXT. TAHOE LAWN BARBECUE - MED. VIEW - DAY

Hagen and Sonny's boys are throwing a football around on the
lawn; the littler kids running after them.

Coals are burning in the old style stone barbecue, and
several tables are set for the family.

In the distance, there is always evidence of the bodyguards.

Theresa, Mama and Sandra prepare the steaks.

Hagen relaxes in a sports shirt.

				HAGEN
		Let me try Kay.

He crosses the lawn, to the house on the beach where Michael
and his family live.  Is about to knock on the door:

				HAGEN'S SON
		Hey, Pop, heads up!

The football is flying in his direction; he catches it and
throws it back.  Then he cracks the door open, and peeks in.

INT. MICHAEL'S HOUSE - DAY

				HAGEN
		Kay?

He steps in, the beautiful summer living room is neat, but
empty.

				HAGEN
		Anyone hungry?

He moves through the house more quickly; into the dining and
recreation room areas.  A cat jumps off a pile of cushions
and runs across the room.

				HAGEN
		Hello?

				SANDRA (O.S.)
		She's gone, Tom.

Sandra has followed him into Michael's house.

				HAGEN
		What do you mean gone?

				SANDRA
		The Barretts from Rubicon Bay came
		by in a new speedboat.  Rocco tried
		to say she wasn't in, but Kay
		spotted them and asked if they
		would take her and the kids for a
		ride.  That was three hours ago.

				HAGEN
			(furious)
		Why didn't someone tell me!

				SANDRA
		I wanted to tell you alone; your
		wife doesn't know what's going on.

Hagen rushes out of the house.

EXT. TAHOE LAWN - DAY

Hagen moves quickly out of Michael's house; moving across
the lawn to the boathouse.

				HAGEN'S SON
		Hey, Dad!

This time he ignores the thrown ball, and moves directly to
Rocco, who is by some men near the boathouse.

				HAGEN
		Rocco!

				ROCCO
		I know.  I went down to the Barrett
		house.  But she's gone.  They drove
		her and the kids to North Tahoe
		airport.

				HAGEN
		Goddamn it, where were you?

				ROCCO
		I was in my house.  Willy tried,
		but it would have taken some
		strong-arm to stop her, and he
		figured you wouldn't want that.

INT. THE BOATHOUSE - DAY

They enter the boathouse.

				HAGEN
			(to one of the men)
		Get me a Scotch and water.

The man hurries behind the bar.

				ROCCO
		She took a flight to San Francisco.
		We figure she's going to connect to
		New Hampshire; her parents' place.

				HAGEN
			(almost to himself)
		I can't let him down.

He swallows the drink down in several gulps.  And then looks
up to his men watching him.  He's embarrassed to have shown
such weakness.

				HAGEN
		All right, let me think a minute.

Rocco clears the men out.

				ROCCO
		Me too, Tom?

				HAGEN
		Yeah, give me a minute.

Rocco gone, Hagen moves behind the enormous bar, and pours
himself a giant drink.  He drinks that, and calms himself.

				HAGEN
		Oh Christ, Pop.  It was so good
		when you were alive.  I felt I
		could handle anything...

EXT. VIEW FROM BOAT - FULL VIEW - DAY

A beautiful coastal view of a tropical Caribbean city.  An
extraordinary view, high buildings, palm trees, all set
right on the bay.

MED. CLOSE VIEW ON MICHAEL

on the cruiser, Bussetta a little distance away, watching,
but never speaking.  The dark-skinned CAPTAIN of the cruiser
keeps pointing repeatedly.

				CAPTAIN
		Habana, Habana.

EXT. HAVANA STREET - MOVING VIEW - DAY

Michael and Bussetta are driven in a Mercury sedan, making
its way through the streets of Havana.

CLOSE VIEW ON MICHAEL

looking out the window.

MICHAEL'S POV

Crowded streets, occasional roving bands playing for the
tourists; there is much evidence of tourism: Americans
walking through the streets with cameras.  Occasionally, we
see a Cuban with a row of numbers attached to his hat,
carrying a big sheet of the daily lottery numbers.  From all
of these street impressions, the city is booming with
activity, but there is also much evidence of whores and
pimps and little children begging in the streets.

MED. VIEW

The big American car stops at an intersection.  Bussetta is
sitting in the forward passenger side; while Michael is in
the back.  He hears tapping on the window; he turns and sees
four Cuban boys tapping on his window and extending their
hands, and rubbing their stomachs as though they were hungry.
The Cuban driver rolls down his window and shouts them away
in Spanish.

INT. HAVANA CASINO LOBBY - MOVING VIEW - NIGHT

Michael is led through a beautiful wooden lobby of the
hotel, done in Spanish style, apparently just recently
completed.  He is approached by a thin, mousy man, SAM ROTH,
who ushers him toward the casino entrance.

				SAM ROTH
		Hiya, Mr. Corleone, I'm Sam Roth.
		Welcome to the Capri; my brother's
		upstairs.  You wanta take a rest
		before you see him, or can I get
		you something, anything at all?

				MICHAEL
		No, I'm fine.

He leads Michael into the main casino.

				SAM ROTH
		This is it!  We think it makes
		Vegas look like the corner crap game.

				MICHAEL
		Very impressive.

				SAM ROTH
		Jake, Jake, come over here.  Mike,
		I want you to meet Jake Cohen; he
		manages the casino for us.

				COHEN
			(appreciating
			Michael's status)
		Mr. Corleone.

Sam turns to Bussetta and extends his glad-hand.

				SAM ROTH
		Pleasure to meet you, I'm sure...

He gets no response whatsoever from Bussetta.

INT. PRESIDENTIAL PALACE - NIGHT

An extremely tall, well-built Cuban, tanned and wearing an
attractive mustache, LEON, in his middle forties, reads from
a prepared paper.  His sentences are translated by a smaller
man, standing to his rear.

				LEON
			(Spanish)
		Most respected gentlemen, allow me
		to welcome you to the City of
		Havana, the Republic of Cuba on
		behalf of His Excellency, Fulgencio
		Batista.

THE VIEW BEGINS TO MOVE along the various men gathered for
this meeting.

				LEON (O.S.)
		I'd like to thank this distinguished
		group of American Industrialists,
		for continuing to work with Cuba,
		for the greatest period of
		prosperity in her entire history.
		Mr. William Proxmiro, representing
		the General Fruit Company... Messrs.
		Corngold and Dant, of the United
		Telephone and Telegraph Company; Mr.
		Petty, regional Vice-President of
		the Pan American Mining Corporation;
		and, of course, our friend Mr.
		Robert Allen, of South American
		Sugar.  Mr. Nash of the American
		State Department.  And Mr. Hyman
		Roth of Miami, and Michael Corleone
		of Nevada representing our
		Associates in Tourism and Leisure
		Activities.

VIEW ON THE ENTIRE GROUP

Leon pauses to take a drink of water.  Then proudly, he
lifts a shiny yellow telephone for all to see.

				LEON
		The President would like to take
		this opportunity to thank U T&T for
		their lovely gift: a solid gold
		telephone!  He thought all you
		gentlemen would care to take a look
		at it.

He hands the heavy phone set to one of his aides, and it is
passed in turn to each of the men in attendance.

				CORNGOLD
		Your Excellency, perhaps you could
		discuss the status of rebel activity
		and how this may affect our
		businesses.

MED. CLOSE VIEW ON MICHAEL

He receives the telephone, and glances at it before passing
it on to Hyman Roth.

				LEON (O.S.)
		Of course.  The rebel movement is
		basically unpopular, and since July
		of 1958 has been contained in the
		Oriente Province, in the mountains
		of the Sierra Maestra.

Michael passes the phone on to Roth.

				LEON
			(continuing)
		We began a highly successful
		offensive against them in March,
		and activities within the city
		itself are at a minimum.  I can
		assure you we'll tolerate no
		guerrillas in the casinos or
		swimming pools!

General subdued laughter.

A CUBAN STREET - LATE DAY

Police are stopping traffic.  Michael's Mercury is among the
cars; a police officer, seeing that some important person is
being driven, walks up to the driver.  He leans forward, and
says something in Spanish to the driver.

The driver, in turn, leans over to Michael.

				DRIVER
		He says it will just be a short
		time and they'll let us through.

Michael looks out the window.

MICHAEL'S VIEW

The old building has been totally surrounded by police and
military vehicles.  Right at this moment, they are waiting
lazily, but soldiers are there with automatic weapons ready.
There is a momentary commotion inside the building, and the
men brace up.  A Captain of the Army detachment says
something in Spanish over a megaphone; and his men put their
weapons at the ready, as other policemen lead a group of
civilians out of the building with their hands up.

They are moved over to some military trucks, where they are
frisked before being loaded.

All of a sudden, one of the civilian rebels breaks loose,
and rushes toward the command vehicle.  He hurls himself
into the vehicle, as two police try to pull him out.  A
second later, and there is an explosion; the man obviously
having hidden a grenade on his body, sacrificing his own
life to take the life of the Captain.

There is a commotion, but the military quickly quell it.

CLOSE VIEW ON MICHAEL

watching.  The police rush to Michael's car and guide it
outside of the trouble area.

MED. VIEW

as they lead and escort the Mercury out of the area.

EXT. HAVANA COUNTRY CLUB - CLOSE VIEW - DAY

Some glasses; rum is poured into them; then Coca Cola.
Quarter limes are squeezed.

				SAM ROTH (O.S.)
		Rum... Coca Cola...a squeeze of
		fresh lime...

Sam prepares the drinks for his brother, Hyman, and a group
of men, including Michael.

				MAN
		Cuba Libres.

				MICHAEL
		I was told the Cubans now call this
		drink: "La Mentira."

				ROTH
		I still don't speak Spanish, Michael.

				MICHAEL
		It means... "The Lie."

A moment's hesitation, then a few of the men laugh.  Now two
Cubans in white carry a table which has a lovely small cake
on it.

				SAM ROTH
		The cake is here.

They all raise their glasses to the old man.

				EVERYONE
			(ad lib)
		Happy Birthday!

Roth glances at the cake and its inscription, is pleased.

				ROTH
		I hope my age is correct: I am
		always accurate about my age.

Some laugh.  He nods, and they begin to cut it, put a piece
on plates, and carry them to the different men.

				ROTH
		Everything we've learned in Vegas
		is true here; but we can go further.
		The bigger, the swankier, the
		plusher the store, the more a sense
		of legitimacy, and the bigger
		business we do.
			(looking at the plate
			brought to him)
		A smaller piece.  What we've
		proposed to the Cuban Government is
		that it put up half the cash on a
		dollar for dollar basis.
			(accepting a smaller piece)
		Thank you.  We can find people in
		the United States who will put up
		our share for a small piece of the
		action, yet we will retain control.

				ONE OF THE MEN
		How much?

				ROTH
		A hundred million dollars.  But
		only if this Government relaxes its
		restrictions on importing building
		materials; we'll need some new
		laws, too, but that will be no
		difficulty.

				ANOTHER MAN
		What are import duties now?

				ROTH
		As much as seventy percent.  Also,
		I'm working out an arrangement with
		the Minister of Labor so that all
		our pit bosses, stick-men and
		Dealers, can be considered
		specialized technicians eligible
		for two year visas.  As of now
		they're only allowed in Cuba for
		six months at a time.  In short,
		we're in a full partnership with
		the Cuban Government.

VIEW ON MICHAEL

is handed a piece of cake.  Roth moves over to a folder of
documents.

				ROTH
			(continuing)
		Here are applications from Friends
		all over the States.  I understand
		Santo Virgilio in Tampa is trying
		to make his own deal.  Well, the
		Cuban Government will brush him off.
		The Lakeville Road Boys are going
		to take over the Nacionale here.
		I'm planning a new hotel casino to
		be known as Riviera.  The new Capri
		will go to the Corleone Family.

MED. VIEW

The cake is sliced and carried to each of the men.

				ROTH
		Then there's the Sevilla Biltmore;
		the Havana Hilton, which is going
		to cost twenty-four million --
		Cuban banks will put up half, the
		Teamsters will bankroll the rest.
		Generally, there will be friends
		for all our friends including the
		Lieutenant Governor of Nevada;
		Eddie Levine of Newport will bring
		in the Pennino Brothers, Dino and
		Eddie; they'll handle actual casino
		operations.

And seeing that all of his friends have been served, Roth
raises his fork.

				ROTH
		Enjoy.

				MICHAEL
		I saw an interesting thing today.
		A man was being arrested by the
		Military Police; probably an urban
		guerrilla.  Rather than be taken
		alive, he exploded a grenade hidden
		in his jacket, taking the command
		vehicle with him.

The various men look up as Michael eats his cake, wondering
what the point of it is.

				MICHAEL
		It occurred to me: the police are
		paid to fight, and the Rebels are
		not.

				SAM ROTH
		So?

				MICHAEL
		So, that occurred to me.

VIEW ON ROTH

He understands Michael's point, if the others do not.

				ROTH
		This country has had rebels for the
		last fifty years; it's part of
		their blood.  Believe me, I know...
		I've been coming here since the
		twenties; we were running molasses
		out of Havana when you were a baby.
		To trucks owned by your father.
			(he chuckles warmly
			over the memory)
		We'll talk when we're alone.

And he returns his attention to the men who are gathered
with him on his birthday.

EXT. ROTH'S PRIVATE TERRACE - DAY

Michael sits alone with the old man, on a terrace that
overlooks the city.

				ROTH
		You have to be careful what you say
		in front of the others... they
		frighten easy.  It's always been
		that way, most men frighten easy.

				MICHAEL
		We're making a big investment in
		Cuba.  That's my only concern.

				ROTH
		My concern is that the three
		million never arrived at Batista's
		numbered account in Switzerland.
		He thinks it's because you have
		second thoughts about his ability
		to stop the rebels.

				MICHAEL
		The money was sent.

				ROTH
		Then you have to trace it.  Michael,
		people here look at me as a reliable
		man.  I can't afford not to be
		looked on as a reliable man.  But
		you know all that; there's nothing
		you can learn from me.  You
		shouldn't have to put up with a
		sick old man as a partner.

				MICHAEL
		I wouldn't consider anyone else.

				ROTH
		Except the President of the United
		States.

He laughs slyly, as though this is some private joke between
them.  Then his laughter becomes a cough, which he painfully
stifles with a handkerchief.

				ROTH
		If only I could live to see it,
		kid; to be there with you.  How
		beautifully we've done it, step by
		step.  Here, protected, free to
		make our profits without the
		Justice Department, the FBI; ninety
		miles away in partnership with a
		friendly government.  Ninety miles,
		just a small step, looking for a
		man who desperately wants to be
		President of the United States, and
		having the cash to make it possible.

				MICHAEL
		You'll be there to see it; you'll
		be there.

INT. MICHAEL'S SUITE - NIGHT

The telephone has just rung; Michael listens.

				OPERATOR
		We have your call to Tahoe, Nevada,
		sir.

				MICHAEL
		Thank you.
			(click, click)
		Tom?  Tom, is that you?

				ROCCO (O.S.)
		No, Tom's out of town.  This is
		Rocco.  Who is this?

Michael is openly disturbed that Hagen is not there.  He
hangs up without answering.

EXT. NEW ENGLAND HOUSE - DAY

Tom Hagen steps out of a taxicab a bit tentatively, and then
steps toward the door of a pleasant New England house.  He
rings the bell and waits, hat in hand.  A moment later, the
door opens, and Kay is standing there.

				KAY
		I'm not surprised to see you, Tom.

INT. SMALL ROOM - NEW ENGLAND HOUSE - MED. VIEW - DAY

Out to the yard, where we can see glimpses of little Anthony
playing by himself.

				KAY (O.S.)
		I can't love a man like that; I
		can't live with him, I can't let
		him be father to my children.  Look.

The little boy, moodily by himself.

VIEW ON KAY

obviously moved.

				KAY
		He's not like a little boy... he
		doesn't talk to me; he doesn't want
		to play; he doesn't like other
		children, he doesn't like toys.
		It's as though he's waiting for the
		time he can take his Father's place.
			(almost in tears)
		You know what he told me when he
		was four years old.  He said he had
		killed his Grandfather...

VIEW ON HAGEN

listening, calmly.

				KAY
		... He said he had shot his
		Grandfather with a gun, and then he
		died in the garden.  And he asked
		me... he asked me, Tom, if that
		meant now his father would shoot
		him out of... revenge.
			(she cries)
		How does a four year old boy learn
		the word... 'revenge'?

				HAGEN
		Kay... Kay...

VIEW ON KAY

				KAY
		What kind of a family is this...
		are we human beings?  He knows his
		Father killed his Uncle Carlo.  He
		heard Connie.

				HAGEN
		You don't know that's true.  But
		Kay, just for the sake of an
		argument, let's assume it is, I'm
		not saying it is, remember, but...
		What if I gave you what might be
		some justification for what he
		did... or rather some possible
		justification for what he possibly
		did.

				KAY
		That's the first time I've seen the
		lawyer side of you, Tom.  It's not
		your best side.

				HAGEN
		Okay, just hear me out.  What if
		Carlo had been paid to help get
		Sonny killed?  What if his beating
		of Connie that time was a deliberate
		plot to get Sonny out into the open?
		Then what?  And what if the Don, a
		great man, couldn't bring himself
		to do what he had to do, avenge his
		son's death by killing his
		daughter's husband?  What if that,
		finally, was too much for him, and
		he made Michael his successor,
		knowing that Michael would take
		that load off his shoulders, would
		take that guilt?

				KAY
		He's not the same as when I met him.

				HAGEN
		If he were, he'd be dead by now.
		You'd be a widow.  You'd have no
		problem.

				KAY
		What the hell does that mean?  Come
		on, Tom, speak out straight once in
		your life.  I know Michael can't,
		but you're not Sicilian, you can
		tell a woman the truth; you can
		treat her like an equal, a fellow
		human being.

There is a long silence.

Then Hagen shakes his head; he can tell her no more.

				HAGEN
		If you told Michael what I've told
		you today, I'm a dead man.

				KAY
		When is it finally over?  I want it
		to be over before my baby is born.

				HAGEN
		I don't know.  I hope soon; but
		it's not over yet, and that's why
		you and the kids have to come back
		to me.

He looks at her; it's clear that he has been entrusted with
her safety and her children's.

He is a kind, good man, and seems very nervous and
overwrought.

VIEW ON THE WINDOW

Little Anthony is pressing his face against the glass pane,
as though he senses the adults are discussing something of
importance to him.

INT. TROPICANA HOTEL-CASINO - MED. VIEW - NIGHT

The Baccarat table.  Busy, hundred dollar bills being played.

				LOUDSPEAKER
		Mr. Corleone; Mr. Freddie Corleone,
		telephone please.

				PIT BOSS
		Not here.

VIEW ON THE CRAP TABLES

The play is fast; pit boss presiding; but no sign of Fredo.

				LOUDSPEAKER
		Telephone for Mr. Corleone.

ANOTHER PART OF THE CASINO

We see Neri, ominous, presiding over the entire store.  He
picks up a pit telephone.

				NERI
		He's backstage.
			(and hangs up disgustedly)

INT. TROPICANA BACKSTAGE AREA - MED. VIEW - NIGHT

Fredo is entertaining two showgirls done up in feathers and
what-have-you.

				FREDO
		C'mon, you got fifteen minutes
		before the finale!  I want to show
		you a trick with feathers.

				STAGEHAND
		Phone for you.

				FREDO
		Don't go away; wait a minute.

He takes the phone; we can catch a VIEW of the show going on
from the wings.

				FREDO
			(on the phone)
		Yeah.  Okay.  Who?  Mikey?  But...
		Si... si, capisco.
			(in Sicilian)
		Sure... how much?  I understand.
		Jesus, three million... I won't let
		you down.  Sure.

He hangs up thoughtfully.

				ONE OF THE GIRLS
		Freddie; we still got twelve
		minutes before the finale!

				FREDO
		Yeah... some other time.

EXT. NEW YORK BAR - DAY

There is a light rain.  Pentangeli steps out of his car;
points to Willy Cicci.

				PENTANGELI
		Wait in the car.

He walks up the street, to the bar, where he is greeted by
the tall, handsome Carmine Rosato.  They shake hands.
Pentangeli looks in his hand.

CLOSE VIEW

Rosato has put a crisp one hundred dollar bill in his hand,
folded sharply in two.

				PENTANGELI
		What's this?

				ROSATO
		That's a lucky C note for our new
		deal.

He puts his arm around Pentangeli, and they walk into the bar.

INT. THE BAR - DAY

The bar is fairly empty; and very dark.  Pentangeli and
Rosato step up to the bar; the bartender momentarily stops
polishing glasses to pour a couple of drinks.

				ROSATO
		We were all real happy about your
		decision, Frankie; you're not goin'
		to regret it.

He holds up the glass.

				PENTANGELI
		I don't like the C-note.  I take it
		like an insult.

Suddenly, a garrote is thrown around Pentangeli's throat;
and he is forcefully yanked back into the shadows, all the
way into a wooden telephone booth.

CLOSE VIEW

The folded hundred dollar bill resting on the bar.

MED. CLOSE - THE PHONE BOOTH

We see only Pentangeli's feet and legs, struggling.  We HEAR
the terrible sounds of a man being strangled.

CLOSE ON ROSATO

Calm, and then he sees something that disturbs him.

				ROSATO
		Shit, your friend the cop!

Suddenly, the side door opens, and a shaft of sunlight cuts
through the darkness.

				COP
		Everything all right in there,
		Ritchie?  The door was open.

CLOSE ON THE PHONE BOOTH

Pentangeli's feet stop moving.

				RITCHIE
		Just cleaning up.
			(strained voice)
		You okay?

				COP
		Is that something on the floor?

				ROSATO
		Take him!

				VOICE
		Okay.

				RITCHIE
		Not here; not a cop, not here!

Two figures race through the shadows and race through the
doors.

				COP
			(shouting to his
			partner, in uniform)
		Stutz!  Watch out, Stutz!

EXT. THE BAR - DAY

We see that a patrol car had stopped for its routine visit.
STUTZ, the second patrolman, is just stepping out of his
car; Pentangeli's bodyguard, seeing the commotion, leaps out.
Three men, including Rosato, rush out.  There is gunfire;
Cicci is wounded.

MED. CLOSE

The patrolman is grazed across the face; trying to stop the
flow of blood with his hand.

NEW VIEW

The three assailants jump into the car and drive off.

INT. THE BAR - DAY

The stricken Pentangeli comes back to life.  He can barely
move his lips.

				PENTANGELI
		The bastard.  The dirty bastard, he
		gave me a C-note.  He gave me a
		C-note.

He sees the patrolman leaning over him.

EXT. PATROL CAR - DAY

The Sergeant is on the car radio.

				SERGEANT
		Frankie Pentangeli murder attempt.
		Patrolman Stutz shot.  Sahara
		Lounge - Utica Avenue and Clarendon
		Road.  White Cadillac three or four
		men took off from scene.  Need
		ambulance; Stutz is bad.  Taking
		Pentangeli into custody...

INT. ROTH'S SUITE IN HAVANA - MED. CLOSE VIEW ON ROTH - DAY

His wizened face, pale.  Right now, though, his eyes have a
sparkle as he watches three million dollars in cold cash
being counted on a card table in front of him.

His brother Sam is present, and the sandy-haired Courier, a
little nervous; the one who had left from the Tropicana with
the Corleone skim-money.  Also Johnny Ola.  The money is
evidently all there; Roth picks up a packet; probably a
hundred thousand dollars, and throws it over to the Courier.

				ROTH
		Make it fast; I don't want to
		chance him being seen.

				COURIER
			(frightened)
		What about the arrangements?  How
		can I be sure about the arrangements?

				OLA
		Relax.  You're under our protection;
		the Corleone family will never find
		you.

Ola leads the Courier to the adjoining room where two
smartly dressed Military (Cuban) Police are standing, and a
civilian.  The Courier sees them, looks back to Ola.  One of
the police steps forward, placing the Courier under arrest;
handcuffing him.

				COURIER
		Hey, what's this?

The other takes the packet of money, and hands it to the
civilian, who places it in the briefcase he carries.  The
other officer kneels down and fastens leg manacles.

				COURIER
		The arrangements... YOU BASTARDS!
		What...

The Captain strikes him expertly across the side of his head
with his pistol.

Ola closes the door on this scene.

EXT. THE HAVANA CAPRI - DAY

Fredo Corleone steps out of a car, squints up at the sunshine
and palm trees.  He is holding on tightly to a small satchel,
which he won't let the bellman carry along with his other
things.

INT. MICHAEL'S SUITE - MED. CLOSE VIEW - DAY

Michael and Fredo in a brother's embrace; they kiss each
other.  Fredo is still in his jacket, holding the satchel.

				FREDO
		Mikey.  How are you?

He glances up at Bussetta, who doesn't say a word.  Fredo
extends his hand.

				FREDO
		Hiya, Freddie Corleone.

				MICHAEL
		Mio fratello.

Then Bussetta offers his hand back to Fredo.

				FREDO
			(taking off his jacket)
		What a trip, Jesus Christ, the
		whole time I'm thinking what if
		someone knew what I got in here.

He undoes the combination of the briefcase and starts taking out
cash.  Then he stops, remembering that there's a stranger in
the room.

				FREDO
		Oh, 'scuse me.

				MICHAEL
		It's all right.  He stays with me
		all the time.

				FREDO
		Oh.  Mikey, what's up?  I'm totally
		in the dark.

				MICHAEL
		We're making an investment in Havana.

				FREDO
		Great, Havana's great.  Lots of
		activity in Havana!  Anybody I know
		here?  Five-Angels?  Anybody?

				MICHAEL
		Johnny Ola... Hyman Roth.

				FREDO
		I never met them.

				MICHAEL
		Pentangeli's dead.  He was ambushed
		by the Rosato Brothers.
			(pause)
		Didn't you know that?

				FREDO
		No.  No, I didn't.  Who tells me
		anything?  I been kept in the dark
		so long, I'm getting used to it.

				MICHAEL
		I want you to help me, Fredo.

				FREDO
		That's what I'm here for.

				MICHAEL
		Tonight I want to relax with you.
		The Senator from Nevada is here
		with some people from Washington.
		I want to show them a good time in
		Havana.

				FREDO
		Count on me; that's my specialty.

				MICHAEL
		I'd like to come along.  There's
		been a lot of strain, and I've been
		cooped up in this room for three
		days.

				FREDO
		Me and you, great!  Gimme an hour
		to wash my face and do my research
		and we'll have these Washington
		suckers right where you want 'em.
			(then a thought
			strikes him)
		Poor Frankie Five-Angels.  He
		always wanted to die in bed...with
		a broad.

INT. ROTH'S SUITE - MED. VIEW - DAY

Michael stands at Roth's door carrying the briefcase that
Fredo had brought.

A hotel DOCTOR takes Hyman Roth's blood pressure, while his
wife waits nervously.

				DOCTOR
			(Spanish)
		You must not exert yourself; I will
		write out a prescription and come
		back tomorrow.

				HOTEL MAN
		He's going to write a prescription.

				ROTH
		I want my own doctor; fly him in
		from Miami.  I don't trust a doctor
		who can't speak English.

The doctor is shown out.  Roth gestures to the hotel man,
who also leaves.  Then he looks to his wife.

				ROTH
		Honey, go down to the casino?

				TERRI
		If you feel better...

				ROTH
		I do.  Play the Bingo game.

They kiss, and she leaves.  Also Bussetta and Ola remain.

				ROTH
		My sixth sense tells me you have a
		bag full of money in your hand.

Ola locks the door; Michael nods, and opens the bag, spilling
its contents on the card table.

				MICHAEL
		This doubles my investment.

				ROTH
		Still no word of your courier?
		We'll find him.  But at least this
		will satisfy our friends here.
		You've been invited to the New Year
		reception at the Presidential Home.
		I understand your brother is here
		as well; I hope he'll come.

				MICHAEL
		Six million dollars in cash is a
		high price for a piece of a country
		in the middle of a revolution.

Roth looks patiently at Michael, as though he were a child
who hadn't minded the lesson that he had been taught over
and over again.

				ROTH
		You're a careful kid, and that's
		good.  But look.  An international
		dispatch on the wire service.
		American journalism, not propaganda.
		The government troops have all but
		eliminated the rebels.  All but
		their radio station.

				MICHAEL
		I've read it; I'm pleased that the
		government is doing so well.  As a
		heavy investor, I'm pleased.  How
		did the doctor find you?

				ROTH
		Terrible.  I'd give twice this
		amount to take a piss without it
		hurting.

				MICHAEL
		Who had Frankie Pentangeli killed?

				ROTH
			(taken a bit off-balance)
		Why...the Rosato Brothers.

				MICHAEL
		I know that; but who gave the go
		ahead?

Roth glances to Ola; he is not a fool; he realizes Michael
has begun to suspect him.

				MICHAEL
		I know it wasn't me...so that
		leaves you.

				ROTH
		There was this kid that I grew up
		with; he was a couple years younger
		than me, and sort of looked up to
		me, you know.  We did our first
		work together, worked our way out
		of the street.  Things were good
		and we made the most of it.  During
		prohibition, we ran molasses up to
		Canada and made a fortune; your
		father too.  I guess as much as
		anyone, I loved him and trusted him.
		Later on he had an idea to make a
		city out of a desert stop-over for
		G.I.'s on the way to the West Coast.
		That kid's name was Moe Greene, and
		the city he invented was Las Vegas.
		This was a great man; a man with
		vision and guts; and there isn't
		even a plaque or a signpost or a
		statue of him in that town.  Someone
		put a bullet through his eye; no
		one knows who gave the order.  When
		I heard about it I wasn't angry.  I
		knew Moe; I knew he was headstrong,
		and talking loud, and saying stupid
		things.  So when he turned up dead,
		I let it go, and said to myself:
		this is the business we've chosen.
		I never asked, who gave the go
		ahead because it had nothing to do
		with business.

He regards Michael silently a moment.

				ROTH
			(continuing)
		There's three million dollars on
		that table.  I'm going to lie down,
		maybe take a nap.  When I wake up,
		if it's still there, I'll know I
		have a partner.  If it's gone, then
		I'll know I don't.

The old man turns, and moves in his slippers, toward his
bedroom.

INT. THE CORRIDOR - DAY

Michael closes the door, and moves down the hallway.  He is
followed by Bussetta, who had waited in the corridor.

				MICHAEL
			(Sicilian)
		How sick do you think the old man is?

				BUSSETTA
			(Sicilian)
		He'll live longer than me.

INT. TROPICOR NIGHT CLUB - VIEW ON THE SHOW - NIGHT

A Havana extravaganza, with tall, beautiful showgirls done
up in flamboyant, 'South-of-the-Border' Carmen Miranda
costumes; the lead singer is a six foot blonde doing "Rum
and Coca Cola" in that style.  Her name is YOLANDA.

MED. VIEW

At a large round table, located in an obvious VIP section of
the high, tropically draped room with living ferns and other
tropical planting with artificial stars.

Michael rises, to be introduced by Fredo to some conservative
looking Senatorial types, including Senator Pat Geary of
Nevada.  We notice Bussetta standing nearby.

				FREDO
		Does everyone know everyone, or
		nobody knows nobody.  Here, my
		brother, Michael Corleone... well,
		you know Senator Geary.

Geary warmly shakes Michael's hand.

				SENATOR GEARY
		Good to see you, Mike; I'm glad we
		can spend this time together.

				FREDO
		This is Senator Payton from Florida;
		Judge DeMalco from New York...
		Senator Ream... Mr. Questadt from
		California, he's a lawyer with the
		Price-Control Administration.  And
		Fred Corngold of U T&T.

They all make themselves comfortable.  A waiter with a tray
of drinks appears.

				FREDO
		Gentlemen... your pleasure?  Cuba
		Libres, Pina Coladas, you name it.

				SENATOR GEARY
		I'll take a Yolanda.

Laughter.

				FREDO
		Later, later.  All those girls look
		like they're on stilts!

The various tropical drinks are distributed.

				SENATOR GEARY
		To a night in Havana!

They all join in.

				FREDO
			(aside to Michael)
		Jeeze, it's great you came along,
		Mike... You know, we've never spent
		a night out on the town together.
		I always thought you looked down on
		me for liking a good time.

				MICHAEL
		I never looked down on you, Fredo.
		You don't look down at a brother.

INT. THE CASINO - NIGHT

By now the group has made its way into the casino.  Some of
them are crowded around the crap table; Senator Geary is
with the enormous and beautiful Yolanda, who barely speaks
English.  There are other girls with some of the men; not
with Michael, who gambles dollars while talking to Corngold.

				CORNGOLD
		Our information is that Castro is
		dead.  There are maybe a few
		hundred die-hards in the Sierra
		Maestra; but government troops are
		going to clean them out any day.

Johnny Ola approaches Michael.

				OLA
		Mike, can I talk to you.

Michael follows Ola toward the Baccarat table; a watchful
Bussetta moves, a distance away, with them.

				OLA
		Listen, this Senator from Florida
		already has a hundred grand worth
		of markers on the table.

We can see Senator Ream at the table, making thousand dollar
bets on the Bank.

				OLA
		They asked him to sign paper to
		take down the markers; but he got
		mad; told them to wait until he was
		finished.

				MICHAEL
		Let him gamble.

				OLA
		Okay.  You know he doesn't have
		that kind of money.

				FREDO
		Mike said let him gamble.

Fredo puts his arm around his brother; he is high with the
first attention Mike has ever given him, as though finally
he is being taken seriously; as though his brother needs him.

				FREDO
		Mike, I got something special up my
		sleeve for these boys.  You ever
		hear of "Superman?" And I don't
		mean the comic book.

				MICHAEL
		No.

				FREDO
		Wait'll you see!

INT. HAVANA BAR - NIGHT

Our group are in a large Havana bar; the walls totally
covered with hundreds of fifths of different types of rum
and other liquor.

A couple of the girls from the show are out with the men;
Yolanda herself is giving them a private song and dance.

Fredo is a little loaded, and especially attentive to
Michael this night.

				FREDO
		Mikey, why would they ever hit poor
		old Frankie Five-Angels?  I loved
		that ole sonuvabitch.  I remember
		when he was just a 'button,' when
		we were kids.  We used to put
		bedsheets on our heads, you know,
		like we were ghosts.  An' ole
		Frankie come peek into our room,
		we'd jump up, and he'd always
		pretend like he was really scared.
		You remember?

				MICHAEL
		It was hard to have him killed.

				FREDO
		You?  What do you mean you, I
		thought...

				MICHAEL
		It was hard to have him killed.

				FREDO
		You?  What do you mean you, I
		thought...

				MICHAEL
		It was Frankie tried to have me hit.

				FREDO
		No.  I mean, are you sure?

				MICHAEL
		You know otherwise, Freddie?

				FREDO
		Me?  NO, no, I don't know anything.
		Fellas!  You're all falling asleep.
		We got to see Superman.

CLOSE ON MICHAEL

A growing feeling about his brother.

EXT. GARISH HAVANA STREET - NIGHT

The street is lit with tons of neon signs; it is alive with
people; some roving bands of musicians.  Everywhere are
little boys running around, begging for money.  And in
doorways and windows are silent, dark-skinned women.

				SENATOR REAM
			(pushing away from
			the palm outstretched
			little hands of the boys)
		Goddamn beggars.  Goddamn city of
		beggars and pimps and whores.  And
		we bend over backwards to support
		them with the goddamn sugar quota.

				FREDO
			(to Geary)
		What's eating him?

				SENATOR GEARY
		He lost a quarter million dollars
		at the casino.

				SENATOR REAM
		...goddamn city of whores...

				SENATOR GEARY
		He gave them a bad check.

INT. 'SUPERMAN SHOW' - MED. VIEW - NIGHT

A large room with a succession of platforms arranged step-
like around a circular area which becomes a stage.

There are a hundred or so people, practically all men,
tourists and business men, standing on the different levels,
forming the audience.

In the center of the stage is a thick, telephone type pole,
to which is tied a young Cuban girl, in a flimsy white
sacrificial slip.  A small band, mostly drummers, play some
Latin music.

MED. VIEW

Fredo's party standing on the ramp, looking down at the
spectacle.  They're a little woozy from the drinks and late
hour.  Michael is with them, but now we sense he is using
this time, with all exhausted and drunk, to come to some
important conclusions.

				QUESTADT
		Why do we have to stand?

				FREDO
		Everyone stands.  But it's worth
		it, watch!

VIEW ON THE ARENA

Now two high priestesses, scantily clad, bring in a tall and
muscular Cuban, done up in chains and loin cloth, as though
he were a captured slave.  This is SUPERMAN.

VIEW ACROSS THE MEN TO THE STAGE

				FREDO
		That's him; that's Superman!

Some preliminary pornographic proceedings go on, as the
priestesses lead the slave to the virgin tied to the post.
The music is percussive and wild.

MED. VIEW ON THE MEN

				SENATOR GEARY
		Ohmygod.  I don't believe it.

				QUESTADT
		It's got to be fake.

				FREDO
		That's why they call him Superman.
		Johnny Ola told me about this; I
		didn't believe it.

CLOSE on Michael turning away.  Not because of the spectacle
which he finds disgusting, but at what his brother is saying.

				FREDO (O.S.)
		... but seeing is believing.  Ole
		Johnny knows all the places.  I
		tol' you... can you believe it?

If Michael would ever allow himself to cry, it would be now.

				FREDO
			(continuing)
		The old man Roth, would never come;
		but Johnny knows these places like
		the back of his hand...

							FADE OUT.

FADE IN:

INT. MICHAEL'S SUITE - MED. VIEW - MORNING

Michael is alone in his bedroom; it seems as though he
hasn't slept very much, but sits by his window, looking out
at the city.  He is troubled and tired.

His radio is on:

				RADIO
			(Spanish)
		"This is Rebel Radio: Rebel troops
		of Column Four 'Jose Marti' took
		the town of Baire yesterday at 8:30
		p.m.  The enemy has retreated..."

EXT. CUBAN STREET - MORNING

This street in Havana is like a Caribbean tourist city with
no indication of the revolution in progress.

Michael walks along the street, alone, past the Cubans on
their way to work; past the American ladies who have gotten
up early for their shopping spree.

				RADIO
			(Spanish)
			 (continuing)
		... An important military action is
		developing along a 35-kilometer
		stretch of the Central Highway.
		Numerous enemy garrisons are left
		with two alternatives, surrender or
		annihilation...

One full block away, Bussetta rides in the front seat of the
dark Mercury, driving slowly, giving Michael his privacy,
but never letting him out of Bussetta's sight.

CLOSE ON MICHAEL

watching.

MICHAEL'S VIEW

Shopkeepers happily luring the tourists into their shops in
broken English.  Havana is prosperous.

				RADIO
			(continuing)
		... Victories in war depend to a
		minimum on weapons and to a maximum
		on morale...

VIEW ON MICHAEL

glances back to the dark car following him.  In a moment, it
pulls up to him, and he gets into the back seat.

EXT. AMERICAN MILITARY MISSION - VIEW ON MICHAEL - DAY

standing by his car, looking through the cyclone fencing
that borders this military training camp operated by the
American Army near the city.

				RADIO
		... War is not a simple question of
		rifles, bullets, guns and planes...

CLOSER VIEW INTO THE CAMP

EXT. HAVANA STREET - DAY

A street singer, followed by a guitarist sings Jose Marti's
words of "Guantanamera." It is solemn, as though it is a
song of protest, a song of the revolution.

Nearby, in a restaurant, Michael has lunch with Fredo.

				MICHAEL
		How is your wife, Fredo...your
		marriage?

				FREDO
			(eating)
		You know her; drives me crazy, one
		minute she's a popsicle, the next
		she's all vinegar.  Sometimes I
		think... I think - I should a
		married someone, like you did.  To
		have kids, to have a family.

Michael turns, distracted for a moment at something the
singer has sung.

				MICHAEL
		"Yo soy un hombre sincero..."
		I am a sincere man,
		From the land of the palms...

				FREDO
		What's that?

				MICHAEL
		The song.  Are you sincere with me,
		Fredo?

				FREDO
		Sincere.  What are you talking
		about, of course I'm sincere with
		you, Mike.

				MICHAEL
		Then I'm going to confide in you;
		trust you with something.

				FREDO
			(Sicilian)
		Mike, are you crazy, I'm your
		brother.

				MICHAEL
		Tonight we've been invited to a
		reception at the Presidential
		Palace; to bring in the New Year.
		You and I will go in a special car
		that's being sent.  They'll have
		cocktails... then dinner, and a
		reception with the President.  When
		it's over, it will be suggested
		that you take Questadt and his
		friends from Washington to spend
		the night with some women.  I'll go
		home alone in the car; and before I
		reach the hotel, I'll be
		assassinated.

				FREDO
		...Who?

				MICHAEL
		The same man who tried in Nevada...
		Hyman Roth, not Pentangeli.

				FREDO
		But, you told me yourself...

				MICHAEL
		It was never Pentangeli... I've
		always known that.  It was Roth all
		along.  He talks to me as a son; as
		his successor, but the old man
		thinks he'll live forever.

				FREDO
		What do you want me to do?

				MICHAEL
		To go tonight, with me, as though
		we know nothing.  I've already made
		my move.

				FREDO
		What is it?  Can I help?

				MICHAEL
		The old man will never bring in the
		New Year.

Fredo realizes what he means; looks immediately to Bussetta,
who had been sitting near the door and the musicians.  He is
gone.

INT. HOTEL CORRIDOR - MOVING VIEW ON BUSSETTA - NIGHT

The first time ever away from Michael, moving toward us
quickly.  He stops, knocks on the door of Roth's suite.
Then quickly for a man his size, he moves without noise to
the adjoining door, opens it with a key, and disappears
inside.

A moment elapses on the empty corridor, and then a roused
Johnny Ola, opens the first door.  He steps out into the
corridor, to see who had knocked.  Confused, he is about to
return inside, when Bussetta easily breaks his neck in two
from behind.

INT. THE SUITE - MED. VIEW - NIGHT

as Bussetta quietly pulls the limp body of Johnny Ola, his
head bent at an impossible angle, and lays it at the foot of
the couch.

EXT. PRESIDENTIAL PALACE - NIGHT

Guards who are regular troops patrol the Palace in twos,
carrying machine weapons.

Now an elite officer, checks the identification of the
various cars carrying dignitaries, as they are driven up to
the Palace.  The one being inspected at the moment contains
Fredo and Michael.  We can see the beautifully dressed
people on their way to the reception, and sense the cheerful
mood of this New Year's Eve.

INT. THE SUITE - NIGHT

Bussetta bends over Ola's body, tying the wrists and knees
with electrical extensions.  He then easily carries the body
to the small balcony which all the rooms have.

EXT. THE BALCONY - NIGHT

Bussetta swings the body over the side of the balcony
railing; tying the extension cord to the railing, and
suspending the body so that it is invisible both from the
inside and out during the night.

INT. PRESIDENTIAL PALACE - VIEW ON THE MAIN FOYER - NIGHT

The PRESIDENT, his WIFE and six oldest CHILDREN greet
formally the many beautifully and affluently dressed guests.
He speaks to them in Spanish, as one by one they file to him.

Michael and Fredo are presented in a group with several
other Americans, including several of the American
businessmen with interests in Cuba.

EXT. STREETS OF HAVANA - MED. VIEW - NIGHT

The excitement of the night is beginning to build; people
are out in the streets; poor people, but they are
enthusiastic and lively.

NEW VIEW

Traffic stops, as an ambulance speeds its way to a hospital;
SIREN going.

INT. THE SUITE - NIGHT

Bussetta delicately picks up a small satin cushion that had
fallen from the couch, and replaces it as though nothing had
happened.  Slowly he cracks the door open which adjoins
Roth's bedroom.  There is a slight commotion; whispered
voices.

BUSSETTA'S VIEW

Terri, Mrs. Roth, is crying.  A group of men lift Hyman
Roth's frail body onto a stretcher.

CLOSE ON BUSSETTA

realizes that this is the man he is to kill.

CLOSER VIEW ON ROTH

He is alive; breathing hard with his mouth dry and open.
The doctor examines him, and then gives instructions to the
orderly who carries him out, presumably to the ambulance.

Bussetta closes the door on this VIEW.

INT. PRESIDENTIAL PALACE - NIGHT

An orchestra plays for the guests, as an army of waiters
serve champagne and hors d'oeuvres.  Michael relaxes with
Senator Geary, Major Leon, and several of the Americans.

				QUESTADT
		The embargo on arms shipments from
		the U.S. to your government, was
		just a necessary public relations
		move... Only last month, your air
		force received a major shipment of
		rockets...

Michael glances at his watch; Fredo concentrates on this.

				SENATOR GEARY
		We believe in non-intervention...
		but the agreement stipulates that
		our forces may be withdrawn... but
		as you've seen, we have not
		withdrawn them.

				CORNGOLD
		And my guess is that President
		Eisenhower won't pull out while we
		have over three billion invested
		over here.

				MICHAEL
		Fredo.  Where are you going?

				FREDO
		Nowhere, Mike.  I wanted to get a
		refill.  How about you?

EXT. HAVANA HOSPITAL - MED. VIEW - NIGHT

The ambulance makes its way up to the emergency section of
the hospital.  The orderlies quickly carry the old man
inside.  His wife and the doctor, and several of his men,
follow in another car.

THE VIEW ALTERS

and we see Bussetta waiting in the shadows.

EXT. HAVANA STREETS - NIGHT

The growing crowds of Cubans begin their celebration.

NEW VIEW

A Cuban military detachment speeds along in the night,
motorcyclists clear a path through the celebrants.

INT. PRESIDENTIAL PALACE - FULL VIEW - NIGHT

A full sitdown dinner is being served the guests.  Michael
sits at a table at dinner with several of the distinguished
Cubans, and some of the American businessmen.

				QUESTADT
		What's kept Mr. Roth?

Fredo looks up at Michael.

In the back of the room, we notice the detachment of military
moving quickly through the reception room on their way to
the President's private quarters.  Michael notices it as well.

INT. THE HOSPITAL CORRIDOR - FULL VIEW - NIGHT

The activity at the end of the hall has come to rest; we can
tell that the doctor tells Mrs. Roth that she should go, the
old man will be taken to a room where he can rest.
Gradually, these people leave him in the care of the hospital
staff.

Bussetta watches from the distance of the hallway; after the
old man has been moved, he quietly walks down the hallway to
the room.

HIS VIEW

A nurse sits in the room in attendance; Hyman Roth is
asleep, his mouth wide open, breathing noisily.

VIEW ON BUSSETTA

hears footsteps, quickly steps away from the door, and into
another room.

Some nurses and attendants speak to the nurse in the room in
Spanish; one has brought a small bottle of wine, and
obviously they are inviting the nurse to have a New Year's
toast with them.  They laugh; and the nurse steps away from
the room for a moment.

Bussetta moves slowly back into the room, alone with the
helpless Roth.

INT. PRESIDENTIAL PALACE - FULL VIEW ON THE GUESTS - NIGHT

seeing in the New Year; a great banner is hoisted up in
Spanish, welcoming 1959.

Hands are shaken; kisses exchanged.

MED. CLOSE VIEW

Michael and Fredo in an embrace; they kiss one another.

				MICHAEL
		I've arranged for a plane; we're
		going to Miami in an hour.  Try not
		to make a big thing of it.

He kisses his brother once again.

				MICHAEL
			(Sicilian)
		I know it was you, Fredo.  You've
		broken my heart.

Slowly, understanding, Fredo backs away from his brother,
taking the kiss another way.

A little distance away, Major Leon notices an old woman, one
of the President's maids, moving across the alcove, carrying
her suitcases.

				LEON
		What a pity; she's crying.  Must
		have been fired, and she's been
		with the President's family for
		twenty years.

EXT. HAVANA STREETS - NIGHT

The gathered crowd joyously welcomes the New Year.  We
notice the continual military movement.

MED. VIEW

A family surreptitiously leaves their home, carrying
suitcases and belongings.

INT. ROTH'S HOSPITAL ROOM - NIGHT

Bussetta raises a hospital pillow, and easily begins to
smother the thin old man, who can barely struggle.

OUT IN THE HALL

A detachment of military move quickly, accompanied by some
of Roth's men, as though they have important news that must
be dealt with.

They pass the small group of aides and nurses welcoming the
New Year.

Seeing them, the nurse assigned to him, puts down her glass
and moves quickly to the room.

She opens the door, and lays bare the sight of Bussetta
smothering Roth.  Bussetta turns quickly; and one of the
military takes out his pistol and shoots several times at
his head.

INT. PRESIDENTIAL PALACE - NIGHT

The entire reception has been disrupted for an announcement;
all the guests in their formal dress and evening gowns,
standing with frightened faces like first class passengers
on a doomed ship.  The President himself, his back to our
VIEW, is making an announcement in Spanish.  While he
speaks, we notice continuous movement of his personal staff,
carrying suitcases and possessions.

				PRESIDENT
		...Because of serious setbacks of
		our troops in Guantanamo and
		Santiago, we feel reluctantly, that
		we must leave the Capital at once.
		Myself and my family must bid you
		goodbye, and good fortune.  We will
		go directly to Ciudad Trujillo.

The crowd is stunned; already whispers are moving throughout
the guests.

The only one who is not completely taken off guard is
Michael, who quietly steps back, and disappears from the room.

				PRESIDENT
		...My only regret is that there
		could not have been more warning...
		As my last official act as
		President, I hereby appoint a
		provisional government with Dr.
		Carlos M. Piedra, as its President.

By now, there is only one thought among the guests: how can
they get out, and with what.

EXT. PRESIDENTIAL PALACE - NIGHT

We see evidence of the confusion at this late hour; already
cars are beginning to move; people leaving the Palace in
haste.  Michael moves quickly toward his car.  He sees
Fredo, watching him in fear.

				MICHAEL
		Come with me.  It's your only way
		of getting out!

VIEW ON FREDO

Terrified of his brother, and what he knows; Fredo backs
away into the growing noise and confusion of the crowd.

VIEW ON MICHAEL

Finally, he has to step into the car and it roars off.

EXT. HAVANA STREETS - NIGHT

Rebel cars with loudspeakers have already picked up the news
that Batista has conceded...this throws the crowds already
gathered for the New Year into cheers of joy.

They harass a wealthy family who are trying to get away in
their car.

The people pull them out of the car, opening their suitcases,
out of which spill piles of cash and jewelry into the street.

Michael's car makes its way as the crowd cheers: "El animale
se fue!"

EXT. THE UNITED STATES EMBASSY - MED. VIEW - NIGHT

Crowds of panicked and frightened tourists, and Batistianos
are trying to get to the safety of the Embassy with their
families and possessions.

We see Geary, and some of the Americans we had met, working
their way through the crowds, shouting that they are
Americans in order to get preference on the line.  Often
that declaration brings 'boos' from the crowds.

Sometimes the joyous Cubans will let a family through, but
again, taking away the suitcases, rich leather, filled with
money and valuables.  Money seems to be stuffed everywhere.

EXT. THE YACHT CLUB - NIGHT

All forms of private transportation are jammed with people
trying to get out, holding cash in their hands for anyone
with a yacht or small boat to get them to Florida.

A car pulls up; and we see Sam Roth, Terri Roth and some of
their men, carry the sickly, but still alive Hyman Roth to a
private cruiser which is protected by men with machine guns.

Within seconds, they are on their way to Miami.

EXT. THE PRIVATE AIRPORT - NIGHT

Things are no different at the airport; where anything that
can fly is being jammed with refugees and their money.

A wealthy family is arguing with the pilot of a fast
airplane; trying to force cash on him, and his family into
the plane.  The PILOT steadfastly refuses, although checking
his watch, as though his passengers are late.  He speaks
only English.

				PILOT
		No, this is a private plane.  No,
		this plane is taken.

Finally Michael's Mercury pulls up, and Michael approaches
the Pilot.

				MICHAEL
		He isn't here.

				PILOT
		We've got to leave, they'll take
		this thing apart.

				MICHAEL
		All right.  Go now.

The Pilot lets Michael in, as the Cuban screams curses at
them, and begins searching for another plane for his family.

INT. THE PLANE - VIEW ON THE PILOT - NIGHT

as the propeller turns over.

EXT. THE AIRPORT - FULL VIEW - NIGHT

Groups of the cheering, celebrating Cubans sing
"Guantanamera," now as a song of triumph.

INT. THE PLANE - MOVING VIEW - MICHAEL - NIGHT

Closer to him, his personal and business life caught in the
middle of history.

EXT. NEW YORK STREET - MED. VIEW - DAY (1920)

He stops to pick out some choice oranges and peaches from a
fruit stand.  Then he reaches into his pocket for change.

				VENDOR
		No, no.  It is my pleasure to make
		this a gift.

CLOSE VIEW ON VITO

				VITO
		You are kind.  If ever I can do
		something for you, in return,
		please come to me.

INT. VITO'S TENEMENT - DAY

Despite his new position of 'respect,' there is little
changed about his home.  Only that they have lived there a
while now, and the rooms are fuller with the inevitable
possessions a young family acquires.

He kisses his wife, who seems a bit apprehensive.  He shows
her the fruit; and from her reaction knows she has something
on her mind.

				VITO
			(Sicilian)
		What is it?

				CARMELLA
			(Sicilian)
		Come...

They step into the tiny parlor, where we see an older woman,
waiting nervously.

				CARMELLA
		The Signora is a friend of mine.
		She has a favor to ask of you.

				VITO
			(Sicilian)
		Why do you come to me?

				SIGNORA COLOMBO
			(Sicilian)
		She told me to ask you.

He seems surprised; looks to his wife.

				CARMELLA
		She is having some trouble.  Her
		landlord has received complaints
		because of her dog.  He told her to
		get rid of it, but her boy loved
		it, so they tried to hide it.  When
		the landlord found out, he was so
		angry, he ordered her to leave.
		Even if she truly will let the dog
		go.

				SIGNORA COLOMBO
			(Sicilian)
		He said he would have the police
		put us out.

				VITO
			(thoughtfully)
		I can give you some money to help
		you move, is that what you want?

				SIGNORA COLOMBO
		My friends are all here; how can I
		move to another neighborhood with
		strangers?  I want you to speak to
		the landlord to let me stay.

Vito nods to the frightened old woman.

				VITO
		It's done then.  You won't have to
		move; I'll speak to him tomorrow
		morning.

Carmella breaks into a smile; which her husband does not
acknowledge.

The old woman starts to leave the room; but she is not
convinced.

				SIGNORA COLOMBO
		You're sure he'll say yes, the
		landlord?

				VITO
		I'm sure he's a good-hearted fellow.
		Once I explain how things are with
		you, I'm sure he'll take pity on
		your misfortunes.  Don't let it
		trouble you any more.
			(as he shows her out)
		Guard your health, for the sake of
		your children.

EXT. TENEMENT BLOCK - DAY

SIGNOR ROBERTO, a pompous, rather well-dressed Patrone
angrily walks down the steps of one of his tenement buildings.

He carries a check list, and makes marks with a pencil
concerning the condition of his various buildings; a broken
window here, some missing tile there.  He bends over to pick
up some garbage left by a thoughtless tenant, muttering to
himself, when he sees the shoes and legs of a young worker.

				VITO (O.S.)
		Signore Roberto...

He rises to be face to face with a polite Vito Corleone.

				VITO
		The friend of my wife, a poor widow
		with no man to protect her, tells
		me that for some reason she has
		been ordered to move from your
		building.  She is in despair.  She
		has no money, she has no friends
		except those that live here.

Signor Roberto brusquely answers, and continues on his way.

				ROBERTO
		I have already rented the apartment
		to another family.

MOVING SHOT ON THE TWO

				VITO
		I told her I would speak to you,
		that you are a reasonable man who
		acted out of some misunderstanding.
		She has gotten rid of the animal
		that caused all the trouble, so why
		shouldn't she stay.  As one Italian
		to another, I ask you the favor.

				ROBERTO
		I've already rented it; I cannot
		disappoint the new tenants.  They're
		paying a higher rent.

				VITO
		How much more a month?

				ROBERTO
		Eh...
			(we sense he is lying)
		Five dollars more.

Vito reaches into his pocket, and takes out a roll of bills.

				VITO
		Here is the six months' increase in
		advance.  You needn't speak to her
		about it, she's a proud woman.  See
		me again in another six months.
		But of course, you'll let her keep
		her dog.

				ROBERTO
		Like hell!  And who the hell are
		you to give me orders.  Watch your
		manners or you'll be on your
		Sicilian ass in the street there.

Vito raises his hands in surprise; his voice is reasonable.

				VITO
		I'm asking you a favor, only that.
		One never knows when one might need
		a friend, isn't that true?  Here,
		take this money as a sign of my
		good-will, and make your own
		decision.  I won't quarrel with it.
			(he puts the money in
			Roberto's hand)
		Do me this little favor, just take
		it and think carefully.  Tomorrow
		morning if you want to give me the
		money back, by all means do so.  If
		you want the woman out of your
		house, how can I stop you?  It's
		your property, after all.  If you
		don't want the dog in there, I can
		understand.  I dislike dogs myself.
			(he pats Roberto on
			the shoulder)
		Do me this service, eh?  I won't
		forget it.  Ask your friends in
		this neighborhood about me, they'll
		tell you I'm a man who believes in
		showing his gratitude.

Without a word more, Vito leaves a hypnotized Roberto
standing in front of the tenement, his hand clasping the
money.

EXT. NEIGHBORHOOD STREET - DAY

A thin young man, almost gawky, walks down the street in
this Italian neighborhood, his name is HYMAN SUCHOWSKY.

He carries his tools as he comes home from work.  He is
pursued and tormented by a couple of Italian youths, about
his own age, eighteen.

				ITALIAN BOY
		Kid, where do you live?

				ANOTHER
		Where'd you get those nigger lips?

He tries not to be intimidated; finally one of the boys,
steps in front of him and stops him.

				ITALIAN BOY
		Say 'bread' in Italian.

				ANOTHER
		He dunno.

				ITALIAN BOY
		Go on; how do you say 'bread' in
		Italian?  If you're from the
		neighborhood, you should know how
		to say 'bread' in Italian.

An amused Peter Clemenza steps forward from a local coffee
house, to preside over the fuss.  He's a 'big' man in the
neighborhood, and loves a fight.

				CLEMENZA
		What's up?

				ITALIAN BOY
		This kid lives around here, but he
		can't say bread in Italian.

				CLEMENZA
		That's 'cause he's Jew.  Look at
		those pregnant lips!

He giggles at his own joke.

				ITALIAN BOY
		Are you a Jewboy?

The boy doesn't answer, tries to keep going.

				ITALIAN BOY
		Well, if you're not a Jew, say
		'bread' in Italian.  See, he can't.

And with that, he rounds a blow squarely to the boy's face,
sending him sprawling to the cement, his tools flying with a
clatter.

The other Italian immediately joins in with a few kicks to
the boy's stomach.  Hyman tries to fight back; grabs a hold
of his tormentor's foot, and brings him down on the cement
as well.  For a moment, they are rolling around on the
sidewalk, two against one, Hyman taking the worst of it.

				CLEMENZA
		Alright, alright, cut it out.

				SECOND ITALIAN
		What for?  He killed Jesus Christ!

Clemenza pulls him off, and kicks him in the ass.

				CLEMENZA
		I said cut it out!
			(to the beaten kid)
		What's your name?

				HYMAN
		Hyman Suchowsky.

				ITALIAN BOY
		I don't believe it.  In our
		neighborhood, with a name like that!

				CLEMENZA
		What are those tools?  You work on
		cars?

				HYMAN
		Yeah.

				CLEMENZA
		Maybe I know how you can make a
		couple of extra bucks working as a
		mechanic.

The boy seems agreeable.

				CLEMENZA
		But you gotta know how to keep your
		mouth shut, and fer Chrissakes, get
		rid of that name.  I'll call you
		Johnny Lips.
			(he giggles at his
			own humor again)
		Come on...

He leads the boy down the street, whispering to him, on the
side:

				CLEMENZA
		Bread in Italian is pane.  P-A-N-E,
		pane.  Don't forget.

INT. NEW GENCO WAREHOUSE - DAY

A newly acquired warehouse, stocked with cases of the new
product "GENCO PURA" olive oil.  It is the beginning of a
new business, in the American tradition.  Now they have one
rattling old truck, and a few stock boys.

Genco has become the accountant-business manager, based on
the experience working with his father.  But it is clear,
that Vito is the leader, and undisputed 'President' of the
new enterprise.

Genco moves through the darkness of the warehouse, to the
small divided area that Vito uses as his office.

				GENCO
			(Sicilian)
		The 'patrone' is here.

				VITO
		Chi?

				GENCO
		Roberto.  Who owns the 'rat-holes.'

Vito nods that he will see him; and soon Roberto enters, on
tiptoe, his hat in his hand, and in an apologetic voice.

				ROBERTO
		Excuse me, I hope I am not a
		disturbance, Don Corleone.

				VITO
		Yes.

				ROBERTO
		What a terrible misunderstanding.
		Of course, Signora Colombo can stay
		in the flat.  Who were those
		miserable tenants to complain about
		noise from a poor animal...when
		they pay such low rent.

Then abruptly, he puts the roll of money on Vito's table,
and steps back a respectful distance.

				ROBERTO
		Your good heart in helping the poor
		widow has shamed me, and I want to
		show that I, too, have some
		Christian charity.  Her rent will
		remain what it was.

				VITO
		What was that?

				ROBERTO
		In fact, reduced, by five dollars!

Vito embraces him warmly.

				VITO
		I accept your generosity...

				ROBERTO
		I won't keep you another minute...

He quickly takes his leave, bowing several times, and then
makes it back to the safety of the warehouse; he sighs,
deflates his lungs, and mops his brow; his bones have turned
to jelly with fear at his narrow escape.  He all but runs
out of the warehouse.

Genco laughs as he watches.

				GENCO
		We won't see him for weeks!  He'll
		stay in bed in the Bronx!

Clemenza has been waiting with his new mechanic.  We notice
the subtle difference in the way he treats Vito.  He is no
longer a junior apprentice in their petty crimes; but an
imposing leader.

				CLEMENZA
		This kid is good with cars; he
		looked at the truck, and says he
		can keep it going.

Vito looks over the lanky young man.

				CLEMENZA
		What's your name?

				HYMAN
		Suchowsky.  Hyman Suchowsky.

				CLEMENZA
		He's gonna dump that; I call him
		Johnny Lips.

				VITO
		Who is the greatest man you can
		think of?

				CLEMENZA
		Go on, answer him when he talks to
		you.  Tell him: Columbus, Marconi...
		Garibaldi.

				HYMAN
		Arnold Rothstein.

				VITO
		Then take that as your name: Hyman
		Rothstein.

Genco is out in the alley; he calls out with glee.

				GENCO
		Vitone!  Look at this!

Vito moves out to the smiling Genco; Clemenza and the newly
christened Hyman Rothstein follow a distance behind.

EXT. THE ALLEY - DAY

Genco stands beaming, as two workers raise up high, the
freshly painted sign: "GENCO OLIVE OIL COMPANY."

				GENCO
			(enthusiastically)
		God bless America!  We're in
		business!

The young men watch as the sign is hoisted into place.  OUR
VIEW goes from one to the other: Clemenza, Genco, Vito and
Hyman Rothstein.

							DISSOLVE TO:

INT. SENATE CAUCUS ROOM - MED. CLOSE VIEW - DAY

Willy Cicci, Pentangeli's associate and bodyguard takes a
drink of water.

				SENATOR (O.S.)
		Mr. Cicci.  From the year 1927 to
		the present time, you were an
		employee of the "Genco Olive Oil
		Company."

				CICCI
		That's right.

				SENATOR (O.S.)
		But in actuality, you were a member
		of the Corleone Crime organization.

				CICCI
		The Corleone Family, Senator.  We
		called it, "The Family."

				SENATOR (O.S.)
		What position did you occupy?

				CICCI
		At first, like everybody, I was a
		soldier.

VIEW ON SENATOR KANE

A thin, angular Baptist with a Mid-Western accent.

				SENATOR KANE
		What is that exactly?

				CICCI
		A button.  You know, Senator.

				SENATOR KANE
		No, I don't know, explain that
		exactly.

				CICCI
		When the boss says push the button
		on a guy, I push the button, see,
		Senator?

The Senators treat Cicci with a surface courtesy, as if he
were a curious kind of animal, not really human.  Cicci
reacts to this by being even more brutally forthright than
he has to be, to show his contempt for what he considers a
hypocrisy.

The VIEW ALTERS from Senator Kane to the Committee's
attorney, Mr. Questadt.

				QUESTADT
		You mean you killed people at the
		behest of your superiors?

				CICCI
		That's right, counsellor.

				QUESTADT
		And the head of your family was
		Michael Corleone.

				CICCI
		Yeah, counsellor, Michael Corleone.

				SENATOR KANE
		Did you ever get such an order
		directly from Michael Corleone?

				CICCI
		No, Senator, I never talked to him.

				SENATOR SAVOY
			(very autocratic,
			deep South,
			gentlemanly man)
		There was always a buffer, someone
		in between you who gave you orders.

				CICCI
		Yeah, a buffer, the Family had a
		lot of buffers.

EXT. THE TROPICANA IN VEGAS - MED. VIEW - DAY

A limousine pulls up at a private area near the side of the
hotel.  Michael exits the limousine followed by Hagen and
Neri.

				MICHAEL
		Do you think they have somebody to
		back up Cicci?

				HAGEN
		No.  But if they do have somebody,
		you'll do three years for perjury
		if you give them so much as a wrong
		middle name.

Michael smiles to him, but it's a cold, deadly smile.

				HAGEN
		Michael, take the Fifth all the
		way, that way you can't get into
		trouble.

EXT. PRIVATE BALCONY OF CORLEONE APARTMENT AT TROPICANA - DAY

A Corleone bodyguard waits outside on the balcony overlooking
the pool area.  Through the translucent draperies, we see a
grouping of men.

INT. CORLEONE APARTMENT AT THE TROPICANA - DAY

Michael, Hagen, Neri and Rocco are seated in this luxury in
the hotel.  Michael sits in a comfortable chair in his
apartment.  Neri comes and brings him a drink without
asking, but Michael refuses it.

				MICHAEL
		Al, get me a wet towel.  Does Kay
		know I'm back?

Hagen nods.

				MICHAEL
		Did the boy get something from me
		for Christmas?

				HAGEN
		I took care of it.

				MICHAEL
		What was it, so I'll know.

				HAGEN
		A little car he can ride in with
		an electric motor.

Neri comes around with a wet face towel, which Michael uses
to cool his eyes.  He puts the used towel down on the table.

				MICHAEL
		Fellas, can you wait outside a
		minute?

They know what he means and leave the apartment, going out
to the balcony where we can see them but they cannot hear.
Only Hagen remains.

				MICHAEL
		Where's my brother?

				HAGEN
		Roth got out on a private boat.
		He's in a hospital in Miami.  Had a
		stroke but he's recovered okay.
		Bussetta's dead.

				MICHAEL
		I asked about Fredo?

				HAGEN
		The new government arrested him,
		held him for a couple of days with
		a lot of the other casino people,
		including Roth's brother, Sam.  The
		American Embassy arranged flights
		for citizens; I'm not sure, but I
		think he's somewhere in New York.

				MICHAEL
		I want you to reach Fredo.  I know
		he's scared, but have one of our
		people reach him.  Assure him that
		there will be no reprisals.  Tell
		him that I know Roth misled him.

				HAGEN
		My information is that Fredo
		thought it was a kidnapping.  Roth
		assured him nothing would happen to
		you.

				MICHAEL
			(indicating Rocco and
			Neri on the balcony)
		They can come in now.

				HAGEN
		Wait... there's something else.

				MICHAEL
		Alright.

Hagen pauses; doesn't know how to begin.

				MICHAEL
			(impatiently)
		Go on, tell me.

				HAGEN
		Kay had a miscarriage; she lost the
		baby.

After a moment:

				MICHAEL
		Was it a boy or a girl?

				HAGEN
		Mike, at three and a half...

				MICHAEL
		What is it, can't you give me
		straight answers anymore!

				HAGEN
		It was a boy.

				MICHAEL
		And Kay...she's all right?

				HAGEN
		She took the Senate Investigation
		worse.

				MICHAEL
		Does she blame it on me?  The baby?

				HAGEN
		I don't know.

EXT. TAHOE ESTATE - DAY

The first snow of the New Year has fallen; the trees are
bare, and there is hush all over this part of the Sierras.
Michael is driven in his car, looking out at the familiar
sight of the home he has been forced to be away from.

VIEW ON MICHAEL

looking out from his window.  The last time he had seen the
estate it was warm, and the trees were full.

MOVING VIEW

approaching the great stone gates; closed.  The bodyguards
are not readily visible, but they are there.  The iron gates
are opened, and one of the men makes a simple nod of respect,
as the car pulls in.

NEW VIEW

Inside the estate, the private roads have been freshly
plowed, and occasionally a worker will pause to watch the
car as it passes.

The Grandchildren are in school now, and so the estate is
especially quiet.  Although there are signs that children
live here; a bicycle, a sled, a swing and gymnastic set, wet
and with a rim of snow still on it.

INT. MICHAEL'S HOUSE - VIEW FROM INSIDE THE HOUSE - DAY

to the outside, where Michael walks slowly.  He stops and
looks at a little Italian red sportscar made for children.

NEW VIEW

The front door opens, and Michael enters his own home.  It
is very quiet, no one is at home to greet him.  He can see
the evidence of his family; things his wife and his children
have been using, and left on a sofa or a table.

He moves toward his and Kay's bedroom, where we can HEAR the
SOUND of a sewing machine running.

Quietly he opens the door.

MICHAEL'S VIEW

into the bedroom.  Kay is sitting by the window, lit by the
cold afternoon light, at work with her sewing machine.  She
hasn't noticed that he's in the room yet, and goes on with
her work.

VIEW ON MICHAEL

stands there a moment, watching, not making a sound.  And
then without a word, he steps back, and closes the door, so
that she doesn't see him.

VIEW FROM INSIDE THE HOUSE

onto Michael, moving outside, walking through the snow, he
moves to the house next to his own.

INT. CONNIE'S HOUSE - DAY

This is the house where Mama lives with Connie's children,
Connie so rarely is there.

He steps in; his mother is asleep in a chair in the living
room.  He moves to her, and bends low, whispers.

				MICHAEL
		Mom... Mom...

She opens her eyes, which are red and small with age.

				MICHAEL
			(Sicilian)
		It's Michael.  How are you, Mom?

				MAMA
			(Sicilian)
		I'm alright.  Will you stay home
		for awhile?

				MICHAEL
			(Sicilian)
		There are still things I have to do.

				MAMA
			(Sicilian)
		Well, we can all have a nice dinner
		together tonight.  How are your eyes?

				MICHAEL
		Alright.  They bother me once in
		awhile.
			(a pause as he thinks)
		Tell me, when Pop had troubles...
		did he ever think, even to himself,
		that he had gone the wrong way;
		that maybe by trying to be strong
		and trying to protect his family,
		that he could... that he could...
		lose it instead?

				MAMA
			(Sicilian)
		You talk about the baby.  She can
		have another baby.

				MICHAEL
			(Sicilian)
		No, I meant lose his family.

				MAMA
			(as best she ever
			understood it)
		Your family?  How can you ever lose
		your family?

				MICHAEL
			(almost to himself)
		But times are different...

FULL VIEW IN ROOM - MICHAEL AND HIS MOTHER

Quietly we HEAR the music of a small band playing an Italian
march.  From the orchestration, we know it is from the past.

							DISSOLVE TO:

EXT. TRAIN STATION AT CORLEONE - DAY

Vitone and his young family: Mama, Santino, Fredo and the
baby Michael are met at the small station in Sicily by
friends, and Mama's relatives.  There is a small band,
playing for the occasion.  A small man has brought a motor
car to pick the family up; and there are certain dark men,
with shotguns slung over their shoulders to preside over the
occasion.

The family is helped into the car; the luggage is packed on
the roof, and the car drives off.  The second car, with
bodyguards following.

EXT. DON TOMASINO'S VILLA OUTSIDE OF CORLEONE - DAY

The villa is bloomed with flowers and DON TOMASINO at this
point is a man in his late twenties.  He embraces Vitone and
pats the heads of his children, and leads them all into the
garden.

INT. THE VILLA - SUMPTUOUS MED. VIEW - LATE DAY

A sumptuous table is set for the visiting family from
America.  There is a warm atmosphere as Vito, his wife and
children eat.  Tomasino and his family received presents
from Carmella and to Tomasino's mother, and gifts are given
to all of the children.

All typically American representing some of the prosperity
and interests in the consumer goods that followed a great war.

EXT. CORLEONE PLAZA - DAY

The family exits the church on the plaza of the town.  Vito
shakes hands warmly with the priest.

INT. VILLAGE COTTAGE - NIGHT

The door is open -- the footsteps of a man enter the room.
We follow these footsteps without quite knowing to whom they
belong.  They lead us to a bed, where we see asleep an OLD
MAN.  He sleeps in his undershirt and is sweating, covered
by mosquito netting.

VIEW ALTERS

and we realize that it is young Vito looking at the MAN.

We remember that the man is MOSCA, one of three men, who
almost twenty years before had hunted down Vito when he was
a boy.  With lightning speed, Vito slashes through the
mosquito netting with a knife.  And with the movement
precise as a butcher's he disembowels this man.

EXT. OLIVE OIL WAREHOUSE - FULL VIEW

Vito has brought his wife and children to see the Olive Oil
Depot which is the link to his New York importing business.
They go inside.

INT. OLIVE OIL WAREHOUSE - DAY

They are led by one of Vito's associates through rows and
rows of large vats of olive oil.  Vito very proudly shows
his associates in Italy the olive oil can that will be used
in the United States.  They all stand around at the link to
their new importing business and share a toast of wine.

EXT. THE BAY - DAY

A team of Sicilian fishermen are at work mending their nets.
One sings accompanied by a guitar.

VIEW MOVES TO ONE OF THE OLD FISHERMEN

He is recognized as the second of the men who had hunted
Vito down.  STROLLO.  As he walks we notice there is a
figure that is moving through the drying sails and barrels,
it is Vito.  He moves quietly, stepping up behind the old
man.  In an instant, he has thrown a garrote around his
throat, twisting it tight, so that there is very little
sound.

Then, almost silently dragging him through the space hidden
by the drying sails.

EXT. THE IMPRESSIVE ESTATE OF DON FRANCESCO - DAY

We see an old car approach.  Its driver is the young Tomasino.
Sitting in the car with him is Vito.

The car stops at the gates, and an old guard sees and
recognizes Tomasino, opens the gates allowing them to enter.

MED. VIEW

on an almost decrepit DON FRANCESCO.  He must be in his
early nineties, sitting as powerful and as impressive as
ever, in his throne-like chair from which he manages the
power as the Mafia Chieftain of this village.  Young Don
Tomasino is speaking.

We notice in a little distance in the rear, there are some
younger shepherds with shotguns thrown over their shoulders.

				TOMASINO
			(Sicilian)
		Don Francesco, if you will honor
		me, by allowing me to introduce my
		associate in America, in New York.
		His name is Vito Corleone.

The old man and his eyes glance up at a notion of a man who
has taken the name of this town as his name.

				TOMASINO
		We will supply him with olive oil
		exclusively in the town of Corleone.
		His company is called the "Genco
		Olive Oil Company." Here we have
		brought you an indication of how he
		will sell the product.

Tomasino respectfully puts a can of olive oil where the old
man can look at it.  The old man nods, accepting the notion
of this business.

				TOMASINO
			(Sicilian)
		We have come to ask your blessing
		and permission to continue this
		enterprise.

				DON FRANCESCO
			(Sicilian)
			 (in a shrill, high,
			raspy voice)
		Where is this young man?

				TOMASINO
		He is right here, standing next to
		me, Don Francesco.

				DON FRANCESCO
			(Sicilian)
		Have him come closer, I can't see
		very well.

Vito takes those several steps, so that he is standing right
in front of the old man.

VIEW ON DON FRANCESCO

looking up, squinting against the sun.

DON FRANCESCO'S VIEW

Strangely backlit, almost blurry image of the young man from
America.

				DON FRANCESCO
			(Sicilian)
		What is your name?

				VITO
			(Sicilian)
		Vito Corleone.

				DON FRANCESCO
			(Sicilian)
		You took the name of this town, eh?
		What was your father's name?

				VITO
			(Sicilian)
		Antonio Andolini.

CLOSE VIEW ON THE OLD MAN

The recognition of the name throws a shudder through him.
It is as though he recognizes that this is the boy; the son
of his old enemy, whom he had killed, and whose sons he had
tried to wipe out.  The old man raises his feeble hands
signalling his guard, and in his weak voice, he shouts:

				DON FRANCESCO
			(Sicilian)
		Kill him!  Kill him!

But he is too late; Vito steps forward.

				VITO
			(Sicilian)
		In the name of my Father, and my
		Brother...

And uses the knife, ritualistically plunging it into the old
man's belly, and then up to his throat, which is severed.

VIEW ON TOMASINO

has drawn his pistol and quickly shoots one of the guards,
helping Vito to escape back into the motor car.

VIEW ON A GUARD

raising his shotgun.

VIEW ON THE MOTOR CAR

Just as Tomasino is about to get into the car, the shotgun
is fired, and he is hit in the legs.

Vito manages to pull him up into the car, and they make
their escape.

EXT. RAILROAD STATION IN CORLEONE - DAY

Some of the townspeople have come bringing flowers and gifts
for Vito and his family.

His wife is radiant with the flowers given her.

The train has arrived and the crowd shout "Ciao, come back
soon."

THE VIEW ALTERS

revealing his good friend Tomasino, waving from his
wheelchair.

VIEW ON VITO

and his wife.  She holds up the baby Michael, and helps him
wave his hand.

INT. SENATE CAUCUS ROOM - MED. CLOSE VIEW ON MICHAEL - DAY

				SENATOR KANE (O.S.)
		Are you the son of Vito Corleone?

				MICHAEL
		Yes.

				SENATOR KING
		Did he use at times an alias?  Was
		this alias in certain circles
		GODFATHER?

				MICHAEL
		It was not an alias.  GODFATHER was
		a term of affection, used by his
		friends, one of respect.

				SENATOR WEEKLER
			(Senator from New
			York, very smooth,
			partly liberal,
			Tammany Hall)
		Let me agree with that.  Many of my
		constituents are Italian and have
		been honored with that certain
		friendship by my close Italian
		friends.  Up to this point before I
		have to leave this hearing to join
		my own committee, let me say, that
		this hearing on the Mafia is in no
		way a slur on the Italians by the
		Senate; nor is it meant to be; nor
		will I allow it to be.  Italian
		Americans are the hardest working,
		most law abiding patriotic Americans
		of our country.  It is a shame and
		a pity that a few rotten apples
		give them a bad name.  We are here
		to weed those rotten apples out of
		the vast healthy barrel of Italian
		Americans, who are one of the
		backbones of our country.

There is a pause for a while, while the New York Senator
poses for the TV cameras and leaves the hearing so that he
will not be associated with hearing the rough stuff.

				SENATOR KANE
		I'm sure we all agree with our
		esteemed colleague.  Now, Mr.
		Corleone, you have been advised as
		to your legal rights.  We have had
		testimony from a preceding witness
		who states you are head of the most
		powerful Mafia family in this
		country.  Are you?

				MICHAEL
		No.

				SENATOR KANE
		This witness has testified that you
		are personally responsible for the
		murder of a New York Police Captain
		in the year 1947 and with him a man
		named Virgil Sollozzo.  Do you deny
		this?

				MICHAEL
		I deny his every charge.

				SENATOR KANE
		Is it true that in the year 1950
		you devised the murder of the heads
		of the Five Families in New York,
		to assume and consolidate your
		nefarious power?

				MICHAEL
		That is a complete falsehood.

				SENATOR KANE
		Is it true that you own a
		controlling interest in three of
		the major hotels in Las Vegas?

				MICHAEL
		That is not true.  I own some stock
		in some of the hotels, but only
		very small amounts.  I also own
		some American Telephone and IBM
		stock.

Michael had checked this point with Hagen, before answering,
and then once again after the answer.

				SENATOR ROGERS
		Why is it necessary for your
		counsel to advise you on that
		question?

				MICHAEL
		Senator, I've observed the head of
		General Motors before a Senate
		Committee, and his lawyer whispered
		in his ear.  That was not commented
		upon in the way you have just done.

				SENATOR KANE
		Mr. Corleone, do you have any hotel
		interests in the state of Arizona?
		Or any gambling interests in that
		state?

				MICHAEL
		I do not.

				SENATOR KANE
		Do you have interests or control
		over gambling and narcotics in the
		state of New York.

				MICHAEL
		I do not.

A pause.  Silence, as the Chairman whispers something to his
assistant.

Tom Hagen takes a paper out of his briefcase, and addresses
the Chair.

				HAGEN
		Senator, my client would like to
		read a statement for the record.

				SENATOR KANE
		I don't think that's necessary.

				HAGEN
		Sir, my client has answered every
		question asked by this committee
		with the utmost cooperation and
		sincerity.  He has not taken that
		Fifth Amendment as it was his right
		to do, and which because of the
		extreme legal complexity of this
		hearing, counsel advised him to do.
		So, I think in all fairness this
		committee should hear his statement
		and put it in the record.

				SENATOR KANE
		Very well.

At this point Senator Rogers contemptuously walks out of the
hearing room.

				MICHAEL
			(reading)
		In the hopes of clearing my family
		name, in the sincere desire to give
		my children their fair share of the
		American way of life without a
		blemish on their name and background
		I have appeared before this
		committee and given it all the
		cooperation in my power.  I consider
		my being called before this
		committee an act of prejudice to
		all Americans of Italian extraction.
		I consider it a great dishonor to
		me personally to have to deny that
		I am a criminal.  I wish to have
		the following noted for the record.
		That I served my country faithfully
		and honorably in World War II and
		was awarded the Distinguished
		Service Cross for actions in
		defense of my country.  That I have
		never been arrested or indicted for
		any crime whatsoever... that no
		proof linking me to any criminal
		conspiracy, whether it is called
		Mafia or Cosa Nostra or whatever
		other name you wish to give, has
		ever been made public.  Only one
		man has made charges against me,
		and that man is known to be a
		murderer, arsonist and rapist.  And
		yet this committee had used this
		person to besmirch my name.  My
		personal protest can only be made
		to the people of this country.  I
		can only thank God that in this
		country we have a legal system and
		courts of law to protect innocent
		people from wild accusation.  I
		thank God for our democratic due
		process of Law that shields me from
		the false charges made by this
		committee's witness.  I have not
		taken refuge behind the Fifth
		Amendment, though counsel advised
		me to do so.  I challenge this
		committee to produce any witness or
		evidence against me, and if they do
		not, I hope they will have the
		decency to clear my name with the
		same publicity with which they have
		now besmirched it.  I ask this
		without malice, in the interests of
		fair play.

The television cameras have documented this moment, as Hagen
hands the document over to the committee lawyer.

				SENATOR ROGERS
		We are all impressed.  The committee
		will now recess over the weekend.
		However, it will continue Monday
		morning, at eleven a.m.  At that
		time, this committee will then
		produce a witness directly linking
		Mr. Corleone to the charges we have
		made.  And then, Mr. Corleone may
		very well be liable for indictments
		of perjury.  However, this document
		will be made a matter of record.

EXT. ARMY POST - DAY

An army post somewhere in the East.  It is safely guarded.

INT. HOUSE ON THE POST - DAY

where Pentangeli is being held by his constant companions,
the two FBI MEN.

				PENTANGELI
		Ten to one shot, you said.  Ten to
		one shot in my favor, and I lose.

				FBI MAN #1
		Get a good night's sleep.  We got a
		new suit, new shirt, new tie, and
		I'm going to shave you myself.
		Tomorrow we want you to look
		respectable for fifty million of
		your fellow Americans.

				PENTANGELI
		My life won't be worth a nickel
		after tomorrow.

				FBI MAN #1
		We have a special home for you for
		the rest of your life.  Nobody gets
		near you.  You're not going any
		place.

				PENTANGELI
		Yeah, some deal I made.

				FBI MAN #2
		You live like a king.  You'll be a
		hero.  You'll live better in here
		than most people on the outside.

				PENTANGELI
		Some deal.
			(pause)
		I just wish Mike had took the Fifth.

				FBI MAN #1
		Why'd you do it, Frankie?  After
		all these years, why'd you turn
		against him?

				PENTANGELI
		I didn't turn against nobody; he
		turned against me.

EXT. THE BOATHOUSE ALCOVE - DAY

A somewhat frightened Fredo Corleone sits in the easy chair
overlooking the lake in this canopied section of the
boathouse.  Rocco sits with him.

INT. BOATHOUSE - DAY

Michael is in the dark room with Hagen and Neri.

				MICHAEL
		How did they get their hands on
		Pentangeli?

				HAGEN
		Roth engineered it, Michael.  He
		made Pentangeli think you hit him.
		Deliberately letting him get off
		alive.  Then the New York detectives
		turned Frankie over to the FBI.  My
		informants say he was half dead and
		scared stiff -- talking out loud
		that you had turned on him and
		tried to kill him.  Anyway, they
		had him on possession, dealing in
		heroin, murder one and a lot more.
		There's no way we can get to him
		and you've opened yourself to five
		points of perjury.

				NERI
		They've got him airtight.  He's in
		a military base, twenty-four hour
		guards.  Trying to kill him is like
		trying to kill the President --
		it's impossible.

				MICHAEL
		What does Fredo know?

				HAGEN
		He says he doesn't know anything,
		and I believe him.  Roth played
		this one beautifully.

				MICHAEL
		Alright.  I'm going to go outside
		and talk to Fredo.

EXT. BOATHOUSE FOYER - DAY

Fredo sits on the couch.  When Rocco sees Michael, he
automatically takes his leave.  Michael sits in the chair
opposite Fredo.

				FREDO
			(after a pause)
		I don't have a lot to say, Michael.

				MICHAEL
		We have time.

				FREDO
		I was kept pretty much in the dark.
		I didn't know all that much.

				MICHAEL
		What about now, is there anything
		you can help me out with?

				FREDO
		I know they get Pentangeli, that's
		all I know.

Fredo gets up, walks to the glass panel that separates the
terrace from the lake.

				FREDO
		I didn't know it was a hit.  I
		swear to you I didn't know.  Johnny
		Ola contacted me in Beverly Hills --
		said he wanted to talk.  He said
		you and Roth were in on some big
		deal, and there was a place for me
		in it if I could help them out.
		They said you were being tough on
		the negotiation, and if they had a
		little bit of help, they could
		close it fast and it would be good
		for you.

				MICHAEL
		And you believed that story.

				FREDO
		He said there was something good in
		it for me...me on my own.

				MICHAEL
		I've always taken care of you.

				FREDO
		Taken care of me.  Mike, you're my
		kid brother, and you take care of
		me.  Did you ever think of that.
		Ever once?  Send Fredo off to do
		this, send Fredo to take care of
		that... take care of some little
		unimportant night club here, and
		there; pick somebody up at the
		airport.  Mike, I'm your older
		brother; I was stepped over!

				MICHAEL
		It's the way Pop wanted it.

				FREDO
		It wasn't the way I wanted it!  I
		can handle things.  I'm not dumb
		Christ, not like everyone says.
		I'm smart; and I want respect.

				MICHAEL
		There's nothing more you can tell
		me about this investigation?

				FREDO
		The lawyer; Questadt, he belongs to
		Roth.

				MICHAEL
		You're nothing to me now, Fredo;
		not a brother, not a friend, I
		don't want to know you, or what
		happens to you.  I don't want to
		see you at the hotels, or near my
		home.  When you visit our Mother, I
		want to know a day in advance, so I
		won't be there.  Do you understand?

Michael turns, and starts to leave.  A frightened voice
behind him:

				FREDO
		Mikey?

Michael doesn't stop, doesn't turn back.  He continues off
through the veranda, and out the summer doors.

Neri stops by him.

				MICHAEL
		I don't want anything to happen to
		him while my Mother's alive.

Michael leaves.

EXT. ARMY POST - DAY

Five cars brimming with Army guards and Agents are waiting
to move Pentangeli.  There is one empty car.

INT. GUARDED HOUSE - DAY

The two FBI Agents are helping Pentangeli get dressed.  He's
in brightly colored striped shorts and bare-chested.  The
Agents help him with the shirt and tie.  One holds out the
trousers but Pentangeli ignores it and looks at himself in
the mirror.

				FBI MAN #1
		Ready, Frankie.

				PENTANGELI
		Let's go.

The Agents open the door, and precede him, surveying the
area.  They check the cars waiting, each with two Agents.
They check the gate and note the military sentries.  Then
they stand aside, and let Pentangeli come out.  They get
close to his side, and it is obvious they will protect his
life with their own.

EXT. ARMY POST - DAY

The Agents put him in the front seat of the empty car, and
get in with him, one at each side.  Another Agent drives.
Now, the first cars start out; the Sentries opening the
gates, and letting the caravan pass.

An Army supply truck comes very close to them, and the
Agents next to Pentangeli become very tense.  Pentangeli
grins.  Then the truck passes on, and they relax.

INT. SENATE CAUCUS ROOM - DAY

The room is crowded with TV journalists, cameras, etc.  We
pick Pentangeli up, closely guarded, being led to the witness
chair.

Pentangeli is seated, and made to take his oath.  FBI Agents
are all around him.

MED. VIEW

Anyone given entrance to the caucus room is being frisked.
The five Senators take their places.

VIEW ON HAGEN

waiting at his long table, very nervous.  He seems startled
by the appearance of Pentangeli.

VIEW ON PENTANGELI

catching Hagen's eye.  It's as though he is pleading for
some kind of understanding of the fact that he has become a
traitor.

VIEW ON HAGEN

cold; then he turns away.

VIEW ON THE ENTRANCE

The bustle is settling down; then Michael Corleone enters,
and with him is someone very peculiar and out of keeping for
this setting.  A burly-chested imposing man of middle age.
Very powerful-looking with frightening magnetic eyes.  His
dress is odd: boots, rough tie, and shirt.  He could be the
tenor out of a Sicilian opera.  He is clearly a country Don,
direct from Sicily, and he dominates the room.

VIEW ON PENTANGELI

At first his view is blocked.  Then he sees Michael and is a
bit shamefaced, but still defiant.

PENTANGELI'S POV

Michael returns his glances without emotion.  Then the VIEW
ALTERS, revealing the Sicilian.

VIEW ON PENTANGELI

He is terror stricken; obviously he recognizes the man.

VIEW ON HAGEN'S TABLE

Michael and the Sicilian sit by Hagen, where they can stare
directly at Pentangeli; he is frozen with fear.

VIEW ON THE SENATOR

Notices the tension in the room.  The Chairman commences:

				SENATOR KANE
		We have here a witness who will
		testify further on Michael
		Corleone's rule of the criminal
		empire that controls gambling in
		this country and perhaps in other
		countries.  This witness had no
		buffer between himself and Michael
		Corleone.  He can corroborate our
		charges on enough counts for this
		committee to consider a charge of
		perjury against Michael Corleone.
			(then he turns to Pentangeli)
		Your name please, for the record.

				PENTANGELI
		Frank Pentangeli.

				SENATOR KANE
		Were you a member of the Corleone
		Family?  Were you under the
		Caporegime Peter Clemenza, under
		Vito Corleone, known as the
		Godfather?

There is a long silence.

VIEW ON PENTANGELI

He seems unable to speak.

VIEW ON THE SICILIAN

gazing at him.

VIEW ON PENTANGELI

				PENTANGELI
		I never knew no Godfather.  I got
		my own family.

Senator Kane is stunned.  The two FBI men are alert, their
eyes searching the room for what has intimidated their
witness at the last moment.

				SENATOR KANE
		Mr. Pentangeli, you are
		contradicting your confessions to
		our investigators; I ask you again,
		were you a member of a crime
		organization headed by Michael
		Corleone?

				PENTANGELI
		No.  I never heard of it.  I never
		heard of nothing like that.  I was
		in the olive oil business with his
		father a long time ago.  That's all.

				SENATOR KANE
		We have your confession that you
		murdered on the orders of Michael
		Corleone.  Do you deny that
		confession and do you know what
		denying that confession will mean
		to you?

The die is cast and like a good soldier, Pentangeli will go
all the way now.  So he is brazen in his defiance of the
Senator.

				PENTANGELI
		The FBI guys promised me a deal.
		So I made up a lot of stuff about
		Michael Corleone.  Because then,
		that's what they wanted.  But it
		was all lies.  Everything.  They
		said Michael Corleone did this,
		Michael Corleone did that.  So I
		said, "Yeah, sure."

He makes a big grin to show how he has made fools of
everybody.

VIEW ON THE FBI AGENTS

glancing around the room; their eyes have settled on the
Sicilian.  One of them scribbles a note on a piece of paper,
and passes it to the Committee lawyer.  Then in turn it goes
to Senator Kane.

				SENATOR KANE
		Mr. Hagen, would you kindly identify
		to this committee that gentleman
		sitting on your right hand?

				HAGEN
			(coolly)
		Yes, sir.  His name is Vincenzo
		Pentangeli.

				SENATOR KANE
		Is he related to the witness?

				HAGEN
		He is, I believe, a brother.

VIEW ON MICHAEL AND VINCENZO PENTANGELI

They wait with no expression.

				SENATOR KANE
			(to Vincenzo Pentangeli)
		Sir, I would like you to take the
		stand.

Vincenzo stares at him, uncomprehending.  There may just be
a shadow of contempt.  He doesn't answer.

				HAGEN
		Sir, the gentleman does not
		understand English.  He would not
		in any case, take the stand.  He
		came, at his own expense, to aid
		his brother in his trouble.  He is
		not under any jurisdiction of our
		government and his reputation in
		his own country is impeccable.

				SENATOR KANE
			(furious)
		The witness is excused; take him out.

The guards and FBI Agents quickly remove Pentangeli, as
everybody else in the room is required to sit still.

				HAGEN
		Senator Kane.

				SENATOR KANE
		This meeting is adjourned.

				HAGEN
			(rising and shouting)
		This committee owes an apology!

				SENATOR KANE
		The committee is adjourned until
		further notice.

For the first time, in the midst of the confusion, Hagen
smiles.  A bitter, contemptuous smile.

VIEW ON MICHAEL

The modest champion.  He rises and they take their leave.

VIEW ON THE TWO FBI AGENTS

They watch the Corleone party as they exit.

INT. WASHINGTON HOTEL CORRIDOR - DAY

The Corleone nurse is waiting, playing with the little girl
MARY.  A distance away, the boy, Anthony, is standing by
himself.

INT. MICHAEL'S SUITE - WASHINGTON HOTEL - DAY

The door to Michael's suite opens; Rocco leans in.

				ROCCO
		It's Kay.

Michael is sitting in an easy chair; he seems to have
difficulty with his eyes.

				MICHAEL
		On the phone?

				ROCCO
		No, she's here.

Michael rises, surprised.  Rocco steps back, and Kay enters.

				MICHAEL
		I had no idea...

				KAY
		I wanted to see you before you went
		back to Nevada.  Also, the
		children - Michael, they're here.

				MICHAEL
		Where?

				KAY
		In a minute.  They're outside with
		Esther.  I'm very happy for you...
		I suppose I knew that you're simply
		too smart for anyone ever to beat
		you.

				MICHAEL
		Why don't you sit down?

				KAY
		I'm not going to stay long; I can't.

				MICHAEL
		There are a lot of things I want to
		talk to you about.  Things I've
		been thinking about -- changes I
		want to make.

				KAY
		I think it's too late for changes,
		Michael.  I promised myself I
		wouldn't talk about it and I've
		gone and spoiled it.

				MICHAEL
		Why too late?

				KAY
		Tell me, Michael.  What really
		happened with Pentangeli?

				MICHAEL
		His brother came to help him.

				KAY
		I didn't even know he had a brother.
		And where is he now?

				MICHAEL
		On a plane back to Sicily.

				KAY
		And that's all he had to do.  Just
		show his face.

				MICHAEL
		That's all.  You see, in Sicily, in
		the old days... there was only one
		legitimate reason to kill a blood
		relative... only one.  If he was a
		traitor.

				KAY
		You would have killed his brother?

				MICHAEL
		Kay, you've got it wrong.  That
		kind of thing's all over, I promised
		you.  This was between the two
		brothers.  Years ago Frankie had a
		young girlfriend; he called her his
		co-wife.  That was his joke, but he
		meant it.  He wouldn't divorce his
		wife... because she was a great
		cook.  He said his girlfriend made a
		spaghetti sauce once and it was so
		terrible he knew he could never
		marry her.  He set her up in a
		house in Jersey.  She had to be
		faithful... and she had to have kids.
		And she did, two, a boy and a girl.
		He had her checked out and watched
		so she couldn't cheat... but the
		girl couldn't stand that kind of
		life.  She begged him to let her go.
		He did.  He gave her money and made
		her give up the kids.  Then Frankie
		took them to Italy, and had them
		brought up by his brother Vincenzo.
		Where he knew they'd be safe.

Kay begins to realize.

				MICHAEL
		When he saw his brother in the
		hearing room, he knew what was at
		stake.
			(pause)
		I don't think Vincenzo would have
		done it.  He loves the kids, too.
		Omerta, Kay.  Honor, silence.  It
		had nothing to do with me.  It was
		between those brothers.

				KAY
		I'll bring the children up now;
		they want to say goodbye.

				MICHAEL
		Kay, I told you...

				KAY
		Goodbye, Michael.

				MICHAEL
		I won't let you leave!  Christ, do
		you think I'm going to let you leave?

				KAY
			(meekly)
		Michael.

				MICHAEL
		No, I don't want to hear anything.
		There are things between men and
		women that will not change; things
		that have been the same for
		thousands of years.  You are my
		wife, and they are my children...
		and I love you and I will not let
		you leave, because you are MINE!

				KAY
		Oh, I do feel things for you,
		Michael; but now, I think it's pity.
		For the first time since I've known
		you, you seem so helpless.  You
		held me a prisoner once; will you
		try again?

				MICHAEL
		If that's what it takes; then yes,
		I will.

				KAY
		At this moment, I feel no love for
		you at all.  I never thought that
		could happen, but it has.

				MICHAEL
		We'll go back tonight.  Bring the
		children.

				KAY
		You haven't heard me.

He moves to her; he does love her, and is tender with her.

				MICHAEL
		How can I let you leave; how can I
		let you take my children away?
		Don't you know me?  You understand,
		it's an impossibility.  I would
		never let it happen; no, never, not
		if it took all my strength, all my
		cunning.  But in time, soon, you'll
		feel differently.  You see, you'll
		be happy that I stopped you.  I
		know you.  You'll forget about
		this; you'll forget about the baby
		we lost... and we'll go on, you and
		I.

				KAY
		The baby I lost...

				MICHAEL
		I know what it meant... and I'm
		prepared to make it up to you.  I
		will make changes; I can.
			(he clenches his fist tightly)
		I CAN change; that I have learned,
		that I have the strength to change...
		And we have another child, a boy...
		and you'll forget the miscarriage.

				KAY
		It wasn't a miscarriage.  And you
		with your cunning, couldn't you
		figure it out!  It was an abortion;
		an abortion, like our marriage is
		an abortion, something unholy and
		evil.  I don't want your son; I
		wouldn't bring another of your sons
		into this world.  An abortion,
		Michael... it was a son, and I had
		it killed, but this must all end!

VIEW ON MICHAEL

He had no hint, not in his wildest imagination could he have
guessed that she would do such a thing.

				KAY
		And I know that now it's over; I
		knew it then, there would be no way
		you could ever forgive me, not with
		this Sicilian thing that goes back
		two thousand years.

He is silent, though raging -- then, with all his passion,
and his strength, he raises his arms, and strikes her across
her neck, literally knocking her down to the floor, and
hurting her badly.

				MICHAEL
			(coldly)
		You won't take my children.

							FADE OUT.

FADE IN:

EXT. THE CORLEONE ESTATE AT TAHOE - FULL VIEW - DAY

A collection of dark cars and black limousines are gathered
to one side.  A few drivers wait quietly.

And then, to the other extreme of the estate, is a small
grouping of about twenty to thirty people, gathered near
Michael's house.

MED. CLOSE SHOT

Connie Corleone, dressed simply and now showing her age
without the carefully applied makeup which we have been used
to, kneels down before the shrine of Santa Theresa, and puts
down a bouquet of flowers, along with others that have been
placed there.  We see that some have the simple silk ribbon
with the word "Mama" hand-lettered upon it.

Her two children stand close behind her; they had been
raised by their Grandmother.

Connie steps back, and moves through the small group of
friends and relatives, into Michael's house.

INT. MICHAEL'S HOUSE - CONNIE'S VIEW - DAY

Fredo, kneeling by the coffin of his mother in a portion of
the house that has been set aside for the wake.  Fredo
concludes his prayer, wipes away the tears in his eyes and
steps away from the coffin.

He stops when he notices Neri, a little distance away,
looking at him.

VIEW ON NERI

After a moment, he nods respectfully to Fredo, and steps
forward, moving to the old woman's coffin.  Fredo moves to
Hagen, who is there with his wife and children.

				FREDO
		Tom.  Where's Mike?

				HAGEN
			(difficult to tell him)
		He's waiting for you to leave.

				FREDO
		Can I talk to him?

				HAGEN
		No chance.  I'm sorry, Freddie.

				CONNIE
			(who has heard this)
		Can I see him?

				HAGEN
		He's in the boathouse.

INT. THE BOATHOUSE - MED. VIEW - DAY

Michael sits quietly in the darkened room in one of the big
sofas, dressed immaculately in suit and tie.  His two
children, also dressed for the wake sit opposite him in the
other oversized sofa, their shoes not touching the floor.
We regard this tableau for a long moment.

				CONNIE (O.S.)
			(quietly)
		Michael?  It's Connie.

She comes in, and sits down by his knees.

				CONNIE
		I want to stay close to home now,
		is that alright?

Michael nods.

				CONNIE
		Is Kay coming?

				MICHAEL
		No.

				CONNIE
		Michael, Fredo's in the house with
		Mama.  He asked for you, and Tom
		said he couldn't see you.

				MICHAEL
		Tom is right.

				CONNIE
		Kids, why don't you go outside for
		a while?

The children don't move; Connie realizes they will only
listen to Michael.

				CONNIE
		I want to talk to you, Michael.

				MICHAEL
		The children can stay.

				CONNIE
		I hated you for so long, Michael;
		for so many years.  I think I did
		things to myself, to hurt myself,
		so that you would know -- and you
		would be hurt too.  But I understand
		you now; I think I do.  You were
		being strong for all of us, like
		Papa was.  And I forgive you, and
		want to be close to you now.  Can't
		you forgive Fredo; he's so sweet,
		and helpless without you.

Slowly, Michael puts his hand on her hair, and touches her
gently.

				CONNIE
		You need me, Michael.  I want to be
		with you now.

INT. MICHAEL'S HOUSE - DAY

Friends, relatives; Francesca and her new husband, Gardner
and their baby; Sandra Corleone; Teresa, her children; all
the familiar faces of the family are present, quietly paying
their respects to Mama.

Some of the men can be seen in the kitchen, drinking wine,
and talking in low voices.

Fredo is there, broken-hearted over the loss of his Mother;
like some lost child with no friends.

MED. VIEW

Michael enters the room, followed by Connie, who tends
little Mary and Anthony.

He approaches his brother, and then embraces him.  Fredo breaks
into tears.

				FREDO
		Christ, Mike.  Jesus Christ, Mike.

VIEW ON MICHAEL

embracing his brother, he glances up.

VIEW ON NERI

quiet, and deadly.

EXT. THE TAHOE ESTATE - MED. VIEW - DAY

Tom Hagen is talking in the distance to his wife, and one of
his older sons; he kisses them, and moves toward the boathouse.
After crossing the lawn, he stops.

VIEW ON SANDRA CORLEONE

waiting there; obviously wanting to talk to him.  He
continues, and she walks with him.

MOVING VIEW ON THE TWO

as they cross toward the boathouse.

				SANDRA
		You're going to talk to him now.

				HAGEN
		Yes.

				SANDRA
		Will you tell him?

				HAGEN
		I don't know.

She stops him.

				SANDRA
		Tom, think of yourself for once.
		Don't let this opportunity slip
		through your fingers; don't do it.
		We're all trapped here, don't you
		see?

He continues past her, without answering her.  Continues up
to the boathouse.  He stops before he enters.

HAGEN'S VIEW

Fredo is sitting by the edge of the harbor with Michael's
son Anthony; he is helping him with some fishing rig.

INT. THE BOATHOUSE - VIEW ON MICHAEL - DAY

looking through the window at his son and brother.  Neri
sits in the room, dressed informally.

				MICHAEL
			(without looking back)
		Sit down, Tom.  Have you heard
		about our friend and partner, Mr.
		Hyman Roth?

				HAGEN
		I know he's in Israel.

				NERI
			(hands Hagen the paper)
		The High Court of Israel turned
		down his request to live as a
		'returned Jew.' His passport's been
		invalidated except for return to
		the U.S.  He landed in Buenos Aires
		yesterday, offered a gift of one
		million dollars if they would give
		him citizenship.  They turned him
		down.

				HAGEN
			(reading)
		He's going to try Panama...

				MICHAEL
		They won't take him; not for a
		million, not for ten million.

				HAGEN
		His medical condition is reported
		as... "terminal."

				MICHAEL
		He's been dying of the same heart
		attack for twenty years.

				HAGEN
		That plane goes to Miami...

				MICHAEL
		I want it met.

				HAGEN
			(understanding)
		Mike, it's impossible.  He'll be
		met by the Internal Revenue; the
		Customs Service, and half the FBI.

				MICHAEL
		I don't like it when you use the
		word impossible; nothing is
		impossible...

				HAGEN
		Mike, it would be like trying to
		kill the President; there's no way
		we can get to him.

				MICHAEL
		I'm surprised at you, Tom.  If
		there's anything certain; certain
		in life; if history has taught us
		anything, it's that you can kill...
			(he stops, then coldly)
		ANYBODY.  But perhaps your
		reluctance is because you've come
		to tell me that you're moving your
		family to Vegas, that you've been
		offered the Vice-Presidency of the
		Houstan Hotels there.  Or weren't
		you going to tell me at all?

				HAGEN
		Are you so hungry for traitors; do
		you want to find them everywhere?

				MICHAEL
		They are everywhere!

				HAGEN
		I turned Houstan down; I didn't see
		why I should tell you about an
		offer I turned down.
			(Michael begins to
			confuse him)
		Are you sure, Mikey?  Are you sure
		of what we're doing; what we'll
		gain; what does the family gain?
		Forget that, Mike; I already know
		the answer.

				MICHAEL
		I know you do, Tom.  Then I can
		count on you to help me do the
		things I have to do.  If not, call
		Houstan, and become a Vice-President.
		Take your family and your mistress
		and move them to Las Vegas.

				HAGEN
		Why do you hurt me, Michael?  I've
		always been loyal to you.

				MICHAEL
		Good.  Then you're staying.

				HAGEN
		I'm staying.
			(he pauses...then,
			without being asked)
		Don't ever enjoy the cruel part of
		all this; Sonny never listened to
		me about that.
			(then he sits down,
			and opens his briefcase)
		Now, explain everything to me.

EXT. THE HARBOR - DAY

Fredo sits with Anthony, with a silly-looking fishing hat on
his head, covered with lures and flies.

				FREDO
		Anthony, ole buddy, your Uncle
		Fredo's gonna teach you how to
		catch the big fish.  You know, when
		I was a kid, I did this amazing
		thing.  I went out on a fishing
		trip; me and my brothers and my
		Pop, and no one could catch a fish
		except me.  And this was my secret:
			(confidentially)
		Every time I would put the line
		down I would say a "Hail Mary" and
		every time I said a "Hail Mary" I
		would catch a fish.  Now, when it's
		sunset, we're gonna go out on the
		lake, and we're gonna try it.

INT. GUARDED HOUSE - DAY

The guards step aside as Tom Hagen enters the foyer of the
house.  He shows a court order to them and they lead him up
the stairs where he knocks on the door.

INT. GUARDED HOUSE - DAY

There is a KNOCK at the door.  The two guards show Hagen in
and Hagen presents the court order to one of the FBI men.

				HAGEN
		I think I prefer to see my client
		privately.

				PENTANGELI
		The room has a bug in it.

				HAGEN
			(to the FBI men)
		I'd like to go outside with him, in
		the open air.

				FBI MAN #1
		This room is not bugged.

				HAGEN
		You have guards outside and the
		electric fence.  There's no security
		reason for not letting us talk in
		the yard.

				FBI MAN #1
		Okay.

They pass out of the room.

EXT. THE ARMY POST - DAY

Hagen and Pentangeli outside, by the electric fence.  They
cannot be overheard.  Pentangeli takes out some cigars and
offers Hagen one.  Hagen takes it and Pentangeli lights both
their cigars.  They puff on them contentedly.  They are
comfortable together, almost.

				HAGEN
		Everything is going to be okay,
		Frankie, don't worry.

				PENTANGELI
		Did my brother go back?

				HAGEN
		Yeah, but don't worry.

				PENTANGELI
		He's ten times tougher than me, my
		brother.  He's old-fashioned.

				HAGEN
		Yeah.  He wouldn't even go out to
		dinner.  Just wanted to go home.

				PENTANGELI
		That's my brother.  Nothing could
		get him away from that two mule
		town.  He coulda been big over
		here -- he could of had his own
		Family.

				HAGEN
		You're right.

				PENTANGELI
		Tom, what do I do now?

The light is beginning to turn reddish as the sun falls.

				HAGEN
		Frankie, you were always interested
		in politics, in history.  I remember
		you talking about Hitler back in
		'43.  We were young then.

				PENTANGELI
		Yeah, I still read a lot.  They
		bring me stuff.

				HAGEN
		You were around the old timers who
		dreamed up how the Families should
		be organized, how they based it on
		the old Roman Legions, and called
		them 'Regimes'... with the 'Capos'
		and 'Soldiers,' and it worked.

				PENTANGELI
		Yeah, it worked.  Those were great
		old days.  We was like the Roman
		Empire.  The Corleone family was
		like the Roman Empire.

				HAGEN
			(sadly)
		Yeah, it was once.

They both puff on their cigars.  Pentangeli lets himself be
carried away by thoughts of old days of glory; Hagen thinks
of other days too.

				HAGEN
			(very gently)
		The Roman Empire... when a plot
		against the Emperor failed, the
		plotters were always given a chance
		to let their families keep their
		fortunes.

				PENTANGELI
		Yeah, but only the rich guys.  The
		little guys got knocked off.  If
		they got arrested and executed, all
		their estate went to the Emperor.
		If they just went home and killed
		themselves, up front, nothing
		happened.

				HAGEN
		Yeah, that was a good break.  A
		nice deal.

Pentangeli looks at Hagen; he understands.

				PENTANGELI
		They went home and sat in a hot
		bath and opened their veins, and
		bled to death.  Sometimes they gave
		a little party before they did it.

Hagen throws away his cigar.  Pentangeli puffs on his.

				HAGEN
		Don't worry about anything, Frankie
		Five-Angels.

				PENTANGELI
		Thanks, Tom.  Thanks.

They shake hands.  The FBI Agents come out to let Hagen out
the gate.  Pentangeli is led back to the house.

				FBI MAN #1
		Your lawyer tell you he can get
		that 600 years reduced to 500?

Pentangeli puffs on his cigar and reflects.

				PENTANGELI
		You boys sure you can't get me a
		broad for tonight?  Give me a
		little party?

				FBI MAN #2
		We got some nice books.

Pentangeli puffs on his cigar and gives the Agent a smile an
old man gives a child.  He starts upstairs.

				PENTANGELI
		I guess I'll just take a hot bath.

EXT. THE ARMY POST - DAY

Hagen walks away; glances back.  Then gets into his waiting
car and drives off.

INT. THE BOATHOUSE - FULL VIEW - SUNSET

Michael sits alone in the empty boathouse; in the shadows.

INT. BOAT DOCK - SUNSET

Neri stands by the dock area under the boathouse.  He pushes
the button which lowers a boat by winch and tackle.  He
wears a fishing cap.

He steps into the boat, and pulls the small outboard, which
glides the boat out into the harbor.

MED. VIEW

The boat pulls up alongside Fredo and Anthony.

				FREDO
		Here we go; and remember the secret.

He lifts Anthony into the boat.

				CONNIE (O.S.)
		Anthony.

THEIR VIEW

Connie, in houseclothes, is calling Anthony.

				FREDO
		He's here; we're goin' fishing.

				CONNIE
		He can't go; Michael wants to take
		him into Reno.

				FREDO
		Ah.  Okay, kid, you got to go to
		Reno with your Pop.

He lifts the boy out of the boat, and puts him on the shore.

				FREDO
		I'll catch one for you, with the
		secret.

				CONNIE
		Hurry, Anthony.

Neri starts the motor; and the boat with the two fishermen
glides off.

VIEW ON MICHAEL

watching, from the dark window of the boathouse.

INT. HIGH SECURITY HOUSE IN ARMY POST - NIGHT

The FBI man knocks on the bathroom door in the house where
they have kept Pentangeli.

				FBI MAN #1
		Frankie, open up.  You okay?

No answer; he hammers on the door.  Using his elbow, and
then a kick he breaks into the bathroom.

HIS VIEW

Pentangeli lying in a tub of water.  His stomach shows above
it.  His wrists are cut and covered with blood.  The bath
water has a purplish tone.

							DISSOLVE TO:

EXT. LAKE TAHOE - MED. VIEW - SUNSET

Fredo and Neri are fishing, each with lines out.  The VIEW
MOVES CLOSER, and we can hear Fredo as he holds the pole.

				FREDO
		... the Lord is with thee.  Blessed
		art thou amongst women, and blessed
		is the fruit of thy womb, Jesus.

LONG SHOT

The boat on the shimmery lake.

				FREDO
		... Holy Mary, Mother of God, pray
		for us...

We hear a quiet, echoing GUNSHOT; and then silence.

							DISSOLVE TO:

INT. MIAMI AIRPORT - NIGHT

An exhausted Hyman Roth, ill-shaven, and in shirt-sleeves in
is taken into custody by a swarm of Customs, and FBI men.  They
allow him to be photographed by press people; and television
cameramen.

				FBI MAN
		Mr. Roth, we have to take you into
		custody.

				ROTH
		Yes, I know.

Some flashbulbs go off.

				REPORTER
		Can you give us your reaction to
		the High Court of Israel's ruling?

				ROTH
		I am a retired investor on a
		pension, and I wished to live there
		as a Jew in the twilight of my
		life...

				LAWYER
		Mr. Roth is not a well man; he's
		tired of running.

				ROTH
		I'm an old man; at my age, it's too
		late to start worrying.

				REPORTER
		Is it true you are worth over three
		hundred million dollars, Mr. Roth?

				ROTH
		I'm a retired investor, living on a
		pension... I came home to vote in
		the Presidential election, because
		they wouldn't give me an absentee
		ballot...

The newsmen and photographers all laugh, as the FBI men move
him away.

CLOSE VIEW

One of the newspapermen laughing we recognize to be Rocco
Lampone.

He moves closer to Roth, and shoves his revolver right
against his head, and in a second, on camera, assassinates
Roth.  People scream, as Rocco attempts to run down the
airport corridor, limping as he does.

FBI men easily pick him off.

							FADE OUT.

EXT. THE DRIVEWAY BY MICHAEL'S HOUSE - DAY

A taxi cab waits by the house; its driver sleeping with a
newspaper over his face.

INT. MICHAEL'S HOUSE - DAY

The cleaning woman, Esther, who had been with Kay for years,
sits by the dining room table, weeping profusely.  Behind
her, in the recreation room, we can see the tableau of Kay
sitting on the couch, her little daughter Mary, between her
knees, talking quietly about things we cannot hear.  Her son
Anthony sits by himself, in another chair by the side of the
room.

MED. VIEW

Connie comes into the house quickly, and moves toward them.

				CONNIE
		Kay, you have to go.

This prompts Esther to weep all the more.  Kay hugs her
daughter, and kisses her many times.

				CONNIE
		You have to hurry; he's coming.

Kay puts her coat on; then stands, and reaches out for her
son.

				KAY
		Anthony, kiss Mama goodbye.

He doesn't move.

				CONNIE
			(angrily)
		Anthony, you kiss your Mother
		goodbye!

He rises, and walks to her.  Hugs her lifelessly.

MED. CLOSE VIEW

on Kay, kissing her boy.

				KAY
		Anthony, say goodbye; your Mama
		loves you.

				ANTHONY
		Goodbye.

She restrains any tears; she has become too strong for tears.
Kay starts to go; picks up Mary, kisses her, and starts to go.

NEW VIEW

She steps out the kitchen door; then she cannot help herself.
Crouches down, outside, and calls to her son.

				KAY
		Anthony, kiss me once.

Then she looks up, and slowly rises.

HER VIEW

Michael has stepped into the dining room.  He seems older
somehow; as though some sickness has taken more years away
from him.

VIEW ON KAY

looks at him; instinctively, she takes a step back.

VIEW ON MICHAEL

slowly steps toward her.

VIEW ON KAY

Another step back; the door is still open.

VIEW ON MICHAEL

He moves closer to the door; stops, looks at her.  And then
closes it obscuring any view of her.

							DISSOLVE TO:

EXT. TAHOE ESTATE - DAY

It is late fall -- most of the leaves have fallen on the
grounds and there is quite a wind.

MED. VIEW

The water is whipped up by the wind, and the waves are high
as they break against the pavilion.  We HEAR the MUSIC of
time passing, of Michael, of the Godfather over these images.

VIEW ON THE SWIMMING POOLS

They have not been used in several months; they are drained
and the bottoms are mossy and dark.

VIEW ON THE MAIN GATE

Leaves blowing past it; we don't see the button men; only a
hint of someone in the gatehouse.

VIEW ON THE HOUSES

Some of the houses have had the summer awnings taken down,
and put away.  Some of the windows have been boarded up.

VIEW ON THE KENNELS

There are still the guard dogs; some sleeping, some moving
impatiently.

As the MUSIC concludes its statement.

MED. VIEW

The peninsula of the private Corleone Harbor.  We see the
figures of two people, seated at a table.

MED. VIEW

Michael sits at a table having a sparse lunch.  He is
attended by his sister Connie, who seems to be the closest
person now living on the estate with him.  We see from the
way she pampers him with his lunch, that she has fallen into
the role of a surrogate Mother-Wife.  He seems older than
his years, as though his illness, diabetes, has taken its
toll.

				CONNIE
		Don't worry; I'm sure he got here
		on time.  The roads from the
		airport are so windy, it takes
		forever; I've driven them myself.

She picks up some of the serving plates that he has left
untouched.

				CONNIE
		I'll bring him out to you as soon
		as he comes.

She moves back to the main house.

MED. CLOSE VIEW ON MICHAEL

He turns and looks at the rough water of the lake for a
moment.  He slowly takes a sip of wine.

EXT. A PLACE IN THE GARDEN - DAY

There are a few chairs.

MED. VIEW ON ANTHONY CORLEONE

He is eighteen years old.

				ANTHONY
		Hello, Dad.

VIEW ON MICHAEL

squinting up at his son.

				MICHAEL
		Anthony.

He rises, and reaches up to his son, who is now taller than
he; he embraces him.

				MICHAEL
		You've grown so tall... so tall in
		the last year.  You're much taller
		than me.

				ANTHONY
		I was taller than you when I was
		fourteen.

				MICHAEL
		Sit down.  Your Aunt Connie and I
		waited for you to have some lunch,
		but now it's all dried out.

				ANTHONY
		I'm not hungry.

				MICHAEL
		Well, that's alright... alright.
		Good.  You'll graduate in another
		year, isn't that right?  You know...
		I never finished college.  I was a
		good student, but I never finished.
		Of course, there was a war then.

Connie approaches them.

				CONNIE
		Don't let me interrupt anything,
		this will just take a second.  Here.
			(she takes out a
			small needle, and
			begins to prepare it)
		Your father has to have his insulin
		shot.  Why don't you go to your
		room and put your things away,
		Anthony.

She begins to give Michael the shot.

				MICHAEL
		Hurry back; we'll talk.  We'll talk.

Anthony goes on his way to the house with his things.
Connie gives Michael the shot.

				CONNIE
		Whenever I see that lake so cold, I
		think of poor Fredo, drowned.  Lake
		Tahoe is very cold.  They say if a
		person drowns in it, that the body
		will remain mid-suspended --
		perfectly preserved.  Some say it
		will remain forever.

She finishes the shot, puts her things away.

				CONNIE
		Your boy will be right back.

She leaves.

VIEW ON MICHAEL

Alone in the garden.

OUR VIEW begins to MOVE CLOSER to him.  We begin to HEAR
MUSIC of the forties; happy music, swing music, as we move
CLOSER to Michael.

							DISSOLVE TO:

INT. OLD CORLEONE HOUSE - MED. VIEW - NIGHT

SONNY CORLEONE, his arm wrapped around a smiling red-faced
Carlo Rizzi, pulls him into the Corleone dining room.

				SONNY
		Hey, who knows my buddy Carlo Rizzi.
		Here... my brother Fredo, here's my
		Mom.  Mom, whatcha got cooking?
		And Carlo, this is my kid sister
		Connie.  Here, pull up a chair,
		Carlo is sitting next to Connie.
		Oh, the droopy kid over there is
		Mike.  The college boy.

An older, lanky man enters the room, his arms laden with
presents.  This is TESSIO.

				TESSIO
		Buon Natale, everybody.  Buon
		Natale...
			(he smiles at Tom Hagen)
		Hi, Tom, how's every little thing?

				HAGEN
			(helping him with the presents)
		Wonderful, Sal.

Now the study door opens, and DON CORLEONE enters.

				DON CORLEONE
		Is dinner ready?

				MOM
		Two minutes.

The Don happily regards his family; his sons and daughters
and even some Grandchildren.  He raises a glass.

				DON CORLEONE
		A good life, a long life to all my
		children, and friends.  To my
		grandchildren, and those that will
		be.  To our family.

They all drink.

They refill glasses; then Tessio proposes a toast.

				TESSIO
		To our Godfather.

They all drink.

INT. THE DINING ROOM - MED. VIEW - NIGHT

The family is happily at Christmas dinner.  Don Corleone
seated at the head of the table.

				SONNY
		What'd you think of those Japs, eh?
		The nerve of those Japs, coming
		right here in our own backyard
		dropping bombs!

				HAGEN
		Well, we could have expected it
		after the embargo.

				SONNY
		Hey!  Expect it or not, those Japs
		don't have a right to drop bombs in
		our backyard.  Whose side you on?

				MAMA
		Please, do we have to talk about
		the war at the table?  On Christmas,
		much less.

VIEW ON MICHAEL

He has been listening to this discussion.

				MICHAEL
		Pop, I've decided I'm going to
		enlist.

A quiet hush descends over the table, as though everyone
knows the effect this will have on the old man.  Sonny tries
to make light of it.

				SONNY
		Kid, stay in college.  The girls
		are cuter, if you know what I mean.

				HAGEN
		Pop had to pull a lot of strings to
		get you your deferment.

				MICHAEL
		I never asked for it; I don't want
		it.

VIEW ON DON CORLEONE

Disturbed; but wise and prudent.

				DON CORLEONE
		My son wants to talk about this,
		and so we'll talk, but not at the
		dinner table.

He rises, and starts across the room toward his study.  Then
he looks back.

				DON CORLEONE
		Michael.

He disappears into his study.  Michael rises, glances around.
People are generally tense over the situation.  Michael
follows his father into the study.

INT. DON CORLEONE'S OLD STUDY - NIGHT

The Don closes the door behind his son, and then moves
across the room.  He stops at the little bar there, and
pours himself a brandy.

				DON CORLEONE
		Would you like some?

				MICHAEL
		No, Dad.

				DON CORLEONE
		Now what is this talk about joining
		the army?  Eh?

				MICHAEL
		It's not talk; I'm doing it.

				DON CORLEONE
		You would risk your life for
		strangers?

				MICHAEL
		Not for strangers; for my country.

				DON CORLEONE
		Anyone not in your family, is a
		stranger.  Believe me, when trouble
		comes, your country won't take care
		of you.

				MICHAEL
		That's how it was in the old world,
		Pop, but this is not Sicily.

				DON CORLEONE
		I know.  I know, Michael.  It's
		Christmas, your brothers and sister
		are all here -- we are happy.
		Let's not spoil this.  Go your own
		way, but when you are ready, come
		to me the way a son should.  I have
		hopes for you...

CLOSE VIEW ON MICHAEL

looking at his father with a mixture of great love, and also
fear, and confusion.

				MICHAEL
		I won't be a man like you.

							DISSOLVE TO:

EXT. THE TAHOE ESTATE - HIGH FULL VIEW - DAY

The leaves are blowing.  MUSIC comes up.

Michael and his young son, Anthony, walk through the grounds
of the estate, talking about things we cannot hear.
================================================ FILE: samples/go/decent/data/godfather3.html ================================================


                       THE GODFATHER PART III


          
                             Written by

                 Mario Puzo & Francis Ford Coppola



                                         FIRST DRAFT. 3/22/79                       

                         


                         FADE IN:

                         BEFORE TITLES:
          The screen is black. First we hear the sound of a
          single trumpet playing slowly and sadly, the notes
          faintly resonant as if echoing through the narrow
          streets of some old hill village in Sicily. Now,
          confusingly, we see a slant of light move past us
          and another, and as our surroundings become more
          visible, we discover that we are moving through a
          pine forest lit by shafts of morning sunlight. Now,
          suddenly, we come out of the trees and find ourselves
          on the Nevada shore of Lake Tahoe.

          It is a clear, cold morning in September of Nineteen
          Fifty-nine. We now see the Corleone compound at
          lakeside, half-hidden by pines and firs. We move
          toward it, past the gates and guard houses, past
          the guest houses, past the kennels for the guard
          dogs, and finally to the front driveway of the main
          house where a conservative sedan is being loaded by
          a Chauffeur and a pair of large dark-headed men named
          Al Neri and Rocco Lampone. Rocco limps slightly.
          As suitcases are being placed in the trunk of the
          car, the front door to the main house opens and Tom
          Hagen, a trim, serious, balding man in a business
          suit, appears. He crosses to the corner of the house
          and looks off.

          HAGEN'S POINT OF VIEW - THE LAKE AND LAKESIDE
          Standing by the shore of the lake is a little boy,
          Tony Adams Corleone, aged about ten. The boy, dressed
          for travel, is looking off at the lake, his back to-
          ward us.

          REVERSE ANGLE - ON TONY
           As he looks out at the lake we might sense that he
           is troubled and puzzled, although he is managing to
           keep his expression stoic. Hagen can be seen in the
          background, by the house. After a moment:

                         

                         

                         

                         

          .T

          2.

                         CONTINUED:

                         HAGEN
          Tony.

                         (THEN)
          Time to go.
          There is a beat and Tony composes himself, turns and
          moves up toward the house and Tom Hagen.

          EXT. FRONT OF TAHOE HOUSE AND DRIVEWAY - DAY

          The heavy, black sedan is loaded. Connie Corleone
          and a Housekeeper, in uniform, are bringing Mary,
          about five, out to the car. She too is dressed for
          travelling. As they put her into the car --

                         CONNIE
          In you get ---

                         MARY
          Will Daddy be at the airport?

          - ANOTHER ANGLE
          as Tom and Tony come up.

                         HAGEN
          No. He wanted me to tell you both
          how sorry he was.

                         TONY

                         (TO CONNIE)
          Aunt Connie ... ?
          Connie would apparently prefer to avoid answering any
          questions.

                         CONNIE
          Be sure and give my love to your
          mother.

                         NERI
          It isn't like you won't be back
          from time to time.

                         LAMPONE
          I'll bet we'll all be together
          for Christmas. Wait and see.
          Tony gets into the car. He looks out the window to-
          ward the house. We begin MAIN TITLES AND CREDITS.

                         

                         

                         

                         

                         E

          3.

                         THE DRIVEWAY
          Hagen is the last one into the car. As it starts down
          the driveway on its way to the airport in Reno, Connie,
          Lampone, Neri and the Housekeeper all wave. The auto-
          matic gate at the foot of the driveway opens. The
          Guard at the gate flicks a half-salute as the Limousine
          passes through.
          THE GROUP IN FRONT OF HOUSE
          As the group breaks, Connie and the Housekeeper going
          back inside, Neri and Lampone drifting off, we pan up-
          ward to a window in the second-story and zoom in.
          Michael Corleone has moved the curtain aside with one
          hand and is looking after the disappearing limousine,
          his expression unreadable-but somehow sad.

          INT. AIRLINER - DAY
          We are close on Tony, looking out the window of the
          airliner, his own expression matching that of his
          father.
          EXT. LOGAN INTERNATIONAL AIRPORT (STOCK) - DAY
          as an airliner of the period comes in for a landing.

          EXT. NEW ENGLAND COUNTRYSIDE - DAY
          A limousine is moving northbound up Interstate 93.
          Maples on either side of the road are turning red
          and yellow. We pan the limousine past and continue
          to pan to a sign marking the stateline between
          Massachusetts and New Hampshire.
          INT. LIMOUSINE - DAY
          It is almost night. Tony is looking out the window
          as the limousine enters the little town of Hanover,
          moves down past the Dartmouth College green on Eleazer
          Wheelock Street and turns right on North Main.
           EXT. KAY ADAMS' HOUSE - NIGHT
           We are on a post box, the name "Adams" on its side.
           We pull back to include a modest white, two-story
          (CONTINUED)

                         

                         

                         

                         

          4.

                         CONTINUED:
          clapboard house with giant elms and maples in the front
          yard. The limousine stops outside the house.

                         4
          Under its fanlight, the front door swings open and
          Kay Adams comes out. As her children run to her she
          kneels and gathers them into her arms. We move in
          close on Tony and as we hold, the MAIN TITLES AND
          CREDITS are over and we begin to hear

          SPEAKER'S VOICE
          our Nation is and has been histor-
          ically the symbol of freedom, of
          justice and opportunity and its
          peculiar strength is that no matter
          what our individual background --

          EXT. NAVY AND MARINE CORPS STADIUM - DAY

          We are close on a young man, Anthony Adams (Corleone),
          in the uniform of a Midshipman of the Naval Academy
          at Annapolis. We are pulling back as the speaker --
          who is Arne Grundellius, the Secretary of State --
          continues, his accent faintly Scandinavian.

          GRUNDELLIUS' VOICE
          -- there are no limits to the goals
          to which we can each legitimately
          aspire. And now, as an unpopular war
          is ended in East Asia and we set our
          sights on new goals, I leave you with
          the words of another Sailor ---
          We continue to pull back to discover that we are at
          the Navy and Marine Corps stadium. It is a late Spring
          day and in the early-middle Nineteen Seventies. The
          Midshipmen, their parents and guests, are gathered
          for the graduation ceremonies.

                         GRUNDELLIUS
          'Our will is to keep the torch of
          freedom burning for all. To this
          solemn purpose we call on the young,
          the brave and the strong, and the
          free. Heed my call. Come to the
          sea. Come sail with me.'

                         (THEN)
          I'm sure the entire Brigade of Mid-
          shipmen recognizes the words of John
          Paul Jones.

                         

                         

                         

                         

          5.

                         CONTINUED:
          During this speech we find we have been moving through
          the stadium. We find Tony's mother, Kay, among the
          guests. We also see Tony's sister, Mary, about twenty,
          Tom Hagen and Al Neri.

                         ANOTHER ANGLE
          The speech is over and as the audience applauds, the
          Superintendent of the Academy crosses an to Grundellius.

          SUPERINTENDENT
          Thank you, Mister Secretary --
          As the Secretary of State crosses back to his seat,
          the Superintendent addresses the microphone.
          SUPERINTENDENT
          The following First Classmen will
          step forward to receive their
          diplomas.
          The Superintendent consults a list which an Aide has
          supplied. The Superintendent reads off the first
          two names, then

                         0

                         SUPERINTENDENT
          Trident Scholar Anthony Adams
          At the sound of his name Tony rises and moves toward
          the Speaker's platform. We

                         DISSOLVE TO:

          EXT. THE SPEAKER'S PLATFORM - DAY

          The final First Classman has received the final diploma
          and is moving off as a Midshipman runs up to center
          stage, and -- in accordance with long tradition --

                         MIDSHIPMAN
          I propose three cheers for those
          about to leave us. Hip hip!
          (the Brigade answers
          with a roar)

                         HIP HIP!

                         (AGAIN)

                         HIP HIP!
          The Brigade answers for the third time and as the Mid-
          shipman runs off, his place is taken by a Represen-
          tative of the graduating class.

                         0

          (CONTINUED)

                         

                         

                         

                         

          6.

                         CONTINUED:

                         FIRST CLASSMAN
          I propose three cheers for those
          we leave behind. Hip hip

                         (HURRAY)
          Hip hip!

                         (HURRAY)

                         HIP HIP
          FEATURING TONY
          As the graduating Midshipmen give their last full-
          throated response and skim their hats into the air,
          Tony -- half a beat behind the others -- does like-
          wise. We pull back and pan upward to the explosion
          of white hats arching through the air.

                         CUT TO:

          EXT. NAVAL ACADEMY YARD -- DAY

          We pick up a man named Stu Palmateer moving through
          the group of strolling Midshipmen and their guests.
          Palmateer is a poised, pleasant, tough man about
          forty, dressed in the uniform of a Marine Captain.
          He spots Tony in a group with his guests, near the
          statue of Tecumseh.

          TONY AND THE OTHERS
          Tony is standing with Kay, Mary, Hagen and Al Neri.
          Tony turns as --

                         (COMING UP)
          Congratulations, Tony.

                         TONY
          Thank you, Sir.

                         (THEN)
          Captain Palmateer, I'd like to
          present you to my Mother, Mrs.
          Adams -- my sister, Mary --

                         (THEN)
          And this is Mister Hagen, a very
          old friend, and Al Neri -- I used
          to ride on his shoulders when I
          was a little boy.
          Palmateer has greeted the ladies, shaken hands with
          the men, ad libbing appropriately. Nice to meet you,
          so on.

           (CONTINUED)

                         

                         

                         

                         

                         K

          7.

                         PALMATEER

                         (TO KAY)
          I'm sorry to drag him off this
          way, Mam.

                         (TO TONY)
          Ready?

                         TONY
          (nods, then)
          See you all tonight.
          Tony kisses his mother and he and Palmateer move off.

          EXT. GEORGE WASHINGTON BELTWAY - DAY
          We pick up a car coming along the Washington Beltway
          approaching the CIA turnoff near Langley, Virginia.
          We pan with it, then continue to pan over to a road
          sign which reads, "Central Intelligence Agency".

          INT. THE CAR - DAY
          Palmateer is driving. Tony sits next to him. They
          make the turnoff to the CIA, go up the access road,
          come to a stop at the entrance. As the Marine Guard
          comes out of the guardhouse and up to the car, Palmateer
          is taking out his identification.

          EXT. THE MAIN CIA BUILDING - DAY

          Palmateer and Tony, on foot now, move up to the build-
          ing. They move through the front doors.

          INT. CIA BUILDING (LOBBY) - DAY
          We are in the gigantic foyer of the CIA building. The
          camera is focused on the CIA motto, etched boldly into
          the white marble wall. It reads: "YE SHALL KNOW THE

          TRUTH AND THE TRUTH SHALL MAKE YOU FREE".
          We pan off the motto to pick up Palmateer and Tony
          as they move through the great, columned room toward
          the desk at the end. The Guard at the desk, seeing
          Palmateer's identification, signals him and Tony up
          to the Badge Office which is up a flight of steps on
          the right.

                         

                         

                         

                         

                         I

          8.
          INT. CIA BUILDING (BADGE OFFICE) - DAY
          as Palmateer shows his identification to a Woman
          behind the desk:

                         PALMATEER
          Captain Palmateer.
          (then, indic-

                         ATES TONY)
          Mister Adams. We have an appoint-
          ment with the D.D.P.
          The Woman gives Palmateer his badge, checking the photo
          on the badge against his face before she does so.
          Palmateer initials the form she gives him as she gives
          Tony his pass and stamps it in large letters: "MUST

                         BE ACCOMPANIED"
          INT. CIA BUILDING (LOBBY) - DAY
          Within the foyer is another gate separating the foyer
          from the inner sanctum of the building itself. We are
          on the Gate Guards as they check the badges and passes
          of those entering.
          They nod Palmateer and Tony through the gate. We pan
          them toward the bank of varicolored elevators. As
          they go to the elevators we hear ---

          PALMATEER'S VOICE
          This is a preliminary interview,
          not binding on either party. The
          next step, if it's agreed to take
          that step, would be a session with
          the Assessment and Evaluation sec-
          tion. Psychological testing.
          Biographical data. Ends with a
          polygraph test.

          INT. CIA BUILDING (SIXTH FLOOR) - DAY
          We are on the elevator doors as they open and Palmateer
          and Tony emerge. They walk down the corridor with its
          bare, off-white walls. The floors are covered with
          green vinyl. Only the office doors add color. They
          are painted variously, red, blue and yellow. As they
          go down the long hallway:

          PALMATEER
          Assuming no serious problems come
          to light, you will be given pro-
          visional operational approval, effec-
          tive for six months, during which

          (CONTINUED)

                         

                         

                         

                         

                         I

          9.

                         CONTINUED :
          PALMATEER (Cont' d)
          time you would be sent to our fac-
          ility at Camp Peary for a special
          training program --
          Tony and Palmateer enter the office at the far end
          of the hall.
          INT. MOREHOUSE'S OFFICE - DAY
          It is a reasonably large office as befits one of the
          top echelon CIA men. Morehouse, himself, sits at a
          large desk. The seal of the Agency is on the wall
          behind him, flanked by the National flag and the
          Agency flag, on standards.
          We are close on Thomas Morehouse, about fifty-five,
          an imposing, silver-haired man. He looks up from a
          dossier he's been studying, then:

                         MOREHOUSE
          Would you have any objections to
          being assigned to us, Mister Adams?

          ANOTHER ANGLE
          including Tony and Palmateer who sit across the desk
          from Morehouse.

                         TONY
          That would depend on the duty,
          Sir.

                         MOREHOUSE

                         (TO PALMATEER)
          How much have you told him, Stu?

          PALMATEER
          Just that there was an assignment
          we thought he'd be suited for.
          MOREHOUSE
          All right.

                         (THEN)
          We've been authorized and funded
          to carry out a top priority covert
          operation in Latin America. Captain
          Palmateer will be Field Coordinator.
          Your job would be liaison.
          There is a beat, then:

                         

                         

                         

                         

                         -R

          10.

                         CONTINUED:

                         MOREHOUSE
          Something the matter, Mister Adams?

                         TONY
          Before we go into polygraphs and
          so on, there's something you might
          not know.
          MOREHOUSE
          Fact is, we know quite a bit.

                         (READS FROM

                         DOSSIER)
          Adams, Anthony. No middle initial.
          Born New York Nursery and Childs'
          Hospital, February 3rd, 1951.
          Mother and father separated.

                         TONY
          Divorced.

                          MOREHOUSE
          Divorced.
          Morehouse scratches the correction into the dossier
          with a pencil, then:

          MOREHOUSE
          You were raised in Hanover, New
          Hampshire. Your mother teaches
          school. Name legally changed in
          1963. Attended Phillips Exeter.
          Lettered in ice hockey.

                         TONY
          And baseball.

                         MOREHOUSE
          And baseball.

                         (NODS)
          Your father is Michael Corleone, a
          resident of Nevada. He's principal
          stockholder of Genco International,
          a corporation that deals mainly in
          hotels and casinos, but they also
          have interests in an olive oil com-
          pany, a charter airline, laundromats,
          nursing homes, so on.

                         TONY
          Does this assignment have something
          to do with my father?
          (CONTINUED)

                         

                         

                         

                         

                         I

          11.

                         CONTINUED:

                         MOREHOUSE
          Only in so far as he has certain
          business connections that might
          be willing to help us.

                         TONY
          I haven't seen my father since I
          was ten years old.

                         MOREHOUSE
          No contact at all?

                         TONY
          I get a Christmas present and a
          check on my birthday -- that's
          about it. As far as I know, he's
          retired. Doesn't see anyone.

                         MOREHOUSE
          Yes. That's why I was anxious to
          have this meeting today -- while
          Mister Hagen was still in Washington.

                         ANOTHER ANGLE
          as Tony pauses, thinking. After a moment:

                         TONY
          I really don't know if I can help
          you very much, Mister Morehouse.

                         MOREHOUSE
          Let me ask you a personal question,
          Tony. Do you love your Country --
          (holds up hand)
          I'm not talking about the Nathan
          Hale kind of thing. Just simply,
          warts and all, do you wish this
          Nation well?

                         TONY
          Yes, of course.

                         MOREHOUSE
          Well, what if I told you -- and
          trying not to be grandiose -- that
          this one operation might very well
          insure peace on this Hemisphere for
          the next fifty years. Maybe more.

                         

                         

                         

                         

          12.

          EXT. A RESTAURANT TERRACE (WASHINGTON) - NIGHT

          We are on the terrace of a pleasant restaurant over-
          looking Washington D.C. The dome of the Capitol
          building is impressively floodlit as are the Washing-
          ton monument and the White House.
          Tony and Hagen are leaning on the terrace railing,
          looking out. A Waiter has just finished pouring coffee
          for them. As the Waiter moves off, Tony throws a look
          at Hagen, then:

                         TONY
          Well?

          HAGEN
          Let me make sure I'm clear on this.
          You're telling me that the Govern-
          ment would like the Corleone family
          to perform a service for them --
          probably involving some friends of

                         R
          ours in Latin America.

                         TONY

                          RIGHT.

                          HAGEN
          Where? Who's concerned? What
          would it entail, specifically?

                         TONY
          I don't know that yet.

                         HAGEN
          When will you be able to tell me
          these things?

                         TONY
          As soon as they decide I'm not a
          Russian spy.

          ANOTHER ANGLE
          as Hagen smiles, shakes his head. Bureaucracy.

                         TONY
          What they'd like to find out right
          now is: Would you be interested?

                         HAGEN
          I'll let you know.

                         TONY
          Do you have to check with my father?
          (CONTINUED)

                         

                         

                         

                         

                         IT

          13.

                         CONTINUED:

                         HAGEN

                         TONY

                         HAGEN

                         TONY
          I think about Tahoe sometimes.
          Is the fishing still good up
          there?

                         HAGEN
          I suppose so. I'm in Vegas most
          of the time.

                         F

                         TONY
          Those were good days.

                         HAGEN
          Yes. They were.

                         ANOTHER ANGLE
          As Al Neri comes up, looking at his watch.

                         NERI
          You still want to catch that
          shuttle to New York?

                         HAGEN
          Yes. Thank you, Al.

                         (TO TONY)
          Where can I reach you?
          Tony scribbles an address on a card and hands it to
          Hagen. As he looks at it:

                         HAGEN
          U.S. Navy Research Facility.

                         (THEN)
          Research on what?

                         TONY
          Nothing. It's a dummy outfit.
          That phone rings in Langley,
          but they'll know where I am.
          Hagen nods and puts the card in his wallet as we

                         TO:

                         

                         

                         

                         
          14.
          INT. INTERROGATION ROOM ONE (CIA) - DAY
          Tony is completing a test, fitting blocks together
          against time. The First Interrogator is watching
          impassively, stopwatch in hand.
          Tony finishes, straightens. The Interrogator clicks
          the stopwatch impassively, giving no indication
          whether or not Tony has passed the test.

                         CUT TO:

          INT. INTERROGATION ROOM TWO (CIA) - DAY

          The Second Interrogator, a psychiatrist, is seated

                         I
          behind a desk looking at a little steeple that he's
          made of his hands.

                         2ND INTERROGATOR
          And you were never curious?

          ANOTHER ANGLE
          including Tony who sits across the desk from the
          Interrogator.

                         TONY
          About what?

                         2ND INTERROGATOR
          Why your father sent you away.
          You never wondered about it?
          You must have thought something.

                         TONY
          I thought he had his reasons.

                         2ND INTERROGATOR
          And you don't feel any resentment?

                         TONY

          No.

          2ND INTERROGATOR
          What do you feel?

                         TONY
          Nothing.
          The Second Interrogator glances over. The trace of
          anger in Tony's last response has told him and us
          something.
          CUT TO:

                         

                         

                         

                         

          15.
          INT. INTERROGATION ROOM THREE - DAY
          Tony sits in a chair in a small room with acoustical
          tile on the walls and ceiling. Behind him is a desk-
          like structure with a built-in apparatus of dials,
          graph paper and odd, narrow metal pens.
          Tony is connected to the desk ensemble by three appara-
          tuses: a blood pressure cuff attached to his arm; an
          accordion tube around the chest to measure changes in
          breathing rhythms; a hand-held device with electrodes
          which measures changes in perspiration or galvanic skin
          response.
          The Third Interrogator sits at the desk behind Tony,
          asking questions slowly and checking the three styluses
          on the rolling graphs.

                         3RD INTERROGATOR
          Have you ever visited a Communist
          Country?

                         TONY
          No.

          3RD INTERROGATOR
          Have you ever belonged to a Commu-
          nist Organization?

                          TONY
           No.

          3RD INTERROGATOR
          Are you telling the truth?

                         TONY
          Yes.

                         3RD INTERROGATOR
          Have you ever had a homosexual ex-
          perience?
          Tony turns and looks at the Third Interrogator.

                         TONY
          No. Have you?
          The Third Interrogator tenses angrily for a moment,

                         THEN:
           3RD INTERROGATOR
           It's essential that you face the
          wall and answer the questions yes
          or no.
          (CONTINUED)

                         

                         

                         

                         

          16.

                         CONTINUED:
          Tony turns back to the wall, then:

          3RD INTERROGATOR
          Do you have any friends in the
          Communist Party?

                         TONY
          No.

                         CUT TO:
          INT. INTERROGATION ROOM THREE - DAY
          It is later. Tony is on his feet getting ready to
          leave as the Third Interrogator is checking over the
          graphs with their red ink squiggles.

                         3RD INTERROGATOR
          I get a high galvanic response on
          question twenty-three.

                         (CHECKS SECOND

                         GRAPH)
          I also have agitation indicated
          on your cardio tracing on the
          same question: 'Do you have any
          close friends in the Communist
          Party?' Your answer was negative.
          Would you like to amend that?
          Tony has rolled down his sleeve and put on his coat.
          He pauses at the door, then:

                         TONY
          It's my roommate up at Exeter.
          Phil Bodeen.

          3RD INTERROGATOR

          He's a Marxist?

                         TONY
          I don't know.

                         (GRINS)
          But last time I saw him he had an
          American flag sewn to the seat
          of his pants.

          3RD INTERROGATOR
          And how did you feel about that?

                         TONY
          (shrugs)
           L gis y y r r 1 1 +I GrL MCI n
          (CONTINUED)

                         

                         

                         

                         

          17.

                         CONTINUED:
           3RD INTERROGATOR
           That's interesting.

                          TONY
          What?

                         3RD INTERROGATOR
          You think it's perfectly all right
          to sew the Nation's flag to the
          seat of your pants?

                         TONY
          As a matter of fact I don't.

                         (THEN)
          But the Supreme Court does. Sym-
          bolic freedom of speech. Protec-
          ted under the First Amendment.
          As Tony turns to go:

                         3RD INTERROGATOR
          Adams?

                         TONY

                         (TURNS BACK)
          Sir?

                         0

                         3RD INTERROGATOR
          I take it you're prepared to die for
          that right?

                         TONY
          No, Sir. Dying doesn't fit in with
          my plans at all.

          3RD INTERROGATOR
          Just what are your plans, Mister Adams?

                         TONY
          I plan on passing this thing.

                         CUT TO:

          INT. HANDBALL COURT - DAY
          Tony and Stu Palmateer are in sweat clothes, playing
          a hard, no-mercy game of handball. Palmateer is close
          as he hits the ball:

          PALMATEER

                         YOU WILL

                         

                         

                         

                         
          CONTINUED:
          We whip pan to Tony, returning the shot.

                         TONY
          What makes you think so?

          PALMATEER
          You've got friends in high places,
          chino(.
          As Tony puts one away:
          PALMATEER
          Shot.

                         P (THEN)
          Believe me, you'll be reading-in
          on the project by next week.

                         CUT TO:
          EXT. IBD NATIONAL BUILDING (JERSEY CITY) - DAY
          We are on the front entrance of the building owned by
          the International Brotherhood of Dockworkers. This is
          the National Headquarters Building, so identified by
          a plaque of some kind. Tom Hagen enters this building
          followed by Al Neri.

          INT. BRADY'S OUTER OFFICE - DAY
          The President of the Union, Patrick Brady, a large,
          red-faced, hearty man, comes bursting out of a door
          and crosses up to Hagen and Neri.

                         BRADY
          Tom! Alberto! Come in. Come in.
          This way.
          (to Secretary)
           No calls, Maggie.

                         1

          INT. CONFERENCE ROOM - DAY
          This is a very plush room with heavy carpets, a long
          polished table and a large portrait of the Union's ex-
          president, Danny Devito. Under this portrait is a bar
          at which Brady stands pouring drinks into crystal
          glasses.
          As he t r=ls the drin s, y Yea the w..T..rzh Ner+

                          K
           then as he gi"Ies the secon d one to Iacer: °-
           (CDNT t D)

                         

                         

                         

                         

          18.

                         CONTINUED:

                         BRADY
          You're looking good, Tom. How do
          you keep your belly so flat?

                         HAGEN
          Mainly trying to run you down.

                         BRADY
          Do you think this outfit runs
          itself?

                         HAGEN
          I think you're stalling us, Pat.

                         BRADY
          Stalling?

                         (TO NERI)
          Will you listen to this guy?

                         (GRINS AT

                         HAGEN)
          You want to know when I got back
          from New Orleans? Ten-thirty last
          night. Big problems.

                         HAGEN
          I understand and I sympathize, but
          you've had our proposal for a month ---

                         ANOTHER ANGLE
          Brady crosses to the head of the board table where
          he opens a folder and starts riffling through some
          papers. As he does so, Hagen opens his briefcase.

                         HAGEN
          If you've misplaced it, I have a
          copy of the package plus a summary
          of Genco International's assets and
          projected profits based on audited
          financial statements with additional
          data supplied by our Comptroller.
          I also have the plans and estimates.

                         BRADY
          Okay. Okay. I found it.

                         (THEN)
          You want a Fifty Million dollar
          line of credit to be granted in
          full to Genco International and
          Subsidiaries and so on and so forth --

                         (LOOKS UP)
          For a hotel in Atlantic City? That's
          a lot of money, Tom.
          (CONTINUED)

                         

                         

                         

                         

          19.

                         CONTINUED:

                         HAGEN
          Not if they vote in local option
          gambling. And they will.

                         BRADY
          That's beside the point. Fact
          is, those days are over when Danny
          used to hand it out like so much
          free lunch. There's been a big
          reduction in our commitment to new
          construction loans.

                         HAGEN
          How long has that policy been in
          effect?

                         BRADY
          Let me read you something.

          ANOTHER ANGLE
          as Brady withdraws a newspaper cut-out, obviously an
          editorial, from the folder.

                         BRADY
          It's headed: 'Take the hood out
          of the Brotherhood.'

                         (LOOKS UP)
          Cute?

                         (READS)
          'It is precisely men like Patrick
          Brady -- who took over the Pres-
          idency of the IBD when the former
          President, Danny Devito was packed
          off to prison, who must be watched
          by the SEC. Because of the tremen-
          dous economic power of the Union
          Pension Funds, these men -- with
          their syndicate connections --- are
          putting the Underworld in a position
          to dominate the American economy.'

                         (THEN)
          What more can I tell you?

                         HAGEN
          You can tell me if the answer is
          yes or no.
          As Brady crosses to the wall where the large studio
          portrait of Danny Devito, a tough-looking little guy,

                         E
          (CONTINUED)

                         

                         

                         

                         

                         I

          20.
          CONTINUED:
          is framed in a place of honor. After a moment Brady
          shakes his head and turns back to Hagen.

                         BRADY
          I'm sorry, Tom. Much as I love and
          respect our former president, I
          don't have any burning urge to end
          up rooming with him at Leavenworth.

                         HAGEN
          Before you give me your final re-
          fusal, I'd like to say one thing ---

                         BRADY
          You've just had my final refusal,
          Hagen. The answer is no.
          ANOTHER ANGLE
          as Hagen looks at Brady for a moment, then starts
          putting papers back into his briefcase. As he does so:

                         BRADY
          No hard feelings. It's just a
          policy decision of the Board.

                         HAGEN
          I understand and I thank you for
          your time ---

                         NERI

                         (TO BRADY)
          I'll see you around, Pat.

                         BRADY
          What's that supposed to mean?

                         NERI
          It means I'll see you around.

                         BRADY
          What am I supposed to do? Get
          scared? Piss in my pants?
          No one is trying to intimidate you.
          The Corleone Family doesn't do
          business that way.

                         BRADY
          The Corleone Family doesn't do
          (CONTINUED)

                         

                         

                         

                         

          21.

          CONTINUED:

                         BRADY (CONT'D)
          a hell of a lot of business, period.
          The way I hear it, you're finished.
          With Mike locked away in a rubber
          room someplace, and a two million
          dollar tax lien on your Vegas prop-
          erty, you got the balls to come in
          here and try to run muscle on me.
          Get out of here. Both of you.

                         HAGEN
          We were just going.
          Hagen has packed his papers into his briefcase. Now
          he nods at Neri and as both turn, and start toward the

                         DOOR:

                         BRADY
          Wait a minute.
          Hagen turns back. Brady comes up with the Genco Inter-
          national loan application.

                         BRADY
          Take this along with you in case
          you run out of toilet paper on
          the flight back to Vegas.
          Hagen takes the application from Brady, then quietly:
          HAGEN
          Don't ever think that the Corleone
          Family is finished, Mister Brady.
          That would be a mistake.
          Hagen and Neri now turn and exit. Brady is looking
          after them. Gradually the bravado drains from his
          big, pink, Irish face and he crosses to the telephone
          and picks it up.

                         BRADY
          Get me Sam Maatrocina.

                          CUT TO:
          EXT. MAATROCINA'S YACHT (LONG ISLAND MARINA) - DAY
          We are close on the hatchway to the main Salon as
          Sam Maatrocina, the slick, sharp, middle-aged Don
          of a powerful New York family, comes up into shot
          and pauses, smiling off:
          (CONTINUED)

                         I

                         

                         

                         

                         

                         4

          22.

                         CONTINUED:

                         MAATROCINA
          There's a trick to it, Pat.

          ANOTHER ANGLE
          including the canopied afterdeck of Maatrocina's
          lovely yacht, moored among others of its type in
          a splendid North Shore Long Island marina.
          Pat Brady, looking out of place in his suit and
          city shoes, stands trying to light a cigar with a
          table lighter.

                         BRADY
          Hello Sam. I'm sorry to have
          bothered you.
          We pull back slightly as Maatrocina --- in neat yacht-
          ing whites and deck shoes - comes up followed by a
          cold-looking man, Ralph Augusto, Maatrocina's hood.

                         MAATROCINA
          No bother at all.

                         (TO AUGUSTO)
          Light the man's cigar for him,
          Ralph.
          As Augusto comes up, takes the lighter from Brady and
          sets about the business of lighting the big, Union

                         LEADER'S CIGAR:

                         MAATROCINA
          So what's our friend Tom Hagen up
          to these days?

                         BRADY
          Still shopping around for that loan.

                         MAATROCINA
          Lots of luck to him.

                         BRADY
          Thank you, Ralph.
          Augusto, having lit Brady's cigar, nods expression-
          lessly and sits as:

                         BRADY
          They tried to throw a scare at me.
          (CONTINUED)

                         

                         

                         

                         

                         I

          23.

                         CONTINUED:

                         MAATROCINA
          Don't worry about it.

                         BRADY
          Something about Hagen kind of
          bothered me.
          Maatrocina offers a platter of grapes to Brady.

                         MAATROCINA
          You like a grape?

                         BRADY
          No thank you.

                         I

                         MAATROCINA
          I'll tell you how tough Hagen is --

                         (EATS A

                         GRAPE)
          Ralph Augusto will make him squat
          down in the middle of Times Square
          in the rush hour and take a shit.

                         BRADY
          Yeah? What about Al Neri?

                         0

                         MAATROCINA
          I tell you don't worry -- don't
          worry. You stick with the Maatro-
          cina family you're safe as church.

                         (THEN)
          That I promise you on my mother's
          grave.
          As Maatrocina leans across with the grapes again:

                         MAATROCINA
          Do me a favor. One grape. I grow
          'em at my own place. They're de-
          licious.
          As Brady takes a grape, Maatrocina looks over at Ralph

                         AUGUSTO:
          ANOTHER ANGLE - FEATURING AUGUSTO
          As he nods almost imperceptibly, gets up and goes,

                         CUT TO:

                         E

                         

                         

                         

                         

          24.

          INT. NETWORK NEWSROOM (WASHINGTON) - DAY
          We pick up Elizabeth Ann Dunne, about 30. She is a
          television personality, mainly an interviewer of
          celebrities, although sometime a Newswoman and talk-
          show performer. She is poised, attractive, warm,
          hirp. She is coming out of an office. She crosses
          through the newsroom with its teletypes and general
          sense of activity. A Cameraman comes up to her:

                         CAMERAMAN
          You ready, Liz?

                         ELIZABETH
          Fifteen minutes. Out in front.
          We follow her out of the newsroom, down a hallway and
          around a corner and in through a door marked, "Projec-
          tion Room One".

          INT. PROJECTION ROOM ONE
          Elizabeth comes into the projection room, pausing in
          the doorway as her eyes adjust to the change in light.

          ANOTHER ANGLE
          We see that Tony, in uniform, is the only other person
          in the projection room. Tony takes out a cigarette
          lighter.

                         TONY
          Here.
          As Tony flicks the lighter on:

                         ELIZABETH
          Thank you.

                         TONY
          My name's Adams. Tony Adams.

                         (THEN)
          I know who you are Miss Dunne.
          As Elizabeth finds a seat, we see that what is being
          run in this Projection room is an interview between
          Elizabeth and a large, attractive, Latin-American
          political leader named Armando Vidal. They sit to-
          gether at the pool area of a luxury hotel. Vidal wears
          a fatigue uniform without any insignia whatsoever. The
          image of Elizabeth on screen is saying:

                         (CONTINUED)

                         C

                         

                         

                         

                         

          25.

                         CONTINUED:

                         ELIZABETH'S VOICE
          Senor Presidente, there are still
          those who say that in spite of the
          advances you've made, your Country
          still falls short of the Democratic

                         IDEAL ---

                         VIDAL
          Obviously. However, the convulsions
          that my Country went through four
          years ago last February were not so
          much a revolution as they were a --

                         (PAUSES)
          I am thinking of the labors of
          I Herculi.o --- in the stable

                          ELIZABETH'S VOICE
           A cleansing.

                         VIDAL
          Exactly. Muchas gracias. My Country
          was befouled by the corruption of
          its leaders and their exploitation
          of the people. A hard cleansing was
          needed and sometimes that is painful --
          but the pain is over and now I am
          hoping that my little Country and
          your great Nation can once again be

                         FRIENDS ----

                         ELIZABETH'S VOICE
          I'm sure a lot of people say Amen
          to that, Senor Presidente.
          On the screen the scene has shifted to an attractive
          beach area where Vidal, in a wet suit, is adjusting
          his scuba gear preparatory to diving. Admiring child-
          ren and Elizabeth Ann Dunne watch. Over this:

                         J

                         ELIZABETH'S VOICE
          For a glimpse of another facet of
          Armando Vidal's nature, we spent the
          last day at the beach at Finca del
          Sol where El Presidente exhibited
          his skills as a scuba diver -- one
          of his favorite hobbies.
          On the screen Vidal tousles a kid's hair and crosses
          into the water. As he wades out, Tony turns to
          Elizabeth.

          (CONTINUED)

                         

                         

                         

                         

                         T

          25.

                         CONTINUED:

                         TONY
          When will this be shown?

                         ELIZABETH
          Sometime in the Fall.
          While Elizabeth, in the projection room, is saying
          this, her image on the screen has turned to the TV
          camera and is saying:

                         ELIZABETH'S VOICE
          This is Elizabeth Ann Dunne coming
          to you from Finca del Sol where --
          in an exclusive interview -- El
          Presidente, Armando Vidal, has just
          extended the hand of friendship
          from quote, 'his little Country to
          our great Nation.'

                         I

                         ANOTHER ANGLE
          The reel is over. The screen goes blank and the
          lights in the projection room go on.

          ELIZABETH
          What is it that you're working on?
          Some sort of a psychological pro-
          file on Vidal for the Navy Depart-
          ment?

                         TONY
          Right. There's a couple of ques-
          tions I wanted to ask you, if that's
          okay.

                         (THEN)
          What are you doing about lunch?

                         ELIZABETH
          Ignoring it. I've got to pick up
          some shots around town, but you're
          welcome to come along.
          They are at the projection room door. As Tony opens
          it for her she pauses, then:

                         ELIZABETH
          You ever play any baseball, Mister
          Adams?

                         TONY
          How did you guess that?

                         

                         

                         

                         

                         I

                         I

          26-A.

                         CONTINUED:

                         ELIZABETH
          I saw you pitch a no-hitter against
          Army two years ago.

                         TONY
          You're kidding.

          ELIZABETH
          I don't kid about no-hitters.
          Elizabeth exits. Tony follows. We ---

          CUT TO:

          EXT. MOUNT VERNON - DAY
          as a troop of Boy Scouts goes past us and clears the
          Visitor's Gate, exposing the bowling green, the court-
          yard and far down, framed by giant black oak and maple
          trees, we see the Mansion with its pure lines and

                         SIMPLE ELEGANCE.
          Tony and Elizabeth appear, followed by the Cameraman
          and one or two more. As Tony and Elizabeth cross into
          the beautifully kept grounds of the old Plantation ----

                         TONY
          None of my business, but what were
          you doing at an Army-Navy ball game?

                         ELIZABETH
          We were putting together a special
          on Arne Grundellius. He'd just been
          appointed Secretary of State. He
          threw the first ball that day.

                         TONY
          You've got a pretty good memory.

                         ELIZABETH
          And you've got a pretty good slider.
          But I've got to be honest, you threw
          a lot of junk in the last two innings.
          As Tony shoots her a look:

                         ELIZABETH
          My old man was on the Sports Desk
          of the Boston Globe for twenty-five

                         (CONTINUED)

                         

                         

                         

                         

                         I

                         CONTINUED:
          ELIZABETH (CONT'D)
          years. I was practically raised at
          Fenwick Park in the Carl Yastremski

                         DAYS.
          As Elizabeth crosses and starts working out a shot
          with the Cameraman, Tony is eying her speculatively.

                         CUT TO:

          EXT. THE CRYPT OF GEORGE WASHINGTON - DAY

          We are at the little open-fronted white marble burial
          vault of George Washington. We pull back to include
          Tony as he looks at the sarcophagus.

          ELIZABETH'S VOICE
          Tony? We're finished ----
          As Tony turns, we pull back and pan to include Eliza-
          beth, who stands in the little leafy path leading
          from the vault. As Tony crosses and joins her, we

                         CUT TO:
          INT. SPACE AND FLIGHT MUSEUM - DAY
          We are on the balcony as Elizabeth sets up a shot
          from the Wright Brothers' "Flyer" panning to
          "The Spirit of St. Louis", and then to one of the
          Space Capsules. Tony is watching Elizabeth. She
          looks over, catches his eye, smiles.

                         CUT TO :

          EXT. GEORGETOWN - NIGHT

          as Tony and Elizabeth approach her apartment on a
          quiet, Georgetown side street. The cold, faintly
          blue light of the street lamp at the corner throws
          leaf shadows on their faces as they come up to the
          front door.

          CLOSER --- AT THE DOOR
          She opens the door, steps to one side and gestures
          him in.

                         ELIZABETH
          One drink, okay?

                         

                         

                         

                         

          26-C.

          INT. ELIZABETH'S APARTMENT - NIGHT

          We are close on a picture of Elizabeth and Arne
          Grundellius, the Secretary of state, whom we might
          remember by sight as the Speaker at Tony's gradu-
          ation. The picture is informal, taken at an open-
          ing night at Kennedy Center.
          We pull back to include Tony, looking at the picture
          as Elizabeth comes up with a couple of drinks.

                         TONY
          You and Grundellius?

          ELIZABETH
          At Kennedy Center.

                         TONY
          He really gets around.

                         ELIZABETH
          Well, he got around me anyway.

                         TONY
          Oops.

                         ELIZABETH
          No harm, no foul.

                         (THEN)
          I wasn't fighting him off very
          hard.

                         ANOTHER ANGLE
          As Elizabeth picks up another picture, this one a
          framed studio portrait, inscribed, of the Secretary.
          As she looks at it:

                         ELIZABETH
          We had quite a little thing going
          for a while. I think he actually
          gave up two starlettes and a bare-
          back rider -- temporarily.

                         TONY
          You still see him?

          ELIZABETH
          Once in a while.

                         (THEN LAUGHS)
          Said she wistfully.

                         

                         

                         

                         

                         I

          26-D.

                         CONTINUED:

                         TONY
          I was just going.

                         ELIZABETH
          Don't mind me. I'm just your basic
          Boston Irish. We bruise easily and
          heal slowly.

                         CUT TO:

          EXT. ELIZABETH'S FRONT DOOR - NIGHT

          as Tony comes out. Elizabeth stands in the doorway.

                         ELIZABETH
          Call me.

                         TONY

                         (NODS)
          Goodnight.
          Tony goes down the street. She watches after him
          as we ---

                         CUT TO:
          EXT. THE BATTERY (NEW YORK CITY) - NIGHT
          A limousine comes down past the Battery Park and moves
          toward the Staten Island Ferry Building. It is about
          ten o'clock at night.

          INT. THE LIMOUSINE - NIGHT

          Frankie Rizzi, about 30, is driving. Frankie is the
          son of Connie Corleone and Carlo Rizzi. In the back-
          seat is Al Neri. Frankie looks off toward the river.
          We can see the ferry coming in.

                         1

                         FRANKIE
          Here it comes now.

                         ANOTHER ANGLE
          as the ferry comes up. Noses into the slip. Creak
          of pilings. The limousine drives onto the ferry.

                         

                         

                         

                         

                         I

          27.
          INT. THE LIMOUSINE - NIGHT
          Frankie is listening to the baseball scores. Neri is
          nervously checking his watch. A few passengers are
          boarding. There are no other cars.

          ANOTHER ANGLE
          Half a dozen loose, Puerto Rican kids come bopping
          up the automobile deck. One is listening to a radio
          that he holds to his ear. He crosses up to Frankie.

          PUERTO RICAN KID
          Hey Mister, you got a cigarette?

                         FRANKIE
          I don't smoke.

          PUERTO RICAN KID
          You got a dollar?

                         FRANKIE
          Beat it.

          ANOTHER ANGLE

                         I
          as Neri, who hasn't been paying attention to the kids,
          now looks up in annoyance.

                         NERI
          Give him a ---
          Suddenly Neri breaks off, sensing something wrong.
          The other Puerto Ricans are surrounding the limousine.
          As Neri dives for the door of the car ---

                         NERI

                         LOOK OUT!

                         ANOTHER ANGLE
          Guns have appeared in the hands of the Puerto Ricans
          and they start to blast at the limousine from outside.
          The ferry whistle is blowing.

                         ANOTHER ANGLE
          Neri :its the deck, a !4 na , cones t _t°? -- --u n :t a-
          blast:.:.CT.

                         

                         

                         

                         

          28.

                         ANOTHER ANGLE
          One of the Puerto Rican kids is hit squarely in the
          chest. He flies back, arms up, legs spread-eagled,
          like he was hit in the chest with a baseball bat.

                         ON FRANKIE
          Frankie is out of the car, reaching for his gun. Before
          he can get it clear, he is hit in the shoulder and spun
          to the splintery, oily deck.

                         ANOTHER ANGLE
          As a Puerto Rican goes to finish off Frankie, Al Neri
          blows him away.

          REVERSE ANGLE
          as two other Puerto Ricans blast Neri simultaneously.
          Neri's eyes go wide. He coughs. A big, stringy gob
          of blood appears in his mouth, vomits out onto his
          shirtfront.

                         ON NERI
          He goes down to his knees, tries to raise his gun
          for one last shot. A Puerto Rican grins and putting
          his gun an inch from Neri's face, pulls the trigger.

                         ANOTHER ANGLE
          People are screaming, running. The ferry is starting
          to pull from the dock. The four remaining Puerto
          Ricans run for the end of the boat, make the leap
          over the churning water from the ferry to the landing.

          FULL SHOT - FROM LANDING
           The ferry boat is still
           pulling away as the four
           Puerto Ricans land on the pier and disappear into the
           night. The ferry boat's whistle is blowing shrilly.
           Off its starboard bow we can see the Statue of Liberty.
           Over this we hear ---

           PRIEST'S VOICE
           I am the Resurrection and the Life,
           and he that believeth in Me, al-
           though he be dead, shall live ---

                         E
          DISSOLVE TO:

                         

                         

                         

                         

          29.
          EXT. ITALIAN-CATHOLIC GRAVEYARD (LONG ISLAND) -- DAY
          We are on the ornate tombstone of Don Vito Corleone,
          the Godfather. We are pulling back from it as

                         PRIEST

                         (CONTINUING)
          And everyone that liveth and be-
          lieveth in Me shall not die forever.
          We have pulled back to include the burial services
          for Neri. At the graveside are Tony, in dress blues,
          Hagen, Rocco Lampone and others.

                         ANOTHER ANGLE
          The services finish and the group breaks into smaller
          informal, groupings. We move to Tom Hagen and Tony, who
          have drifted to one side.
          ANOTHER ANGLE
          as Hagen indicates a moon-faced man, Umberto Croce,
          about sixty, who is approaching them.

                         HAGEN
          Umberto Croce out of Tampa. He
          took over the whole Florida thing
          after Hyman Roth and Johnny Ola
          were retired.
          Umberto has come up.

                         HAGEN
          Umberto. Michael's son. Anthony.

                         CROCE
          Your father must be proud.

                         (THEN)
          What do you think, Tom? They're
          saying it was Maatrocina.
          Hagen makes a little gesture, reminiscent of the God-
          father; a kind of upward opening of the hand, as if
          gently letting a tiny bird free.

          ANOTHER ANGLE
           As Frankie Rizzi, his arm in a sling, and Santino
          Corleone, Sonny's oldest son, now in his middle thir-
          ties, come up together:
          (CONTINUED)

                         

                         

                         

                         

                         - 41

          30.

                         CONTINUED:

                          HAGEN
           Say hello to your cousin, Frankie
           Rizzi - your Aunt Connie's boy --
           and this is Santino, your uncle
          Sonny's oldest.
          As Tony shakes hands with Santino and Frankie:

                         FRANKIE

                         (TO TONY)
          Excuse my left hand.

                         SANTINO
          Frankie was in that little shit
          storm on the ferry boat.

                         CROCE
          The whole thing don't make sense
          to me.

                         FRANKIE
          All I know is Al got a phone call
          from that nephew of his, Tommy,
          who runs numbers in Staten Island.

                         SANTINO
          Fucking punk.

                         FRANKIE
          It was something about Al's sister
          being sick bad with the ptomaine or
          something. We walked right into it.

                         CROCE
          What about Tommy?

                         FRANKIE
          The nephew? Nobody seen him since.

                         SANTINO
          Ask me he's out in the Narrows with
          about eight slot machines tied
          around his neck.

                         ANOTHER ANGLE
          As Rocco Lampone comes up, clearly agitated:

                         LAMPONE
          How do you like the gall of that
          bastard? Showing up here.

                         I

                         (CONTINUED)

                         

                         

                         

                         

          31.

                         CONTINUED:

                         SANTINO
          Maatrocina?

                         LAMPONE
          If your old man was alive, that
          son of a bitch would be eating
          his dinner in hell tonight.

                         HAGEN
          Be patient, Rocco, and trust me.

                         (SMILES OFF)
          Sam.

                         ANOTHER ANGLE
          as Sam Maatrocina comes up, his expression suitably
          somber.

                         MAATROCINA
          Tom. Fellas. Go figure life, huh?
          A guy like Al Neri. Who'd of fig-
          ured him to get mousetrapped like
          that?
          Maatrocina is shaking hands with Hagen, Rocco and
          Frankie. As he does so:

                         HAGEN

                         (INDICATES)
          Mike's son, Anthony. Sam Maatrocina.
          Maatrocina holds out his hand to Tony. Tony just looks
          at him coldly for an insulting split second. Maatro-
          cina's expression doesn't change, and the outstretched
          hand moves to squeeze Tony's bicep. Now he shows his
          teeth in a grin

                         MAATROCINA
          The arm on the guy.

                         (THEN)
          If you ever need a job come see me.
          Maatrocina turns and moves off. Tony is looking after
          him.

                         HAGEN
          Mistake, Tony. Never let a man
          like that know what you're think-
          ing.

                         CUT TO:

                         11

                         

                         

                         

                         

                         I

          32.
          INT. LIVING ROOM (DON CORLEONE'S HOUSE) - NIGHT
          The members of the Corleone Family and some of their
          friends are gathered at the Godfather's old house in
          the Mall. The tenor of the group is subdued, although
          the tensions of the day are beginning to ease.
          We pick up Frankie who is guiding Tony through the
          room.

                         FRANKIE
          Santino always liked the old place.
          He picked it up after Pentangeli
          knocked himself off.

          ANOTHER ANGLE.
          as they pass Umberto Croce who is coming out of the
          dining room where a buffet has been set up. Croce
          has a heaping plate of food. He pauses, shaking his
          head.

                         CROCE
          Sad day, sad day.
          As Croce moves on, shaking his head, Frankie looks
          after him.

                         IS

                         FRANKIE
          Damn near ruined his appetite.

                         (THEN)
          Good man though. From the old days
          before the Spics and all.

                         TONY
          How strong is the Corleone family
          connected in Latin America?

                         FRANKIE
          We got some people down there used
          to work for us in the hotel. Now
          they do odd jobs. Help with the
          airline. Like that.

                         TONY
          Tell me about the airline.

                         FRANKIE
          It's what you call non-scheduled.
          You know?

                         (THEN)
          If we ever go out of business, half
           the rock groups in the country'd

                         (CONTINUED)

                         

                         

                         

                         

                         S

          33.

                         CONTINUED:
          FRANKIE (Cont'd)
          be standing around with empty spoons
          stuck up their noses.
          As Sonny's daughter, Francesca, comes by talking with
          her twin sister, Barbara, who is now a nun. They are
          now about forty.

                         FRANKIE
          Hey, Francesca. Barbara.

                         (TO TONY)
          You remember the twins.
          As the women greet Tony, old Mister Nazorine, the baker,
          comes by.

                         FRANKIE
          And here's Mister Nazorine -- still
          makes the best tarelles in town.
          As Tony is greeting the old man:

                         FRANCESCA
          Tony, Tony. I remember the day
          you were born. A blizzard. And
          grandpa and Tessio and Clemenza
          were sitting out in the backyard
          in the snow with a five gallon jug
          of grappa, celebrating.

                         BARBARA

                         (LAUGHS)
          And grandma was out there yelling,
          Disgrazia! Infamita! You could
          hear her clear to Freeport.

                         FRANKIE
          (tugs Tony off)
          Later, huh?

          INT. DON CORLEONE'S OFFICE - NIGHT
          Gathered in Don Corleone's old corner office are Santino,
          Tom Hagen, Rocco Lampone and Umberto Croce. Tony and
          Frankie enter.

                         HAGEN
          Close the door, please, Frankie,
          and make yourself comfortable.

                         (THEN)
          First, Tony, thank you for waiting.
          We all appreciate it.

          (CONTINUED)

                         

                         

                         

                         

                         A

                         L

          34.

                         CONTINUED :
          Tony nods. All are seating themselves as:

                         HAGEN

                         (TO TONY)
          Would you care for a little wine?
          A little Anisette? No? All right.

                         (THEN)
          I've invited Umberto Croce to sit
          in with us because of his strong
          connections in Latin America and
          in the exile community down in
          Florida -- and because he's a
          trusted and valued friend of the
          Corleone family.

                         (THEN)
          And now, if your friends in Langley
          have decided that you're not a
          Russian spy, perhaps you can tell
          us the nature of the service they'd
          like us to perform.

                         TONY
          It's a political assassination.
          The target is Asmanda Vidal.

                         0

          ANOTHER ANGLE
          Santino, whose attitude has been somewhat sardonic
          throughout, now breaks out in a bray of laughter.
          Tony turns on his cousin, then coldly and quietly:

                         TONY
          If it's too much for you, just say
          so. We can break this off right now ---
          Santino, taken aback at the cold authority in Tony's
          manner, turns for support.

                         SANTINO
          What did I say, for God sake.

                         (TO TONY)
          Whaddya so touchy?

          HAGEN
          When is this planned for?

          TONY
          Next February. He's having a week-
          long celebration on the Fifth Anniv-
           ersary of the Revolution.

                          (THEN)
           We're planning it to look like an

                         40
          accident -- or natural causes.

                         (CONTINUED)

                         

                         

                         

                         

          35.
          CONTINUED:
          SANTINO
          That's a hell of a trick if you
          can do it.

                         TONY
          Our Technical Services Division is
          experimenting in two areas. One is
          a scuba diving wet suit designed to
          malfunction at a critical depth.
          The other is a toxic biological
          material -- a strain of botulism
          that's tasteless, colorless and
          odorless -- and so lethal that
          one drop on his food or on his
          toothbrush would be fatal inside
          of an hour.

                         CROCE
          That would mean getting somebody
          close to him.

                         HAGEN
          Could that be done?

                         CROCE
          I think so.

                         HAGEN

                         (TO TONY)
          And what's your part in all this?

          TONY
          I'm the cut-out. The circuit
          breaker.

                         HAGEN
          The only link between our people
          and the Government?

                         TONY
          That's right.

                         (THEN)
          There's a second phase to this
          operation. It consists of spread-
          ing confusion -- planting explos-
          ives -- the Police Barracks -- the
          Central Power Station -- so on,
          which will hopefully trigger an
          uprising of the Anti-Vidalista
          forces.

                         (CONTINUED)

                         

                         

                         

                         

          36.

                         CONTINUED:

                         SANTINO
          I thought these bastards wanted a
          hit ---
          (THEN)
          They're looking for World War Three.

                         HAGEN

                         (TO LAMPONE)
          What do you think, Rocco?

                         ANOTHER ANGLE
          As Rocco Lampone -- who walks with a limp as a souvenir
          of his service in World War Two-- thinks about it, then:

                         LAMPONE
          You're starting to talk about a
          pretty big operation. You're
          going to need an assault team --
          maybe two. Small arms. Ammo.
          Hand grenades. Field radios. A
          support network down there. You'll
          have to have transportation. Stag-
          ing areas.

                         TONY
          The supplies can be made available.

                         LAMPONE
          How about getting them down there?

                         FRANKIE
          No problem. We can carry eight tons
          a trip in the DC Six.

                         HAGEN

                         (TO UMBERTO)
          What do you think, Umberto? Can
          you get your hands on a few Anti-
          Vidalistas who wouldn't mind going
          down there and raising a little
          hell?

                         UMBERTO
          I'll talk to Doctor Barcenas.

                         SANTINO
          And we'll all end up with our balls -
          in the g='av.

          FRANKIE
          You're getting old, Santino.

          (CONTINUED)

                         

                         

                         

                         

          37.

                         CONTINUED:

                         SANTINO
          I'm still young enough to whip
          your ass.
          As Frankie starts to rise, Hagen puts a stop to any
          further discussion.

                         HAGEN
          All right. That's it.

                         (THEN)
          We all agree -- except Santino --
          that what Tony proposes is possible
          although not easy.

                         (THEN)
          Of course, financial arrangements
          will have to be worked out --

                         TONY
          There's no problem there. Every-
          thing will be handled through me
          and in cash.

                         HAGEN
          You can tell your friends that they
          have a deal.

          TONY
          Good.

                         HAGEN
          Tell them this too --- what the
          Corleone family wants for its part
          in this operation is the uncondi-
          tional pardon of Danny DeVito. And
          that will have to come first.

                         TONY
          I don't know if they'll accept that.

                         HAGEN
          It's a non-negotiable condition.

                         (THEN)
          We'll start getting things lined
          up, but we won't move until Danny
          walks out of Leavenworth.

          ANOTHER ANGLE
          Santino to Hagen:
          SANTINO

                         YOU REALLY
          Devito?

                         (CONTINUED)

                         

                         

                         

                         

                         I

          38.

                         CONTINUED:

                         HAGEN
          Why not?

                         SANTINO
          That would have to come right from
          the White House.

          HAGEN
          Where do you suppose the hit order
          came from?

                         CUT TO:

          EXT. CAMP PEARY, VIRGINIA - DAY

          Camp Peary is the clandestine training facility of
          the CIA, under military cover. It is near Williams-
          burg, Virginia, a couple of hours drive from Washington.
          Known by the official cryptonym "ISOLATION" it is un-
          officially called "The Farm".
          The enormous, thickly-wooded area is divided intern-
          ally into tightly controlled training areas. A high
          chain-link fence topped with barbed wire surrounds
          the base. Signs on it read: "U.S. GOVERNMENT RESER-

          VATION. NO TRESPASSING."
          We are presently on such a sign. We hold for a moment
          as we hear ----

                         INSTRUCTOR'S VOICE
          A doomsday car is a vehicle which
          is loaded with a high explosive,
          such as gelignite, and left in an
          area where it will do the most dam-
          age when detonated --

          EXT. TRAINING AREA (CAMP PEARY) - DAY

          We are on the Instructor, a lean, mean-looking Army
          Sergeant in impeccable fatigues. His manner of speech
          is Southern.

                         INSTRUCTOR
          Do I read disapproval on your face,
          Mister Adams?
          ANOTHER ANGLE
          including the training class, thirty or forty men in
          army fatigues. Some are dark men, mustachioed, poss-
          ibly either Saudis or Iranians. Among these we find Tony.

          (CONTINUED)

                         

                         

                         

                         

          39.

                         CONTINUED:
          Although he doesn't answer the Instructor, we can
          read something less than total approval on his face.

                         INSTRUCTOR
          In view of the fact that we're
          getting our butts kicked rosey all
          over the world, it might behoove
          us to take a lesson from the IRA
          and the PLO.

                         (THEN)
          Now, the first thing those boys'll
          do when they aim to spread a little
          unhappiness in the ranks of the
          righteous, is steal a motor vehicle --

                         (THEN)
          You know how to steal a motor
          vehicle, Mister Adams?

                         TONY
          No I don't.

                         INSTRUCTOR
          By the time you leave Camp Peary,
          you'll be an expert.
          The Sergeant turns to the others.

                         INSTRUCTOR

                         (CONTINUING)
          All right, gentlemen, this is a
          remote control detonator... on safe.
          He holds up a detonator in his hand, then points off:

                         INSTRUCTOR
          And that yonder is a doomsday car.

                         ANOTHER ANGLE
          including an old car set in a valley some two hundred
          yards away. The Instructor takes the detonator off
          safe and as he activates the charge in the doomsday
          car, we zoom in. The explosion fills the screen as
          the doomsday car is blown to hell.

          DISSOLVE TO:

           INT. CLASSROOM (CAMP PEARY) - DAY
           We are in a classroom in wooden Army barracks. The
           teacher, a Navy Commander, is lecturing. There are

                         E

                         (CONTINUED)

                         

                         

                         

                         

          40.

                         CONTINUED:
          chalked cryptograms on the blackboard behind him.
          They are variously, ODYOKE, ODACID, ODENVY, AELADLE,
          AEJAMMER, AEBROOM, KUDOVE, KUDESK, KUCAGE.

                         COMMANDER
          The cryptonym is a name used in
          place of the true name. In the
          company, cryptonyms consist of
          two letters that determine the
          general category followed by a
          word -- the United States Govern-
          ment is designated by the letters
          "O" and "D", and the word "Yoke".

                         (POINTS)
          O-D-yoke. The Department of State,
          O-D-acid.

                         STUDENT
          What's that last one?

                         INSTRUCTOR
          O-D-envy?

                         (GRINS)
          That's the FBI.
           As the group of trainees laugh, we

                          CUT TO:

          EXT. A CANYON ROAD (CAMP PEARY) - DAY

          Up a dirt road, between the trees, comes a platoon of
          trainees, in jungle gear wet with sweat, double-timing
          as they chant:

                         PLATOON
          Hut two three four --

                         REEP-BY-YA-LO'

                         BEEP-BY-YA-LO-

                         LO-RIGHTS-LO--
          We move into the platoon, pick out Tony as he runs,
          not counting. The Drill Instructor runs up alongside
          of Tony.

                         INSTRUCTOR
          You're not singing, Mister Adams.
          Aren't you happy in our little
          group?
           As Tony looks over at the pacing Drill Instructor,
          then starts chanting with the rest
          CUT TO:

                         

                         

                         

                         

          41.

          EXT. PISTOL RANGE (CAMP PEARY) - DAY

          Tony is at the pistol range, firing. As he finishes
          up the clip and draws the target back to him on a
          pulley device, the Pistol Instructor comes up and in-
          spects the target with the bullseye chewed out.

                         PISTOL INSTRUCTOR
          Good shooting. You do a lot of
          hunting?

                         TONY
          No.

                         PISTOL INSTRUCTOR
          Must run in the family, then.

                         TONY
          You might be right.

                         CUT TO:

          INT. LOCK PICKING CLASS (CAMP PEARY) - DAY

          There are diagrams on the blackboard. Tumblers,
          locks, keys. There are big, half-sections of locks.
          Half a dozen members of the class, including Tony,
          are working with picks on locks. As the Teacher
          helps Tony.

                         TEACHER
          On the ordinary pin tumbler cylin-
          der lock, the spring actuated
          drivers are partly in the shell
          and partly in the plug. The trick
          is to lift them up so the plug can
          turn freely --

                         (THEN)
          That's right.
          The lock has opened.

                         TEACHER
          Once you've gotten the lock picked
          and the door opened the best thing
          is to tape the bolt mechanism back
          so you won't have to keep picking
          the lock -
          As the Teacher instructs the class - a la Watergate
          break-in -- how to tape back the bolt mechanism, we

                         CUT TO:

                         

                         

                         

                         

                         L

          42.

          EXT. TRAINING AREA (CAMP PEARY) - DAY

          We are on the Drill Instructor who brandishes a
          Marine Corps knife, six-inch blade, brass knuckles
          incorporated into the handle.

                         INSTRUCTOR
          This object is a United States
          Marine Corps killing knife. I
          will now demonstrate that it is
          not worth doodley shit if you don't
          know how to use it.

                         ANOTHER ANGLE
          as the Instructor looks around the circle of Officer
          Trainees and from them chooses Tony.

                         INSTRUCTOR
          You. You'd like to kill me,
          wouldn't you, Sir? Well, here's
          your chance.
          The Instructor tosses Tony the knife. Tony catches
          it. As they circle, the Instructor taunts Tony.

                         INSTRUCTOR
          Come on. Come on. Make a move,
          Sir. Are you falling in love with
          me? Then do something hostile.
          Make a face. Stick out your tongue.
          Do something, Mister Adams.
          Tony swings the knife. The Instructor avoids him.

                         INSTRUCTOR
          My little bitty sister can make a
          better move than that, Sir.

                         ANOTHER ANGLE
          The Instructor offers a tempting target. Tony swings.
          The Instructor slaps his cap across Tony's face, grabs
          Tony's wrist and disarms him, throwing him to the
          ground.

          ANOTHER ANGLE - ON TONY
          The Instructor turns his back on Tony, deliberately.
          Tony gets to his feet and charges the Instructor's back.

          (CONTINUED)

                         

                         

                         

                         

                         I

          43.

                         CONTINUED
          The Instructor has been waiting for this. He flips
          Tony again.
          Tony lies inert on the ground. The Instructor comes
          up, bends to inspect Tony. As he does so:

                         INSTRUCTOR
          All right, Sir. You ---
          He breaks off as Tony has driven an upper cut into
          the Instructor's balls. The Instructor grabs his
          groin and goes down in a heap.

                         ANOTHER ANGLE
          Tony rolls to his feet, grabs up the killing knife
          and presses it to the Instructor's throat.

                         TONY
          All right, you son of a bitch, tell
          me about it.
          The Instructor is looking at Tony. Suddenly he grins:

                         INSTRUCTOR
          You're getting there, Mister Adams.

          ANOTHER ANGLE
          Tony looks at the knife in his hand, then tosses it
          away and starts off. At this point a jeep comes
          bouncing over the hill and skids to a broadsiding
          stop. We see that Palmateer is at the wheel.

          PALMATEER

                         (TO TONY)
          Jump in.
          Tony is in the jeep. As Palmateer guns out, trailing
          a plume of dust, we ---

                         CUT TO:
          INT. RECREATION ROOM - CAMP PEARY - DAY
          Palmateer is watching the Recreation Room television
          set on which there is a newscast of the release of
          Danny DeVito from Leavenworth.

                         (CONTINUED)

                         

                         

                         

                         

          44.

                         CONTINUED:
          Danny, almost as wide as he is tall, his broad pug-
          nacious face wreathed in a grin, comes out of the
          prison gate, his hands held up in the "Victory" sign.
          He pushes his way through the Television Cameramen
          and Newsmen, not saying anything.
          We pan him over to a limousine which he gets into
          quickly. The limousine drives off.

                         NEWSCASTER'S VOICE
          Today in Leavenworth, Kansas, one-
          time International Brotherhood of
          Dockworkers' President, Danny DeVito
          was released on pardon after serving
          six and a half years of a fifteen
          year sentence on embezzlement and
          conspiracy charges. DeVito didn't
          state his future plans, but insiders
          expect him to challenge the incum-
          bent President, Pat Brady, in the up-
          coming election.

                         ANOTHER ANGLE
          As Tony comes into the room, Palmateer gives him a
          pen and a couple of pieces of paper.

                         I

          PALMATEER
          A couple of things for you to sign
          before you go operational. This is
          for your piece --

                         (SECOND PAPER)
          And this is your resignation. It's
          a technicality, but in case the shit
          ever really hits the fan, we'd have
          to disclaim you.

                         (GRINS)
          It's called the principle of plaus-
          ible denial. We never invoke it
          until the cock crows thrice.
          As Tony is signing the papers, Palmateer has produced
          a service automatic from a canvas carrying case. He
          puts it on the table before Tony ---

          EXT. A WASHINGTON D.C. BUILDING - DAY
          As Tony, dressed in his blues, comes out of a build-
          ing. He carries a briefcase. Crossing to the curb,
          he stops and looks up the street.

                         

                         

                         

                         

          45.

                         ANOTHER ANGLE
          As a large car pulls up and stops in front of Tony,
          he opens the door and gets into the backseat. We
          now might be able to see that Frankie is driving.
          With him in the front seat is Santino. In the back-
          seat is Umberto Croce.
          INT. THE LIMOUSINE - DAY
          as Tony gets in and sits next to Croce. General
          greetings, then Tony gives the briefcase to Croce.

                         FRANKIE
          Where to?

                         TONY
          Take a right on Sixteenth Street.

                         ANOTHER ANGLE
          As Croce sets the briefcase on his lap, Tony reaches
          into his pocket and gives him the key. Now Croce
          carefully unlocks the briefcase. As this is going on:

                         TONY
          How's your progress?

                         CROCE
          Doctor Barcenas is getting an
          assault team together.

                         TONY
          Barcenas?

                         CROCE

                         T
          A leader in the exile community.
          One of the early revolutionaries.
          Broke with Vidal when El Presidente
          went Marxist and abrogated the
          Constitution. He thinks we're a
          group of business men backing him
          to get our hotel and casino back.
          Croce has gotten the briefcase open. It is full of
          cash. Tony points ahead.

                         TONY
          Let me off at the next corner.

                         

                         

                         

                         

                         I

          46/48.

          INSERT - THE BRIEFCASE
          As Croce picks up a packet of hundreds out of the
          suitcase, riffles it:

                         TONY'S VOICE
          It's all there.

          BACK TO SCENE
          As Santino grins:

                         SANTINO
          Straight from the East Coast
          Distributor.
          Tony looks over at Croce:

                         CROCE
          We're in business.

                         TONY
          I'll see you in Florida.

          EXT. WASHINGTON STREET - DAY


                         0
          As Tony gets out of the limousine and crosses to a
          phone booth. We are moving in as he puts a dime into
          the slot and dials.

                         TONY
          Miss Duane, please.

                         CUT TO:

          INT. LOBBY (KENNEDY CENTER) - NIGHT
          We are shooting past the big, nubby sculptured head of
          John Kennedy which identifies, but in no way dominates
          the long, high handsome lobby with its crimson carpets.
          The Opera is breaking for intermission and the audience
          is moving toward the bars and the terrace.
          We move to the bar where, in the crush, we find Tony.
          As everybody seems to be ordering at once---
                         TONY
          Scotch-rocks, here. Two. Madame,
          I believe those were

                         (THEN)
          Scotch-rocks. Here. Two.

                         (CONTINUED)

                         

                         

                         

                         

          49.

                         CONTINUED
          Tony turns to watch a woman move off with two drinks-
          that were clearly his. Now, a braided Admiral usurps
          Tony's place.

                         TONY
          I believe I was next, Sir.
          The Admiral throws a look in Tony's direction, then
          pays for the bourbon and coke and exits.

                         TONY
          Scotch-rocks. Here. Two.
          EXT. THE TERRACE (KENNEDY CENTER) - NIGHT
          On the broad, impressive terrace, overlooking the curve
          of the river as it mirrors the lights of Washington, we
          find Elizabeth, dressed for evening. She is surrounded
          by three urbane men, Morton, her Television Producer,
          Swartzwalder, a Jurist and McKissick, a young Senator.
          McKissick is lighting Elizabeth's cigarette as Tony
          comes up with the two drinks.

                         TONY
          Finally.

                         ELIZABETH
          Thank God for the Navy.
          As she takes her drink:

                         ELIZABETH
          Mister Adams, I'd like you to meet
          Judge Swartzwalder -- Senator
          McKissick and Jack Morton from
          whom all blessings flow ---

                         MORTON
          As long as the ratings hold up.
          As Tony is shaking hands around with the Judge, McKissick
          turns to Elizabeth:

                         MC KISSICK
          Burning the midnight oil at State?

                         ELIZABETH

                         (SMILES)
          I wouldn't know, I've broken off
          relations with the State Department.

                         

                         

                         

                         

                         I

          50.

                         ANOTHER ANGLE
          Tony has heard this last.

                         SWARTZWALDER
          You stationed in Washington?

                         TONY
          On temporary assignment.

                         ELIZABETH
          He's doing a background on Armando
          Vidal.

                         MC KISSICK
          A fine man. And thank God we're
          back on speaking terms -- or are
          we?

                         MORTON
          If you don't know, who?
          A buzzer sounds from inside. Elizabeth hands Tony
          her glass.

                         ELIZABETH
          Would you be an angel and
          Tony takes the glass from her, crosses and puts it
          on a nearby bench. As he does so, Elizabeth is look-
          ing after him, as to the others:

                         ELIZABETH
          I've always had a mad thing for
          sailors. They've got such neat
          little asses.
          As Tony returns and escorts her back into the theater:

          CUT TO:
          EXT. KENNEDY CENTER DRIVEWAY - NIGHT
          We are on the line of cars coming up to pick up their
          passengers outside the Hall of States. Much honking
          of horns, so on.

          ON TONY AND ELIZABETH
          as they stand among the opera-goers, looking for a
          taxi. Elizabeth spots one first.
          (CONTINUED)

                         

                         

                         

                         

                         7

          51.

                         CONTINUED:

                         ELIZABETH

          There's one.
          Tony hurries for the taxicab.

                         ANOTHER ANGLE
          as an Airforce General intercepts the cab.

                         GENERAL
          Believe this is mine.

          ON ELIZABETH
          As she watches Tony get outranked for his cab, then
          she spots another.

                         ELIZABETH
          Here comes another.

          ANOTHER ANGLE
          as Tony hurries for the next cab, only to get beaten
          out by a State Department type with two oil Sheiks
          in tow.
          STATE DEPARTMENT TYPE
          You're next, Ensign.
          Tony turns, signals to Elizabeth to wait, then exits.

          CLOSE - ELIZABETH
          as she looks after him, puzzled.

          EXT. PARKING AREA - NIGHT
          as Tony moves into the Kennedy Center Parking area,
          looking around.

          ON ELIZABETH
          She is pacing. She stops, looks at her watch, then
          turns at an insistent honking from the driveway. We
          pull back and pan to include Tony sitting in a big,
          beautiful, official-looking sedan. He leans across
          and opens the front door. She crosses and gets in.

                         

                         

                         

                         

          52/53.
          ON HALL OF STATES DOORWAY (KENNEDY CENTER) - NIGHT
          as an Admiral and his wife come out toward the drive-
          way with their guests. All are chuckling merrily at
          something the old sea dog has said. Now his wife's
          expression alters as she looks off.

          ADMIRAL'S WIFE
          Charles! Isn't that our car?
          He looks off. His expression changes. He starts to
          run.
          ANOTHER ANGLE - KENNEDY CENTER DRIVEWAY
          As Tony drives off, the Admiral runs vainly after
          his car, waving his hand.

                         ADMIRAL
          Stop! Stop!

          INT. THE ADMIRAL'S CAR - NIGHT

          Elizabeth has been looking back. Now she regards Tony
          with new and approving eyes.

                         ELIZABETH
          You have interesting talents, Mister
          Adams.

                         TONY
          You'll be astonished.

                         ELIZABETH
          I'm looking forward to it.

                         CUT TO:

          INT. ELIZABETH'S BEDROOM - NIGHT

          Tony and Elizabeth are in bed together making love.

                         ANOTHER ANGLE
          featuring Elizabeth as

                         ELIZABETH
          Now. Now. Yes. Now

          CUT TO:

                         

                         

                         

                         

                         Y

          54.

          INT. BEDROOM (ELIZABETH'S APT.) - NIGHT

          Tony and Elizabeth are sitting up in bed. Tony is
          lighting her cigarette. Suddenly he pauses.

                         ELIZABETH
          What's wrong?

                         TONY
          Shh.

                         ELIZABETH
          What's the ---
          She breaks off because Tony has put his hand over her
          mouth. Her eyes blaze as she reaches for his wrist.

                         TONY
          Noise in the living room.

                         (THEN)
          There it is again.
          This time we too have heard something. He uncovers
          her mouth.

                         ELIZABETH
          There's a window in the front room.
          It rattles.

                         TONY
          I don't know.

                         ELIZABETH
          Well, I do.

                         ANOTHER ANGLE
          as Elizabeth swings out of bed, naked, crossing out
          of the bedroom and into the hall.

          INT. HALLWAY (ELIZABETH'S APT.) - NIGHT

          We move with Elizabeth as she goes down the hallway.
          INT. LIVING ROOM (ELIZABETH'S APT.) - NIGHT
          As Elizabeth comes into the darkened living room, suddenly
          a Man from out of the shadows, clamps one hand over her
          mouth and with the other hand he holds a knife to her
          throat.

                         0

                         

                         

                         

                         

                         I

          55.

                         ANOTHER ANGLE
          As a Second Man appears. This one has a gun. The First
          Man turns to him, speaking in a whisper.

                         FIRST MAN
          Bring him down here.
          The Second Man nods. We follow him as very softly he
          goes down the hallway.

                         P

          CLOSE - THE SECOND MAN
          as he pauses outside the bedroom door which is half
          ajar. Now suddenly he moves ---
          INT. BEDROOM - NIGHT
          We are angled on the hall door as, in a single move
          the Second Man kicks the door open and flicks the bedroom
          light on. He has his gun pointed at the bed. We whip
          pan to the bed. It's empty.

          CLOSE - THE SECOND MAN
          As for a frozen moment, surprise and consternation
          show on his face. At this point:

                         TONY

                         (VERY SOFTLY)
          Don't make a noise. Just stand
          where you are or I'll kill you.
          We have pulled back and panned slightly to include Tony
          who is sitting on the floor, his back against the wall.
          The gun in his hand is trained on the Man who has just
          come in.

                         ANOTHER ANGLE
          As the Second Man stands motionless, Tony is on his
          feet and up to him. Tony takes the gun from him and
          tossing it on the bed gestures, forefinger to lips,
          be quiet.
          Now, Tony turns to the Man and starts down the hall
          with him. Tony has him by the back of the jacket, his
          gun pressed against the back of the Second Man's head.

                         

                         

                         

                         

                         I

          56.

          INT. LIVING ROOM (ELIZABETH'S APT.) - NIGHT

          As Tony enters with his prisoner, the First Man - the
          one with the knife -- still has his weapon pressing
          into Elizabeth's throat. He is looking the other way,
          but turns as:

                         TONY'S VOICE
          Turn very easy.
          The First Man turns, sees Tony and the Other Man.

                         TONY
          I've got a gun at your partner's
          head.
          There is a beat, then:

                         FIRST MAN
          Drop the gun or I slit her throat.

                         TONY
          You drop the knife.

                         FIRST MAN
          Don't you think I'll kill her?

                         TONY
          I don't give a shit if you kill
          her. I said drop the knife.

          ANOTHER ANGLE
          As the Second Man -- overconfident at finding himself
          still alive -- speaks to his partner.

                         SECOND MAN
          Cut her a little bit to convince
          this asshole. Open up her throat.
          Suddenly the Second Man's knees buckle as Tony, in a
          lightning move, brings the gun barrel down across the
          Man's head.
          As the Second Man is on his hands and knees, like a
          stunned ox, dripping blood on the carpet, Tony turns
          once more to the Man with the knife.

                         TONY
          I'm going to give you one more
          chance. Drop the knife and I'll
          let you go.

                         

                         

                         

                         

                         -T

          57.

                         CONTINUED:

                         FIRST MAN

                         (LAUGHS)
          I'll cut her fucking head off,
          first.

                         ANOTHER ANGLE
          as the Second Man, still on his hands and knees, now
          pushes himself up to a kneeling position.

                         SECOND MAN
          Show him a little blood.
          As the Man with the knife nods grimly, Tony puts his
          gun to the back of the kneeling man's head and calmly
          pulls the trigger.

                         ANOTHER ANGLE
          The kneeling man plunges forward, dead, the back of
          his head blown off. Tony now turns, without emotion,
          to the man with the knife.

                         TONY
          That's what you're going to look
          like in two seconds if you harm
          that girl.

                         ANOTHER ANGLE
          As the First Man drops his knife and releases Elizabeth,
          she is looking at Tony, stunned, her aplomb totally
          vanished.

                         TONY
          Go get dressed and call the Police.
          Elizabeth nods and half-stumbles out of the room. Tony
          crosses to the Man.

                         (CONTINUED)

                         

                         

                         

                         

                         I

                         I

          58.

                         (CONTINUED)

                         FIRST MAN
          When the cops come, you're going
          to be up on a murder rap, baby.

                         TONY
          No, I'm not.
          Tony lifts the gun and blasts the First Man three
          times in the chest.

                         ANOTHER ANGLE
          as Elizabeth bursts back into the room, looks, then
          starts to scream. Tony crosses up and taking her in
          his arms, softly comforts her.

                         TONY
          It's all right. He tried to get
          my gun away and I had to shoot him.
          As Tony, still comforting her, moves her away, we --

                         DISSOLVE TO:

          EXT. EVERGLADES BAY - DAY

          We are in a swampy backwater on the West Coast of
          Florida, south of Fort Myers. Hacked out of the
          everglades is a clearing on which a tourist accommo-
          dation has been built. It is called Tarpon Lodge
          and signs announce "Cold Beer", "Boat Rentals" and
          "Tourist Accommodations". There is a small pier.

                         ANOTHER ANGLE
          as a small, commercial fishing boat comes chugging
          up the bayou and blows its whistle.

          CLOSER - THE FISHING BOAT
          as a Latin American, Roberto Barcenas, about 35,
          comes out to the bow of the boat, looks off toward
          the tourist cabins. Roberto is a capable, cool,
          intelligent man.

          EXT. THE TARPON LODGE - DAY
          as Umberto Croce, dressed in white and wearing a straw
          hat against the sun, comes out of the ramshackle lodge
          and moves to the pier.

                         0

                         

                         

                         

                         

                         59A

                         THE PIER
          Crewmen of the fishing boat are tieing up as Roberto
          Barcenas comes down onto the pier followed by half a
          dozen others. They are Indio, Borracho, Padre Pepe,
          El Fararon, Arrigo and Roberto's younger brother, Jorge.
          Croce and Roberto embrace, then:

                         CROCE
          This way.
          EXT. AN AIRSTRIP - DAY
          A rough airstrip has been bulldozed out of the scrub
          pine and palmetto forest. At one end is a hangar and
          on its side the words: INTER-CARIBBEAN CHARTERS.
          As Croce and the Latin Americans come up:

                         CROCE

          Tony!

                         ANOTHER ANGLE
          Inside the hangar we see a D.C. Six. Tony comes out
          cleaning cosmolene off his hands with a rag.

                         CROCE
          Tony, I'd like you to meet Doctor
          Barcenas.

                         TONY
          I'm pleased to meet you, Doctor.
          They shake hands, then:

                         ROBERTO
          Permit me to present my companions --
          Indio, Borracho, Padre Pepe, El
          Fararon, Arrigo and my brother, Jorge.
          The members of the assault team -- the Indian, the
          Drunk, the Priest, the Pharoah, Arrigo who looks
          like a pimp, and the romantic, fire-eating younger
          brother -- are introduced to Tony individually. Ad
          lib greetings, then to Barcenas:

                         TONY
          Are you ready?

                         (CONTINUED)

                         

                         

                         

                         

                         60-

          CONTINUED:

                         ROBERTO
          For many years.
          As they move into the hanger:

                         ROBERTO
          I understand from Senor Croce that
          the idea with the diving suit did
          not work out.

                         TONY
          There were a few problems.

                         ROBERTO
          But the other is fine?

                         TONY
          Yes.

                         (THEN)
          You've been in touch with your
          underground?

                         ROBERTO

                         (NODS)
          Everything is arranged.

          INT. THE HANGAR - DAY
          At one side of the hangar we see crates of various
          weapons. Boxes of hand grenades, mortars and mortar
          shells. Sub-machine guns caked in cosmolene, LAW
          rockets, field radios. So on. As Tony and Roberto
          come in, followed by the others:

                         TONY
          We got you BARS and Carbines
          plus ten, thirty calibre light
          machine guns, and ten, four-point
          two inch mortars with a thousand
          rounds of high explosive and a
          thousand rounds of white phosphorous.
          Tony is pointing out the various boxes and crates:

                         JORGE
          What about sub-machine guns?

                         TONY
          Thirty of them. Forty-five calibre
          nine millimeter. With ten thousand
          rounds.
                         (points)
          Over there.

                         (CONTINUED)

                         

                         

                         

                         

          61.

                         CONTINUED:
          As Jorge crosses over to the box of weapons, picks
          one up in his hands:

                         JORGE
          Ten thousand rounds! Aye de mi!

                         ANOTHER ANGLE
          as Tony picks up and hands Roberto a long, olive green
          plastic tube.

                         TONY
          We also got you two hundred Sixty-
          six, millimeter, M-72 LAW rockets.
          They're lightweight, one-shot dis-
          posable Bazookas.

                         (THEN)
          There is also field gear, medical
          supplies, tentage, demolition mater-
          ials, combat rations ---
          Arrigo, the pimp, comes up, takes the LAW rocket from
          Roberto, then:

                         ARRIGO
          We make a little trouble for El
          Presidente, hey?

                         JORGE
          When do we leave?

                         TONY
          You go in tomorrow to get it ready.
          I'll meet you down there at the end
          of the week.

                         (THEN)
          Who's your radio man?

                         PEPE
          I am.

                         TONY
          I'll show you the set up.
          As Tony and Padre Pepe move off together toward a
          short wave radio set up in the corner, we --

                         CUT TO:

                         

                         

                         

                         

                         K

          62.

          EXT. LAS VEGAS HOTEL (SWIMMING POOL AREA) - DAY

          We are in a long shot and angled downward on the swim-
          ming pool area of the Corleone Family's Las Vegas
          flagship hotel. Into the shot, threading their way
          between the girls in their bathing suits, come Sam
          Maatrocina and Ralph Augusto, walking purposefully.
          We zoom back ---

          INT. TOM HAGEN'S OFFICE (LAS VEGAS HOTEL) - DAY

          We are on Rocco Lampone, who is standing by the window,
          looking out onto the pool area. As he turns:

                         LAMPONE
          Here they come.
          We are pulling back. Also present in the large, attrac-
          tive office, are Tom Hagen, Danny DeVito and Santino
          Corleone, who is at the bar fixing drinks.

                         SANTINO
          Half an hour late.

                         DEVITO
          I'm happy he's here at all. That
          means he's willing to talk like
          a reasonable man.

                         LAMPONE
          Tell Al Neri about it.

                         HAGEN
          What you've got to understand,
          Rocco, is that what happened to
          Al was business. What's done is
          done. The important thing is to
          avoid trouble if we can. This
          isn't the old days. The Five
          Family wars are over and done with.

                         LAMPONE
          Maybe that's too bad.

                         ANOTHER ANGLE
          as Santino comes over with the drinks for Hagen and
          DeVito. As Danny accepts his drink and raises it to
          Hagen:

                         

                         

                         

                         

          63.

                         CONTINUED:

                         DEVITO
          To you -- all of you. I'll never
          be able to thank you for what you
          done.
          As DeVito drinks, the intercom is buzzing. Hagen
          flicks it.

                         SECRETARY'S VOICE
          Mister Maatrocina and Mister Augusto
          are here, Sir.

                         HAGEN
          Send them in, please.

                         ANOTHER ANGLE
          Hagen comes around his desk to the door to greet
          Maatrocina as he enters, followed closely by the
          cold-looking Ralph Augusto.

                         HAGEN
          Sam. I'm glad you could come.
          Ralph. Good to see you. What
          are you drinking?

                         MAATROCINA
          I'll take a little bourbon with
          you. On the rocks.
          Hagen nods at Santino who crosses to the bar to make
          the drink, as Hagen turns now to Augusto.

                         HAGEN
          Ralph?

                         AUGUSTO
          Nothing.

                         HAGEN
          Coke?

                         AUGUSTO
          Nothing.

                         ANOTHER ANGLE
          As Santino is making the drink, Maatrocina turns to
          Danny DeVito, shaking hands.

                         

                         

                         

                         

                         I

          .7

          64.

                         CONTINUED:

                         MAATROCINA
          Let me save us all some time. I
          know the Hagen-Corleone Family got
          Danny sprung and I'm glad about it ---

                         (TO DEVITO)
          From the heart.

                         DEVITO
          Thank you, Sam. I know you mean
          it.

                         MAATROCINA

                         (TO HAGEN)
          I also know how you swung it --
          (grins, holds

                         UP HAND)
          Yeah. That's right. You aren't
          the only ones with a friend or
          two in Washington. Don't under-
          estimate me.

                         HAGEN
          We never have, Sam.

                         MAATROCINA
          One more thing I know -- Danny's
          a big hero to the rank-and-file.
          He'll leave Pat Brady for dead in
          this Special Election that's coming
          up. Okay? So that leaves one thing
          to talk about.

                         HAGEN
          How we can all accommodate to this
          new situation.

                         MAATROCINA

                         (GRINS)
          I don't know from accommodate --
          just so we all get a chance to
          drink from the well.

                         HAGEN
          Problem being, for the past five
          years the well dried up on us as
          you might say. It was almost as
          if Pat Brady had something against
          us personally.

                         MAATROCINA
          Tom, Tom. You should have come
          to me.

                         (CONTINUED)

                         

                         

                         

                         

          65.

                         CONTINUED:

                         HAGEN
          You don't like to bother your
          friends about these matters.

                         MAATROCINA
          What are friends for?

                         HAGEN
          In any case, the problem is solved
          or will be soon. The well is no
          longer dry.

                         MAATROCINA
          And everybody gets to fill his
          bucket. That's as it should be.
          After us.

                         MAATROCINA
          Meaning what exactly?

                         HAGEN
          We need a loan of fifty million
          dollars. That has to be the first
          order of business ---

                         MAATROCINA
          The Atlantic City Hotel?

                         HAGEN
          That's right.

                         I

                         ANOTHER ANGLE
          As Maatrocina makes flat paddles of his hands and points
          them inward to his chest as he turns to DeVito.

                         MAATROCINA
          And what am I? An orphan? I want
          to get in on Atlantic City, too.

                         DEVITO
          You had it all your way for the
          last five years, Sam. Let some-
          body else do business.

                         MAATROCINA
          I got no objection to him doing
          business --- I just want to do
          business, too. Okay?
          What Hagen gets, I get.

                         LI

                         (CONTINUED)

                         

                         

                         

                         

          66.

                         CONTINUED:

                         DEVITO
          Are you talking about a hundred
          million dollars?

                         MAATROCINA
          If fifty and fifty still make a
          hundred, that's what I'm talking
          about.

                         DEVITO
          It can't be done.

                         MAATROCINA
          Don't you think I know how much
          money you've got in that goddamned
          pension fund?

                         HAGEN
          He's also got the Department of
          Labor, a board of trustees and
          the SEC looking down his throat.
          He can't make that large a commit-
          ment.

                         DEVITO
          We can't put more than twenty-five
          percent of our assets into real
          estate ---

                         MAATROCINA
          Is that what you brought me across
          the country for? To tell me I'm
          getting frozen out?

                         DEVITO
          Nobody's freezing anybody.

                         MAATROCINA
          Then why do I feel these chilly
          winds nipping at my ass?

                         DEVITO
          You should felt the winds in that
          joint. It was Hagen that got me
          out.

                         (THEN)
          He comes first. You come second.
          That's the way it is.

                         MAATROCINA
          How far second?

                         (CONTINUED)

                         

                         

                         

                         

                         I E

                         Y

          67.

                         CONTINUED:

                         DEVITO
          You've got to give me a year.

                         MAATROCINA
          We both want to get into Atlantic
          City. I can't give them a year's
          head start.

                         ANOTHER ANGLE
          as DeVito looks over at Tom Hagen.

                         DEVITO
          Tom? What if you people take thirty
          and Sam takes twenty?

                         HAGEN
          I have to have fifty. That's def-
          inite. You knew it and agreed to
          it in front.

                         DEVITO
          So that we can all part friends ---
          what about forty-ten?

                         MAATROCINA
          I'm not taking any ten. What
          they get, I get. That is final.

                         DEVITO
          All right. All right. Maybe I
          can work something out. I'll
          try.

                         MAATROCINA
          Try hard.

                         ANOTSER ANGLZ
          Maatrocina glances over at Augusta. They rise.

                         HAGEN
          Thank you for coming, Don Maatro-
          cina. I'm sure we can find a way
          to live together in peace.

                         MAATROCINA
          Of course, my old friend,
          Maatrocina nods his goodbyes around and he and Augusto
          leave.
          49 (CONTINUED)

                         

                         

                         

                         

          68.

                         CONTINUED:

                         HAGEN

                         (TO LAMPONE)
          You might have to go one on one
          with Augusto before we're finished.

                         LAMPONE
          I'm counting on it.

                         ANOTHER ANGLE
          As Lampone crosses to the window to watch Augusto
          and Maatrocina leave through the pool area below,
          Hagen moves to Danny DeVito.

                         HAGEN
          I'd like to put a couple of people
          with you, Danny.

                         DEVITO
          I don't need your people, Tom.

                         (THEN)
          Sam and I go back a long ways. He
          knows I'll come up with something
          for him.

                         SANTINO

                         (TO DANNY)
          Why don't you stick around? See
          the show? Maybe the tooth fairy
          will stuff one of those long-legged
          blondes under your pillow.

                         DEVITO
          I'll take a rain check.

                         ANOTHER ANGLE
          As Devito grins, shakes hands around and exits, Hagen
          turns to Santino.

                         HAGEN
          I want you to fly to Washington
          tonight. Talk to Senator Geary.
          Anybody you have to. But find out
          who's on Maatrocina's payroll.

                         (THEN)
          I don't like him knowing about Tony.

                         LAMPONE
          Neither do I.

                         (CONTINUED)

                         

                         

                         

                         

                         I

          69.

                         CONTINUED:

                         SANTINO
          You think he's walking into some-
          thing down there?

                         HAGEN
          That's what I want you to find out.
          As Santino turns and goes, we ---

                         CUT TO:

          EXT. LATIN-AMERICAN CITY - NIGHT
          We are on the sky over the bay of Armando Vidal's
          Capitol. It is the Fifth Anniversary of his revol-
          ution and a fireworks display is taking place over
          the bay. Sky rockets are arching upward over the
          waters, exploding in cascades of red, green and gold.

                         CUT TO:

          A STREET CORNER - NIGHT
          We are on an impromptu exhibition on a street corner.
          A big, marvelous looking black Girl, in almost nothing,
          circles with a lean Latino almost touching but not quite,
          in a hip grinding mambo.

                         E

                         ANOTHER ANGLE
          as the crowd laughs and cheers them on.

                         A WOMAN
          Aqua!
          We move up to a loud speaker attached to an ornate and
          bunting festooned lamppost. From the speaker we hear:

                         VIDAL'S VOICE
          But make no mistake, any of you --
          you of the North American delega-
          tion in particular. We welcome
          you to our Country -- but the days
          of exploitation are over.

                         CUT TO:

          INT. BALLROOM - HOTEL DE LA REVOLUCION - NIGHT

          El Presidente, Armando Vidal, is speaking to a crowd
          in the Grand Ballroom. The usual television cameras,
          so on.

                         0

                         

                         

                         

                         

                         T

          70.

                         ANOTHER ANGLE
          As the predominantly Latin American crowd cheers, we
          pick out Tony, in civilian clothes, amongst the cheer-
          ing people.

                         VIDAL
          The blood of our martyrs is still
          too fresh on our pavement. We will
          be friends and neighbors, but never
          again slaves to Imperialism.

                         1
          A young Latino, and we will recognize him as Arrigo,
          has moved to Tony's side. As the cheers have erupted
          once more, Arrigo, with a brief nod of the head, indic-
          ates that Tony should follow him.

          ANOTHER ANGLE
          As Arrigo goes, Tony looks after him briefly, then over
          to the American Delegation including Lucas, the Assistant
          Secretary of State for Latin American Affairs; Cariock,
          the Speaker of the House and several prominent liberal
          Senators including McKissick of Utah and the elegant
          Bartholomew of Pennsylvania.
          Also in the delegation, in uniform, is Stu Palmateer.
          The older man's eyes meet with Tony's for a brief
          moment. He has seen the exchange. He watches as
          Tony leaves, then looks over toward Magudo, Vidal's

                         I
          Chief of Police. The harsh, heavy Policeman is laugh-
          ing at something Vidal has said, seemingly paying no
          attention to Tony's exit.

          INT. LOBBY HOTEL DE LA REVOLUCION - NIGHT

          As Tony is crossing the lobby, Elizabeth Ann Dunne,
          followed by a small entourage, enters. Half a pace
          behind her is Kenny Morton, her producer and behind
          them are three men carrying portable TV equipment,
          cameras, tripods, batteries, so on. She is speaking
          to Kenny.

                         ELIZABETH
          With all due respect, that's bull-
          shit. The man's obviously incom-
          petent or the car would have been
          waiting exactly
          She breaks off, seeing Tony.

                         

                         

                         

                         

          71.

                         TONY
          Hi.
          She stops. He crosses up:

                         TONY
          I thought you weren't coming down
          here.
          She doesn't answer.

                         TONY
          Aren't we speaking?

                         ELIZABETH
          I don't know.
          Elizabeth crosses off toward the desk, Tony looks
          after her for a moment, puzzled, then crosses out.

          EXT. HOTEL DE LA REVOLUCION - NIGHT

          As Tony comes out of the hotel and crosses the garden
          grounds toward the street, we see that a fireworks
          display is still in progress.

          EXT. AVENIDA DE LA REVOLUCION - NIGHT

          Throngs of Merry-makers are moving along the wide,
          palm-lined avenue that follows the curve of the bay.
          Many are in costume. Some play instruments. Others
          have bongo drums. The atmosphere is frenetic. There
          is a pervasive beat. The whole city seems to throb
          with it.
          We are on Arrigo who stands outside the flow of traffic,
          cigarette in his mouth, patting his pockets for a match.
          Tony comes up and lights Arrigo's cigarette. As he
          does so:

                         ARRIGO
          Be sure you have your identification.
          Magudo's pescadores -- you understand?
          The Police -- they're out fishing
          tonight.
          We are on another group of Girls and Men in a wildly
          sexual dance.

                         

                         

                         

                         

                         I

          72.

                         CONTINUED:
          We pan off them to Arrigo passing. Tony follows. We
          move in closer to Tony. A big, fat Mama comes out of
          a doorway, shakes everything she has at him. Tony grins,
          pats her on the ass and passes by.

          EXT. HARBOR AREA - NIGHT

          We are on the part of the harbor where the big fishing
          boats tie up. The sound of the celebration is fainter
          now, a couple of blocks distant. We are on an old,
          but seaworthy fishing boat; the name on the stern is
          "Stella Maris". Arrigo comes up the pier, pauses by
          the short gangway.

                         ANOTHER ANGLE

                         I
          As Tony comes up and joins Arrigo a Third man suddenly
          appears out of the darkness. As he comes up to Tony,
          we recognize Jorge Barcenas.

                         JORGE

                         (SHAKING HANDS)
          You bring it?

                         TONY
          Yes.

                         JORGE
          This way.
          Jorge leads them aboard.

          INT. MAIN CABIN (STELLA MARIS) - NIGHT

          In the cabin are Roberto Barcenas, Indio, Fararon,
          Padre Pepe and Borracho. There is also a girl, Anjelica,
          25. She has dark eyes and blonde hair. There is some-
          thing faintly flashy but also enormously sensual about
          her.
          One Man is cleaning a stripped automatic rifle. Another
          is loading a banana clip with 30 calibre bullets. Barcenas,
          the girl and some others are bent over a map on the mess
          table. All look up as Tony, Jorge and Arrigo enter.
          Roberto smiles warmly, comes up and embraces Tony.
          (CONTINUED)

                         

                         

                         

                         

                         T

          73.

                         CONTINUED:

                         ROBERTO
          It's good to see you, my friend.

                         (THEN)
          All goes well?

                         TONY

                         (NODS)
          And you?

                         ROBERTO
          Everything is ready. Arms and
          ammunition distributed. Now the
          waiting.

                         TONY
          You've set a time?

                         ROBERTO
          Day after tomorrow. Seven A.M.

                         (CHECKS WATCH)
          Thirty-four hours.

                         (THEN)
          This is Anjelica. A great good
          friend of El Presidente. She knows
          where he keeps his toothbrush.

                         TONY
          Good.

                         ANJELICA
          You have something for me?

                         ANOTHER ANGLE
          as Tony unbuttons his shirt and strips off a money
          belt which he puts on the table and opens. He takes
          out some money.

                         TONY
          A hundred thousand pesos, cash.
          Count it, please.
          As Roberto takes up the money and counts it, Tony
          takes out a small vial in a plastic container.

                         TONY
          And this.

                         (THEN)
          It's tasteless and odorless, but
          it has a very short period of tox-
          icity --- do you understand? Once
          it's opened it must be used within
          twenty-four hours.

                         (CONTINUED)

                         I

                         

                         

                         

                         

          74.

                         CONTINUED:

                         ANJELICA
          How soon does it work?

                         TONY
          The symptoms are those of botulism.
          High fever, nausea, vomiting, and
          death. Within an hour.

                         ROBERTO

                         (TO ANJELICA)
          Hadn't you better be getting back?

                         ANJELICA
          Yes.

                         ANOTHER ANGLE
          As Anjelica turns, her coat swings open slightly and
          we see that she is dressed in a costume -- a typical
          Tropicana showgirl kind of thing.

                         ANJELICA

                         (TO TONY)
          Good night.

                         TONY
          Thank you.

                         ANJELICA
          For nothing. When we meet again
          things will be better.

                         JORGE
          Be careful of Magudo's fishermen.
          They're out in force tonight.
          Anjelica nods. Roberto hands her the money, then
          kisses her on the cheek.

                         0

                         ROBERTO
          Vaya con Dios, Chica.

                         ANOTHER ANGLE
          She stuffs the money in the pocket of the overcoat and
          exits. Roberto looks at Arrigo who nods and follows
          her out. Now Roberto looks back at Tony.

                         TONY
          Have the supporting operations
          been set up?
          (CONTINUED)

                         

                         

                         

                         
          .i

          75.

                         CONTINUED :
          Jorge crosses to a map on the table.

                         JORGE

                         (POINTS)
          Here. Here. Here.
          As Tony crosses and studies the map, we

                         CUT TO:

          A SMALL CITY PARK - NIGHT
          The celebration on Vidal's Capitol is still in progress
          in this small city park. Suddenly, a police truck
          appears in one of the streets, blocking it, and a fly-
          ing squad of armed police piles out of the canvas-
          covered rear end. The Sergente blows his whistle.

                         SERGENTE

                         (IN SPANISH)
          Identification check. Have your
          cards ready.

          ANOTHER ANGLE -- THE CROWD
          We see Arrigo and Anjelica moving through the crowd.
          They stop, exchange a look and then turn and move the
          other way. We follow them through the crowd to a small
          alley. Arrigo and Anjelica and a couple of others, who
          would apparently just as soon not be stopped by the
          police, move down the alley.

          INT. ALLEY - NIGHT

          As Anjelica and Arrigo move down the alleyway, con-
          gratulating themselves on their escape from Magudo's

                         I
          men, suddenly three Policemen materialize out of the
          darkness ahead.

                         POLICEMAN

                         (IN SPANISH)
          Not so fast my friends ---

                         (THEN)
          Against the wall. Pockets inside
          out. You too, blondie.

                         CUT TO:

                         171

                         

                         

                         

                         

          76.

          INT. POLICE STATION - NIGHT
          Half a dozen of the disreputable fish that have been
          caught in Magudo's net are sitting on a bench against
          the bare, dirty walls of the bare, high-ceilinged room
          with its one unshaded bulb hanging from the ceiling
          illuminating the suspects and Police alike harshly.
          On one side of the room is a barred "tank" or holding
          cell. There is a fat Capitan behind the desk and
          another Policeman, pockmarked and hawkish looking,
          working on some papers at another desk. The Sergente
          comes in with Anjelica and Arrigo.

                         ARRIGO

                         (IN SPANISH)
          This is an outrage! I protest!

                         SERGENTE

                         (IN SPANISH)
          Sit down!
          The Sergente shoves Arrigo violently toward the bench
          as we ---

                         CUT TO:

          INT. HOTEL DE LA REVOLUCION LOBBY - NIGHT
          We are on the doors of the hotel as Tony enters and
          crosses the large lobby toward the elevators.

                         ANOTHER ANGLE
          Over in one corner of the lobby we see Palmateer in
          a group with several others of the United States
          Delegation. Tony raises a hand to Palmateer who nods
          back as Tony continues toward the elevators.

                         I

          ANOTHER ANGLE - BY DESK
          as the Desk Clerk, who has been watching the door,
          signals over to a uniformed Security Officer. The
          Security Officer nods and crosses to intercept Tony.
          As he does so:

                         SECURITY OFFICER
          Senor Adams.
          Tony stops. The unsmiling Security Officer crosses
          up to him.

                         

                         

                         

                         

                         CONTINUED:

                         TONY
          Yes?

                         SECURITY OFFICER
          There is a message for you. A
          friend of yours wishes to see you
          in the bar.
          INT. HOTEL DE LA REVOLUCION BAR - NIGHT
          The place is reasonably crowded, even at this hour.
          We are on Elizabeth Ann Dunne who is the center of
          a small circle of admirers, including Kenny Morton.
          She looks off, sees Tony entering, then excuses her-
          self and crosses up to Tony who stands in the doorway.

                         CLOSER
          as Elizabeth comes up to him.

                         ELIZABETH
          I want to talk to you.

                         TONY
          I take it this definitely means
          we're speaking again.
          She draws him to an empty booth and as they sit:

                         I

                         ELIZABETH
          I'm going to tell you a secret.
          It wouldn't take an awful lot for
          me to get hung up on you. Okay?
          But I've got this problem --

                         (THEN)
          You scare me.

                         TONY
          What are you talking about?

                         ELIZABETH
          You, sport. Adams, Anthony. No
          middle initial.

                         (THEN)
          Are you really with the Navy or
          was that just so much malarkey?

                         TONY
          What makes you think I'm not with
          the Navy?
          (CONTINUED)

                         

                         

                         

                         

                         I

                         I
          77-i.

                         CONTINUED:

                         ELIZABETH
          You know what I heard in Washington?
          That something very interesting might
          happen down here.

                         TONY
          Where did you hear that?

                         ELIZABETH
          You wouldn't know anything about it,
          would you?

                         TONY
          I want to know where you heard that.

                         ELIZABETH
          What difference does it make, it's ---
          Tony is on his feet and moving out of the bar area.

          INT. HOTEL DE LA REVOLUCION LOBBY - NIGHT

          The small group of Americans, of which Stu Palmateer
          was part, has broken up. Palmateer is moving toward
          the elevator, but turns as he hears -----

                         TONY'S VOICE
          Stu?
          As Tony comes up:

                         PALMATEER
          Something wrong?

                         TONY
          I don't know.
          They move casually toward the newsstand. Palmateer
          picks up a Spanish language newspaper, idly scans it,
          then:

                         PALMATEER
          What's the problem?

                         TONY
          How many people know about this
          mission?

                         PALMATEER
          Why?

                         

                         

                         

                         

          78.

                         TONY
          I think there's been a leak in
          Washington.

                         PALMATEER
          There are only five people who have
          any idea what's going on.

                         TONY
          Is Artier Grundellius one of them?

                         PALMATEER
          Yes.

                         TONY
          Then maybe it's not that serious.

                         PALMATEER
          If you think the mission's compro-
          mised, we'll abort.

                         TONY
          No. I think it's all right.

                         (THEN)
          See you tomorrow.

                         ANOTHER ANGLE
          Palmateer crosses to the elevator as Tony moves back
          toward the barroom. We move with Tony as a Bellboy
          intercepts him.

                         BELLBOY
          Senor Adams? There is a phone call
          for you. You can take it on the
          house phone if you like.
          Tony nods, tips the Boy, then crosses to the house-
          phone and picks it up.

                         TONY
          Yes?

          INT. DOCKSIDE EATING PLACE - NIGHT
          Jorge Barcenas is at a public phone.

                         JORGE
          You recognize my voice?

                         (THEN)
          You better get down here right
          away.

                         0

                         

                         

                         

                         

                         I

                         S

          79.

          ANOTHER ANGLE
          As Tony hangs up and turns, Elizabeth is coming up
          from the bar.

                         ELIZABETH
          What in the hell's going on around
          here?

                         TONY
          I don't know.
          Tony turns and crosses out of the lobby as we ---

                         CUT TO:
          INT. MAIN CABIN (STELLA MARIS) - NIGHT
          We are close on the pock-marked, hawk-faced Policeman
          who was present at the Police Station when Anjelica
          and Arrigo were brought in.

                         POLICEMAN
          I'm positive, Doctor Barcenas. As
          soon as they found the hundred
          thousand pesos in her pocket, they
          called Colonel Magudo in. I came
          as soon as I could.

                         0
          We have pulled back to include Roberto Barcenas, Tony,
          Jorge and others, including the Captain of the fishing
          boat, a man named Mezcurio.

                         ROBERTO
          She's being questioned now?
          The Policeman nods. Roberto turns to Tony.

                         ROBERTO
          You have to assume they'll be
          tortured. You have to assume
          that they'll talk.

                         TONY
          Then you're going to have to move
          right now.

          ANOTHER ANGLE
          Jorge starts passing out weapons and hand grenades.
          Tony takes a sub-machine gun and checks it, as we ---

                         

                         

                         

                         

          82.

          EXT. JAILHOUSE ROOF - NIGHT

          A couple of Policemen are on duty on the jailhouse
          roof. Their names are Lopez and Ruiz. Lopez is look-
          ing into the square.

          LOPEZ'S ANGLE (THE SQUARE)
          We are shooting down into the square from behind Lopez
          as Ruiz comes up and joins him. We see the two cars
          stop outside the jail, and the occupants pile out. Tony,
          Jorge, Roberto and Indio from the first car, Borracho,
          Pepe and Fararon from the second. They are all armed,
          carrying LAW rockets and slung sub-machine guns.

                         ON RUIZ
          as he jumps up onto the parapet, cocks his own machine
          gun and fires down into the square. As his bullets
          stitch the cobblestones moving up toward the group.

                         ON TONY
          At the sound of Ruiz's first shot, he's got his sub-
          machine gun in his hands. He fires from the hip.
          Lopez throws up his hands, falling back. Ruiz spins
          on the parapet and falls into the street.

          INT. THE JAILHOUSE - NIGHT
          Carbajal and Mosca have started firing out of the gun
          ports in the doors.
          EXT. THE STREET OUTSIDE THE JAIL -- NIGHT
          As Indio gets hit in the forehead and is jolted back-
          wards, his face a bloody mask, Tony, Jorge and Roberto
          have picked up their LAW's and fire almost simultaneously.

          EXT. THE JAILHOUSE - NIGHT

          as the big, heavy doors blow away ---
          INT. THE JAILHOUSE - NIGHT
          as the second door is blown back into the room. The
          ceiling is caving in, plaster is falling. The air is
          thick with plaster dust and smoke.

          (CONTINUED)

                         

                         

                         

                         

                         I
          83.

                         CONTINUED:
          Mosca sits against the wall, mouth open. His leg lies
          across the room.
          The Prisoners in the holding cage are screaming. A
          Policeman runs in from another room just in time to
          be cut down by Jorge who bursts through the door
          followed by Tony and Roberto.

          ANOTHER ANGLE -- BASEMENT STAIRS
          as Colonel Magudo runs up the basement stairs, pistol
          in hand:

                         ANOTHER ANGLE
          as Jorge, Tony and Roberto all fire at the same time,
          blowing Magudo back down the basement stairs. Tony
          and the others dive down the stairs.

          EXT. THE CENTRAL SQUARE - NIGHT

          As the occupants of the first car are attacking the
          Jailhouse, Fararon, Pepe, and Borracho launch an
          attack on the Presidential Palace.

                         ANOTHER ANGLE
          As the sleepy Soldiers come tumbling out of the build-
          ing in confusion, Borracho is spraying them with his
          automatic weapon while the other two are blowing the
          front doors away with their rockets. Smoke and plaster
          dust is heavy in the square. Fires start to lick and
          flicker.

                         ANOTHER ANGLE
          As Borracho, Pepe and Fararon run into the Presidential
          Palace, somebody has started pealing the bells of the
          Cathedral.

          INT. THE PRESIDENTIAL PALACE - NIGHT
          As Borracho, Pepe and Fararon run into the great
          central hall, with its curving staircase, and great
          crystal chandelier:

                         

                         

                         

                         

          84.

                         CONTINUED:

                         BORRACHO
          This way.
          He runs up the stairway, followed by the others. At the top
          of the stairs he turns, fires at the chain that holds
          the chandelier anchored. The ceiling chews away.
          The chandelier falls with a crash of crystal.

                         CUT TO:
          INT. BASEMENT (CENTRAL JAIL) - NIGHT
          There are a dozen doors leading into a dozen basement
          cells. Tony, Jorge and Roberto are searching them.
          Tony enters a cell, then we hear:

                         TONY'S VOICE
          Roberto!
          As Roberto hurries into the cell

          INT. THE CELL -- NIGHT

          There is a pulley in the ceiling with a line led through
          it and tied off. The other end of the line is tied
          around Arrigo's ankles, suspending him upside down.
          His hands are tied behind him. His head and shoulders
          are not visible as he is immersed, head-down in a large
          tub of water.

                         TONY
          Quick!
          Roberto runs in, whips out a knife and as Tony grabs
          the motionless body, the other cuts the line.

                         ANOTHER ANGLE
          As they gently lay Arrigo's body on the floor, it is
          apparent he is dead.

                         CUT TO:

          INT. THE BASEMENT - NIGHT

          As Jorge throws open a door, his face goes white.

                         JORGE
          Mother of God.

                         

                         

                         

                         

          85.

                         CONTINUED:
          We have panned over to the door. We are now shooting
          toward the door with our view mainly blocked out, but
          we can see that Anjelica is dead, naked and tied spread-
          eagled, face down on a table.

                         CUT TO:

          INT. PRESIDENTIAL PALACE - NIGHT
          As Borracho, Fararon and Pepe are running down an
          upstairs hallway, throwing open doors, a Soldier
          appears, fires, Borracho spins and falls.

                         ANOTHER ANGLE
          As Fararon cuts the Soldier down, then turns to Pepe:

                         FARARON

                         (IN SPANISH)
          Let's get out of here!
          They turn and run.

                         CUT TO:

                         S

          EXT. THE JAILHOUSE AND SQUARE - NIGHT

          A couple of Police vehicles and an Army truck careen
          into the square, skid to a stop and the Soldiers and
          Policemen pile out and take cover in the square. The
          bodies of Ruiz and Indio are still where they fell.
          At a command from the Officer in charge, the Soldiers
          and Policemen start moving forward, from cover to
          cover, laying down a steady rattle of gunfire.

                         CUT TO:

          EXT. AN ALLEY BEHIND THE JAIL - NIGHT
          Jorge, Roberto and Tony are running along an alley
          in the darkness, bent over. One street away, in the
          town square, apparently all hell is breaking loose.
          As they run:

                         FARARON
          Hey! This way.
          They stop. Fararon and Pepe are standing in the mouth
          of a little narrow opening between houses.

                         0

                         

                         

                         

                         

          86.

                         ANOTHER ANGLE
          as Pepe and Fararon disappear into the darkness follow-
          ed by Tony, Roberto and Jorge.

                         CUT TO:

          EXT. ANOTHER STREET - NIGHT
          A Police car with the numerals "22" painted on the
          side is abandoned at an angle in the street, the
          doors open.

                         ANOTHER ANGLE
          Pepe runs up, looks in. Tony and the others follow.

                         PEPE
          The keys are gone!

                         TONY
          Get in.
          Tony is under the dashboard crossing the wires as
          the others start piling in. The motor starts, Tony
          slides behind the wheel. As they drive off ---

                         CUT TO:

          INT. POLICE CAR TWENTY-TWO - NIGHT
          Tony is driving. Roberto is next to him in the front
          seat. In the back are Jorge, Pepe and Fararon. As
          they come to a crossroads:

                         TONY
          Which way?

                         ROBERTO
          Left.

                         (THEN)
          When will the plane be coming?

                         PEPE
          Four. They will land at Quebrada.
          Roberto looks at his watch, then

                         ROBERTO
          We can make it. Left again.

                         E

                         

                         

                         

                         

          87.

          EXT. STREET CORNER - NIGHT

          As Police Car, Number 22, comes left around the corner
          a second Police Car is coming the other way. The two
          cars barely miss each other, just kissing as they pass.

          INT. POLICE CAR 22 - NIGHT

          The commandeered Police Car swerves as Tony fights
          the wheel, and finally steadies it.
          TONY'S POINT OF VIEW (THROUGH BACK WINDOW)

                         I
          The other police car has spun and stalled momentarily.
          As the Driver of the car gets it started and straightened
          out in pursuit, we can see the Second Policeman on the
          hand mike. We can hear his excited voice in Spanish
          coming over the police radio.

                         RADIO VOICE

                         (IN SPANISH)
          We have seen the terrorists. They
          are in Police Car Twenty-two going
          north on Avenue of the Martyrs.

                         (THEN)

                         0
          All units. All units. Terrorists
          seen going north on Avenue of the
          Martyrs.
          Now other traffic can be heard on the Police radio as
          other Police cars respond to the message.

                         SEVERAL CUTS
          of various Police cars as they get the message. Some
          swing around in U-turns. Their sirens are winding up
          to a howl.

          INT. POLICE CAR NUMBER 22 -- NIGHT

          We are shooting back through windshield. Pepe is look-
          ing out the back window and we see the following Police
          car. Now another joins it. Now still another. Police-
          men start firing out of their cars. The back window is
          starred as a hole suddenly appears in it, only inches
          from Padre Pepe's head. He looks at the hole and turns,
          crossing himself.

                         PEPE
          Jesus, Mary and Joseph.
          (CONTINUED)

                         

                         

                         

                         

                         K

                         AS
          CONTINUED:
          As another bullet hits the car somewhere with a clang
          of metal, Roberto turns to Tony:

                         ROBERTO
          Go right on Quebrada.

          THE HIGH SPEED CHASE
          as the pursuing Police cars increase their numbers and
          come closer, the commandeered car, carrying Tony and the
          others, makes a right turn on Avenue Quebrada, leading
          out of town. Parked at the curb is a non-descript sedan.

                         FEATURING TONY
          As he drives, we see that Roberto has taken from his
          knapsack-pouch a small detonating device such as we
          have seen in the demonstration of the doomsday car at
          Camp Peary.

          EXT. THE STREET CORNER - NIGHT


                         0
          As the first of the pursuing Police cars starts around
          the corner, suddenly the non-descript sedan parked at
          the curb detonates -- disintegrates in a blinding dazzle
          of light. In the jolt of the shock wave, every window
          within half a mile radius shatters.
          A vast ball of flame and black, heavy smoke billows
          upwards from the corner where the doomsday vehicle and
          the first car were immolated.
          Now, the following Police cars, unable to stop and
          unable to avoid the flames which have spread like
          napalm all over the whole corner, skid into the flames
          and smoke, plowing into the wreckage.
          The buildings are in flames. A Policeman, his uniform
          and hair on fire, runs screaming out of the inferno.

                         DISSOLVE TO:

          EXT. A COUNTRY ROAD - NIGHT
          as Police Car Number 22 turns down a side road, through
          a gate, then up and over a hill. We pan to the gate,
          over which are the words: "Finca Quebrada".

                         

                         

                         

                         

                         D

          89.
          EXT. AIR FIELD (FINCA QUEBRADA) - NIGHT
          The Police Car comes to a stop at the side of what
          is apparently a hacked-out landing strip in the middle
          of a small valley. There is a cane field on one side,
          the green stalks are five or six feet high.
          Tony, Jorge, Roberto and Pepe get out of the car.
          Roberto opens the back door and, to Fararon.

                         ROBERTO
          Come on! Do you want to ---

                         ANOTHER ANGLE
          Roberto has broken off as he looks at Fararon. We
          realize that sometime during the chase the Pharaoh
          has taken a bullet in the chest. He's dying, and pink
          bubbles of blood form and break on his lips as he
          struggles for breath.

                         ROBERTO
          I'm sorry, old friend.
          Tony crosses and gets back into the car to help
          Fararon.

                         ROBERTO
          I'm afraid he's finished.
          At this point we hear:

                         JORGE'S VOICE
          Here they come!

                         ANOTHER ANGLE
          as Jorge, Pepe and Roberto run to the center of the
          airstrip, looking upward. We can hear the sound of
          an approaching aircraft.

                         ON ROBERTO
          as he points a flashlight at the sky and signals a
          short and a long, the letter Alpha.
          ANOTHER ANGLE - INCLUDING THE DC-6
          The aircraft is now visible. From the Pilot's cockpit
          we see the answering signal, a long and three shorts,
          the letter Bravo.

                         

                         

                         

                         

                         I

          90.

          FULL SHOT - THE DC-6
          as it swings in on the final approach and starts to
          settle in for a landing.

          CUT TO:

          EXT. COUNTRY ROAD - NIGHT

          A couple of jeep loads of Soldiers turn down the
          same side road that we saw Police Car 22 take a few
          minutes earlier. As the jeep loads of Soldiers pass
          through the gate on which are the words, "Finca
          Quebrada", we ---
          CUT TO:

          EXT. THE AIRFIELD - NIGHT

          The DC-6 lands and swings around, taxiing back over
          the rough ground.

          CLOSE - TONY
          He senses something wrong, and pausing by the edge
          of the airfield, calls to the others.

                         0

                         TONY
          Wait a minute.

          ON ROBERTO, PEPE AND JORGE
          as they run for the plane.

          ANOTHER ANGLE - ON THE PLANE
          as the plane swings around again and the door opens.
          We reveal Frankie Rizzi in the doorway.

          CLOSE ON FRANKIE
          as he looks out.

          ON ROBERTO, PEPE AND JORGE
          as they run toward the plane.

                         

                         

                         

                         

                         K

                         91.
          EXT. RIM OF THE HILL
          as the two jeeps come up to the rim of the hill. One
          jeep turns a powerful spotlight onto the airfield.
          THE AIRFIELD - NIGHT
          as the spotlight catches Roberto, Pepe and Jorge in
          its beam.

          ON THE SOLDIERS
          as they fire.

          ON ROBERTO, PEPE AND JORGE
          as they are chopped down, one after the other.

                         ON TONY
          as he turns and fires at the jeep loads of Soldiers
          with his automatic rifle.

          ANOTHER ANGLE - THE SOLDIERS
          as Tony's fire shatters the spotlight. Some Soldiers
          fall, others fire at the DC-6.

                         THE DC-6
          as it starts to pick up speed, trundling over the
          rough ground.

                         ON TONY
          as he runs for the plane.
          ON FRANKIE
          as he sees Tony.
          ANOTHER ANGLE
          As Tony runs up to the plane, Frankie reaches down,
          drags him up and in. Bullet holes are appearing in
          the fuselage of the plane.

                         

                         

                         

                         

          92.

          ON THE SOLDIERS
          as they fire.

          ON THE DC-6
          as it rises into the air and banks off to the North ---

                         CUT TO:

          EXT. VIDAL'S PRESIDENTIAL PALACE - DAY
          Armando Vidal, his face a harsh mask, stands in the
          shattered doorway of the Presidential palace looking
          off toward the central Square.

                         VIDAL
          The counter-revolution has failed.
          The people once more did not rise
          up as expected.

          VIDAL'S POV (THE SQUARE AND JAILHOUSE) - DAY
          In the battle-torn square, outside the jailhouse, we
          see an angry crowd of Rioters carrying the dead and
          bloody body of Roberto Barcenas.

                         VIDAL

                         (COMING OVER)
          And once more the rich are bewilder-
          ed by the fact that the poor are un-
          willing to die for them.
          Somebody produces a rope, somebody else throws a loop
          around his feet and Roberto is hoisted upside-down to
          the top of an ornate old lamp post.

          NEWSMAN'S VOICE
          And while El Presidente was speak-
          ing, in understandable bitterness,
          crowds in the Capitol were running
          rampant, stringing up the bodies of
          the leaders of the failed coup d'etat.

                         THE CROWD
          as the people cheer. Somebody produces an American
          flag and sets it afire. It burns in the street. Kids
          kick at it, spit on it.

          (CONTINUED)

                          '

                         

                         

                         

                         

          93.

                         CONTINUED:

                         NEWSMAN'S VOICE
          They burned American flags and
          threatened the safety of the Amer-
          ican Delegation --

          EXT. AIRFIELD - DAY

          Uniformed Soldiers with fixed bayonets are guarding
          the airfield. Crowds are gathered outside the gates.

                         ANOTHER ANGLE
          as limousines with soldiers on the fenders and roofs
          come inching through the fist-waving, threatening mob.
          As the gates are opened, Soldiers with bayoneted
          rifles force the crowds back so that the cars can get
          through.

          NEWSMAN'S VOICE
          -- who were taken to the airfield
          under military escort.
          We see the limousines stop by a waiting transport
          plane, and -- with the guns of the Soldiers holding
          off the angry mob -- the American Delegation to Vidal's
          celebration hurries into the big airliner.

                         CLOSER
          In the American group we see Elizabeth Ann Dunne; the
          Senators McKissick and Barthalemew; Assistant Secretary
          Lucas; Speaker of the House, Carlock; and Stu Palmateer,
          looking cooler than he could possibly feel. Over this
          we hear:

          NEWSMAN'S VOICE

                         (CONTINUING)
          Soldiers with bayonets were forced
          to fight off the angry mob which
          clearly blamed the United States
          for the abortive coup.

                         CUT TO:

          EXT. THE STATE DEPARTMENT -- DAY
          As Arne Grundellius comes out of the State Department
          and crosses to his car, he is surrounded by Newsmen.

                         

                         

                         

                         

                         I

          94.

          CONTINUED:

          NEWSMAN'S VOICE
          Meanwhile in the Nation's Capitol,
          Arne Grundellius, the Secretary of
          State, was besieged by Newsmen as
          he left the State Department after
          an all-night session.
          CLOSER - ON GRUNDELLIUS
          as the Newsmen are thrusting microphones into his face,
          asking questions.

          FIRST NEWSMAN
          Mister Secretary! Mister Secretary!
          Can we get a statement?
          Grundellius stops.
          GRUNDELLIUS
          We will make an official statement
          tomorrow.
          NEWSMEN TOGETHER
          What about Vidal's accusations --
          Have you Kosygin's statement? Will
          you speak to the United Nations?
          Was Doctor Barcenas an American
          agent?

          GRUNDELLIUS
          One at a time. One at a time.

          2ND NEWSMAN
          According to world opinion, the
          CIA was behind this.

          GRUNDELLIUS
          Gentlemen! Gentlemen! I am late
          for a meeting at the White House
          but we categorically deny these
          allegations. The United States
          Government does not use assassin-
          ation as an instrument of foreign
          policy.

                         ANOTHER ANGLE
          As Grundellius starts to move through the crowd of
          Reporters,
          (CONTINUED)

                         0

                         

                         

                         

                         

          95.

                         CONTINUED:

                         3RD NEWSMAN
          What about the rumors that the under-
          world was involved in this?

                         GRUNDELLIUS
          If the underworld was behind it --
          and I don't rule out the possibility
          at all -- that fact will be brought
          to light in open hearings before a
          special committee of Congress. It's
          in the works right this minute, and
          subpoenas will be coming out by the
          weekend.

          INT. ELIZABETH'S APARTMENT - DAY

          Tony, dressed as last we saw him at the airfield, is
          in Elizabeth's apartment watching the news on her liv-
          ing room television set.

          NEWSCASTER'S VOICE
          And now for further reactions to
          today's developments, we take you
          to the United Nations where Sander
          Vanocur --
          Tony has turned off the television set with a remote
          control switch, having heard the sound of a key in
          the front door. He rises and turns as Elizabeth enters
          carrying a paper bag.

                         TONY
          Listen, I --
          He breaks off as Elizabeth's face goes pale with shock
          and she drops the bag on the floor. A couple of oranges
          roll across the rug as:

                         TONY
          I'm sorry, I had to talk to you.
          She looks at him, her expression almost readable as anger:

                         ELIZABETH
          You're listed as missing. It's on
          the wire.

                         I

          TONY
          Well, much as I hate to disappoint
          everybody.

                         (CONTINUED)

                         

                         

                         

                         

                         I

          96.

                         CONTINUED:
          Suddenly Elizabeth drops to her knees and starts
          crawling around on the floor, blindly searching for
          the oranges.

          ON TONY
          as he looks at her for a puzzled moment, then drops
          to his own knees and taking her by the shoulders,
          straightens her. Tears are pouring down her cheeks.

                         TONY
          Hey, wait a minute.
          He kisses her.

                         TONY
          What's all this crying shit?
          She smiles, sniffles, blinks the tears out of her eyes,

                         THEN:

          ELIZABETH
          You want an egg sandwich?

                         CUT TO:

          INT. ELIZABETH'S KITCHEN - NIGHT

          Tony and Elizabeth are in the kitchen having sandwiches
          and coffee as the kitchen wallphone rings. Elizabeth
          gets up and answers it.

                         ELIZABETH
          Hello?
          INT. MOREHOUSE'S OFFICE (LANGLEY) - NIGHT

                         1
          Stu Palmateer is at the desk. Morehouse is with him,
          reading some reports.

                         PALMATEER
          This is Captain Palmateer. I got
          a message to call this number.

                         ON ELIZABETH
          as she speaks into the phone.

          ELIZABETH
          Yes. Hold on for a minute, will
          you.
          (to Tony)
          It's your call.

                         (CONTINUED)

                         

                         

                         

                         

          97.

                         CONTINUED:
          Tony crosses over and takes the phone.

                         TONY
          Stu?

                         PALMATEER
          Are you all right?

                         TONY
          Considering.

                         (THEN)
          Some friends of mine flew me up
          to Maryland this morning. I thought
          maybe I'd better not go directly
          home.

                         PALMATEER
          Good.

                         TONY
          How are you?

          PALMATEER
          Ten kinds of blue hell are breaking
          loose out here. Are you all right
          at that number till tomorrow?

                         TONY
          Yes.

          PALMATEER
          Then I'll get back to you.
          Click, as Palmateer hangs up. Tony turns to Elizabeth.

                         TONY
          I hope you don't mind a house guest.

                         CUT TO:

          INT. AN AUDITORIUM (NEW ORLEANS) - NIGHT

          We are on a cheering audience of Longshoremen.
          ANOTHER ANGLE - THE SPEAKER'S PLATFORM
          Danny DeVito is holding up his hands to the crowd.
          Behind him, red, white and blue bunting. Posters
          reading: "VOTE FOR DEVITO -- GIVE THE UNION BACK
          TO THE MEMBERSHIP".

                         

                         

                         

                         

          98.

          EXT. AUDITORIUM -- NIGHT

          As Danny DeVito comes out of the stage door of the
          auditorium, Ralph Augusto comes up to him.

                         AUGUSTO
          Sam wants to see you.

                         DEVITO
          I'm at the Pontchartrain.
          Ralph takes Danny by the arm and moves him toward a
          chauffeured limousine waiting at the curb. As he does

                         SO :

                         AUGUSTO
          Now. Tonight. He's got an idea
          how to get everybody off the hook.
          As Augusto opens the door and ushers Danny into the
          back seat of the car, we ----

                         CUT TO:

          EXT. BOURBON STREET (NEW ORLEANS) - NIGHT
          As the limousine moves down Bourbon Street with its
          honky tonks and jazz joints.

          INT. THE LIMOUSINE - NIGHT

          As they drive down Bourbon Street, Augusto leans for-

                         I
          ward to the Chauffeur.

                         AUGUSTO
          We'll go in the back way.

                         CHAUFFEUR
          Yes Sir.
          The Chauffeur turns down a side street and up an alley.

          EXT. THE ALLEY -- NIGHT
          A truck is blocking the alley. A big, cheerful-looking
          Laborer with a knit cap on his head, is sitting on a
          big barrel by the rear of the truck.
          The limousine pulls up behind the truck, stops. From
          inside a nearby club we hear a jazz trumpet on a long
          ride.

                         

                         

                         

                         

                         ??T

          99.
          INT. THE LIMOUSINE
          as Augusto leans forward.

                         AUGUSTO
          Give him the horn.
          The Chauffeur honks the horn. The Laborer grins over
          at the limousine, gestures, palms up.

                         AUGUSTO
          So we walk. It's not far.

                         ANOTHER ANGLE
          as Augusto, Danny Devito and the Chauffeur move up
          toward the rear of the truck, the Laborer jumps down
          off the big barrel he's been sitting on.

                         LABORER
          Hello Danny.
          At this point the Chauffeur wheels and grabs Devito
          in an arm lock, with one gloved hand clamped over his
          mouth.

                         ON AUGUSTO
          as a knife suddenly glitters in his hand and he plunges
          it into Devito. Danny is kicking and struggling.

                         AUGUSTO
          Hold the cocksucker still.
          As Augusto plunges the knife into Danny again and
          again and again.
          ANOTHER ANGLE
          The Laborer has taken the lid off the barrel. Danny
          sinks to the pavement, convulses and dies.

                         ANOTHER ANGLE
          as the three men pick up Devito's body, stuff him
          into the barrel, then hoist the barrel into the back
          of the truck.
          The jazz trumpet is still playing, as we --

                         CUT TO:

                         

                         

                         

                         

                         K

          100.

          INT. ELIZABETH'S BEDROOM - DAY
          It is the following morning. Tony is in bed, asleep.
          Elizabeth enters, turns on the bedroom TV set and
          then crosses and shakes Tony. Tony comes awake fast.

                         TONY
          What is it?

                         ELIZABETH
          Your friend, DeVito ---
          The TV set has warmed up and now the Newscaster's voice
          comes over the picture of a middle-class house in
          Bayonne, New Jersey.

                         NEWSCASTER
          There have been no ransom demands
          and the Devito family -- although
          concerned -- are not yet alarmed.

                         (THEN)
          Police have stationed a guard on
          the ex-union Leader's Bayonne, New
          Jersey home where his wife and grown
          daughter are in seclusion. Informed
          sources fear an eruption of mob
          violence if the popular Labor Leader
          has met with foul play.

                         I

                         ANOTHER ANGLE
          As the Television Newscaster switches to another item,
          we see a burning house, fire engines, so on.

                         NEWSCASTER
          Long Beach, New York. In a possibly
          related incident, the home of re-
          puted Syndicate figure Santino
          Corleone was firebombed early this
          morning. Corleone, thirty-five, is
          in guarded condition at Saint
          Catherine's hospital with first
          degree burns over two thirds of his ---
          Tony is out of bed. He has switched off the TV set
          and crossed to the phone.

                         TONY
          Long distance information, please.
          The number of the Vegas Palms ---

                         CUT TO:

                         

                         

                         

                         

                         101.

          EXT. LAS VEGAS AIRPORT - NIGHT

          It is early evening of the same day.

          INT. LAS VEGAS AIRPORT - NIGHT

          as Tony comes out into the central area of the Vegas
          Airport. He pauses, looks around.

                         ANOTHER ANGLE
          as Rocco Lampone crosses up to him.

                         LAMPONE
          Let's go.

                         ANOTHER ANGLE
          as Tony follows Lampone.

          EXT. LAS VEGAS AIRPORT - NIGHT

          as a big car driven by a Button Man named Fritz pulls
          up. Rocco opens the door to the backseat, gestures
          Tony in. As Tony gets in ---

                         ON ROCCO
          He looks around. There is a car full of Button Men
          in front of them. Another car full of button men
          behind. Rocco nods. Gets a nod back from each driver.

          INT. CAR - NIGHT

          As Tony gets into the backseat, we see that Tom Hagen
          is there. Rocco Lampone now gets into the front seat
          alongside the driver, Fritz.

          LAMPONE
          Tony, this is Fritz.

                         (THEN)
          Let's move.

          ANOTHER ANGLE
          as the cars drive out of the airport, Hagen's
          car in the middle.

                         

                         

                         

                         

                         I

          102.

          INT. HAGEN'S CAR

          as Hagen turns to Tony.

                         HAGEN
          Danny's dead. They found him in
          a barrel at the mouth of the
          Mississippi.

                         TONY
          Maatrocina?

                         HAGEN

                         (NODS)
          It's going to be bad for a while,
          that's why I wanted you out here.
          It's time you -- Jesus Christ, Fritz!
          This last as another car swings out of a side street
          and a Hood leans out of the rear window with a twelve
          gauge pump gun, and blasts at Fritz.

          ON HAGEN'S CAR
          as a blast of heavy shot takes out the windshield
          and blows most of Fritz's head away.
          INT. HAGEN'S CAR
          As a fountain of blood gushes up from the stalk of
          Fritz's neck and Hagen's car starts to swerve into
          the curbing, the Gunman in the other car sends two
          more blasts of deer-load into Hagen's car.

                         ANOTHER ANGLE
          as the Corleone Button Men in the following car blasts
          at the attackers, the Shotgun Man blasts away.

          ANOTHER ANGLE
          as Hagen is hit by half a dozen buckshot and he flops
          over dead, onto Tony. The car hits and rolls.

                         ANOTHER ANGLE
          The Hagen car comes to a stop on its wheels again.
          The Corleone Button Men in the following car jump
          out, run up.

                         0

                         

                         

                         

                         

                         I

          103.

                         CLOSER
          As Rocco Lampone comes out of the wreckage, the Button
          men are pulling Tony -- who is unconscious -- out.
          Lampone looks at Fritz and Hagen.

                         LAMPONE
          Not a goddamned thing you can do
          for them. Let's get out of here.

                         ANOTHER ANGLE
          As the Corleone Men carry Tony to the other car and
          get him inside, people are starting to gather.

                         LAMPONE
          It's all right. It's all right.
          Gangway. We're getting him to
          the hospital! Man's hurt here!
          Clear the road!
          As the Bystanders move back, the Corleone car burns
          rubber and digs out

                         CUT TO:
          INT. BEDROOM (CORLEONE COMPOUND, TAHOE) - NIGHT
          Camera is subjective, the screen is pitch black with
          a single red-orange dot moving erratically in the
          center. It is the coal of a cigarette as someone
          takes a last puff, tamps it out, then scratches a
          match and lights another.
          In the flare of the match we see a Nurse's bulldog
          face.

          ANOTHER ANGLE - INCLUDING TONY
          He wears a bandage around his head like a turban. He
          is looking at the Nurse in the flicker of the match
          flame.

                         TONY
          Who are you?

                         NURSE
          Oh, good, you're awake.

                         TONY
          Wait a minute.

          (CONTINUED)

                         

                         

                         

                         

                         A

          104.

                         CONTINUED:

                         NURSE
          I'll be right back.
          The nurse has risen. She switches on a lamp and
          exits.
          ANOTHER ANGLE (MIRROR SHOT)
          We are angled into a mirror over a chest of drawers,
          holding on Tony as he sits up in bed, then gingerly
          gets to his feet. He is wearing silk pajamas. Now
          he crosses to the mirror and checks himself out.
          Aside from the bandage he seems to be in one piece.
          Now, in the mirror we see the door open and silhou-
          etted in the doorway, the figure of Michael. Tony
          turns.
          ANOTHER ANGLE
          as Tony and Michael look at one another for a long
          moment.

                         MICHAEL
          How are you feeling?

                         TONY
          What am I doing here?
          As Michael comes into the room and closes the door:

                         MICHAEL
          I had Rocco bring you.

                         (THEN)
          How's the head? A little pain?

                         TONY
          A little.

                         MICHAEL
          The Doctor says it's nothing to
          bother you, but I've always found
          it's easier to be brave about some-
          body else's headache.

                         TONY
          Did he say how soon I could leave?

                         MICHAEL
          Sit down. Sit down.

                         S

                         

                         

                         

                         

                         ANOTHER ANGLE
          Tony doesn't sit down.

                         TONY
          I'd like to get out of here as soon
          as possible.

                         MICHAEL
          Let me ask you a question. Have
          you any idea where you stand?

                         TONY
          I'm not totally stupid.

                         MICHAEL
          I don't think you're stupid. I
          think you're smart. But not smart
          enough.

                         TONY
          I'm willing to learn.

                         MICHAEL
          Good.

                         (THEN)
          You've let your enemies get too
          close to you.

                         (THEN)
          Those people who tried to kill you
          in Las Vegas, they were Maatrocina's
          people.

                         TONY
          Are you sure?
          Michael answers that question with a look: Of course
          I'm sure.

                         MICHAEL
          The question to ask is this: Who
          knew you were flying to Las Vegas?

                         TONY
          The girl I was staying with and ---
          Tony breaks off:

                         MICHAEL
          Someone from Langley?

                         TONY
          Before I left, I called a man named
          Stuart Palmateer.
                         (CONTINUED)

                         

                         

                         

                         

          106.

                         CONTINUED:

                         MICHAEL
          I told Tom Hagen -- God rest his
          soul -- but I warned him. It was a
          mistake -- that whole Vidal business.
          As the only contact between our people
          and the Government you had no protec-
          tion. You were naked.

                         (THEN)
          If they want to break the contact,
          they eliminate you and they're clean.
          As long as you're alive, you're a
          threat -- do you understand?

                         ANOTHER ANGLE
          As Tony sits. He's not sure whether he likes or
          trusts or is ready to forgive his father, but he
          knows the sound of good sense when he hears it.

                         MICHAEL
          How long do you think the Administra-
          tion would last if it were to come
          out that the President used the
          Corleone family to assassinate the
          head of a foreign state.

                         (THEN)
          The question is rhetorical.

                         ANOTHER ANGLE
          Michael takes out a cigar and goes about the ceremony
          of lighting it.

                         MICHAEL
          I'm not supposed to smoke these
          things, but it isn't every day
          a man's son comes home.

                         TONY
          You were saying?

                         MICHAEL
          Every year -- on February third --
          I've sent you a check. Those checks
          were never cashed.

                         TONY
          Would you like to know why?

                         

                         

                         

                         

          107.

                         CONTINUED:

                         MICHAEL
          I've always known why -- and I res-
          pect your reasons. But, neverthe-
          less, I'm your father and anything
          I have is yours -- if it's money you
          want, my friendship, the benefit of
          my experience -- if you'll accept it.
          It's yours.

                         TONY
          I I need your advice.

                         MICHAEL
          (nods, then)
          There are two ways I see to handle
          this. First, I can make you dis-
          appear in Sicily until everything
          blows over. Things change. Men get
          old and angers cool. In four or
          five years you could probably be
          safe to come home again.

                         TONY
          What's the other way?

                         1

                         MICHAEL
          The other way is a gamble that you
          could lose.

                         (THEN)
          And it would mean becoming a part
          of the family -- for a while anyway.
          Tony is just looking at Michael.

                         MICHAEL
          Think about it. Sleep on it.
          We'll talk again in the morning.

          INT. THE BOATHOUSE (CORLEONE COMPOUND) - DAY
          Tony enters the glassed-in boathouse. Outside, cold
          white winter and the deep, blue Lake. With him is a
          Servant.

                         SERVANT
          Your father will be out in a moment.

                         (INDICATES)
          There's coffee on the sideboard.
          Tony nods. The Servant exits. Tony crosses to the
          sideboard, pours a cup of coffee. As he turns with
          it, his eyes fall on a table in the corner on which
                         (CONTINUED)

                         

                         

                         

                         

                         I

          108.

                         CONTINUED:
          are many Pictures, mostly in heavy silver frames.
          Some are studio portraits. Some are grainy blow-ups
          of snapshots. Tony crosses to the table of pictures.

          VERY CLOSE - THE PICTURES
          As Tony looks, we are moving from picture to picture.
          The first is a wedding portrait taken on Connie's
          wedding day. Carlo is kissing the Bride. The God-
          father stands, proud and uncomfortable in his tuxedo.
          Michael in his uniform. The twins, all heavy eyebrows
          and baby fat.
          There is a picture of Sonny with his fists up and
          laughing as if about to hit someone.
          There is a photograph of the three brothers, Sonny,
          Michael and Fredo, their arms around each other, at
          an outing someplace. Sonny is in the middle of a
          big grin. Fredo looks shy and scared. Michael is
          staring straight ahead, a boyish smile on his face
          although his eyes are cold.
          There is a picture of Michael and Kay at Tony's con-
          firmation.

                         0
          There is a blown-up snapshot of the Godfather in his
          tomato garden in the backyard.

                         ON TONY
          as he picks up the picture of the Godfather, remem-
          bering --

                         DISSOLVE TO:

          A FLASHBACK SEQUENCE - (FROM GODFATHER I)
          The old Don is tending his tomato vines. With him
          is the little boy, Tony, aged three or four. They
          have the special rapport that sometimes exists be-
          tween the old and the very young. They play teasing
          games with paper fangs and the bug spray can, then
          suddenly, the old man's heart fails him, and he falls
          into the tomato vines. After a moment the little boy
          understands that the old man is no longer playing a
          game, and he becomes frightened:

                         CUT TO:

                         

                         

                         

                         

                         L

          109.
          INT. THE BOATHOUSE - DAY
          We are on the door as Michael enters. He pauses for
          a moment, then:

                         MICHAEL
          Do you remember your grandfather?

                         ANOTHER ANGLE
          as Tony turns from the table full of pictures with his
          grandfather's framed portrait still in his hand.

                         TONY
          Yes.
          As Tony puts down the picture, Michael comes up.

                         MICHAEL
          Do you remember him with admira-
          tion and respect?

                         TONY
          I remember that I loved him.

                         MICHAEL
          So did I.

                         TONY
          I've been thinking about these
          choices.
          Michael holds up his hand to delay the decision.

                         MICHAEL
          Talk with me for a moment.

                         (THEN)
          Let me learn something about my
          son. What's Trident Scholar?

                         TONY
          It's a special honors program for
          First Classmen.

                         MICHAEL
          You liked Annapolis?

                         TONY
          Yes.

                         MICHAEL
          Enough to make a life in the Navy?

                         (CONTINUED)

                         

                         

                         

                         

                         ??I

          110.

                         CONTINUED:

                         TONY
          No. Not that much.

                         MICHAEL
          I was surprised you didn't go to
          Dartmouth.

                         TONY
          I thought about it, but Kay wanted
          me to go to Annapolis and the price
          was right.

                         MICHAEL
          How's Hanover these days?

                         TONY
          About the same.

                         MICHAEL
          I always liked that town. That's
          where I met your mother -- when I
          was going to school up there.

                         TONY
          I know.

                         MICHAEL
          I'd planned to live there, you know.
          Teach maybe -- or go into law. A
          little office on Wheelock Street.
          Deeds and wills. I would have liked
          that.

                         TONY
          Why didn't you do it?

                         MICHAEL
          Because one day a Sicilian pimp
          and dope peddler named Virgil
          Sollozzo tried to assassinate my
          father -- your grandfather -- and
          I had to do something about it.

                         (THEN)
          It was a mistake. I took a road.
          The wrong road for me. It ended
          here.

                         TONY
          Is this so bad?

                         MICHAEL
          I don't know. It depends on what
          you call terrible. You have to

                         (CONTINUED)

                         

                         

                         

                         

                         CONTINUED:
                         MICHAEL (cont'd)
          live a certain way -- a cold way --
          or you won't live long.

                         (THEN)
          Do you remember your uncle Fredo?

                         TONY

                         (SMILES)
          Fredo, the fisherman. Yes. I'll
          always remember, he had a secret
          way to catch fish. He taught me.
          You say a Hail Mary before you
          put the line down. It never fails.

                         MICHAEL
          You know what happened to Fredo?

                         TONY
          He died, didn't he?

                         MICHAEL
          I had him killed. My own brother.
          It was something I had to do -- or
          felt I had to.

                         TONY
          Why?

                         MICHAEL
          He went against the family. So I
          waited until our mother died and
          then I --
          (breaks off, then)
          Not too many people are fitted for
          this kind of a life. I've had to
          do many hard things, but sending
          you and your sister away -- that
          was the hardest.

                         ANOTHER ANGLE
          There is a beat. Tony and his father look at each
          other and for a moment we feel that Tony is going
          to cross to his father, then the moment passes:

                         MICHAEL
          And now that I've said that, I'd
          like to hear your decision.

                         TONY
          I don't think I'd like Sicily.

                         (CONTINUED)

                         

                         

                         

                         

          112.

                         CONTINUED:

                         MICHAEL
          Good.

                         (THEN)
          Come sit with me. Time is short
          and I have a lot to tell you.

                         CUT TO:
          INT. CIA BUILDING (LOBBY) - DAY
          As Tony, in civilian clothes, passes through the
          inner checkpoint with his legitimate credentials.

                         CUT TO:

          INT. MOREHOUSE'S OFFICE - DAY
          Morehouse and Palmateer are in the office. Their atti-
          tudes are less than cordial as Tony enters.

                         MOREHOUSE
          Come in Adams, sit down. I've
          called Stu in on this --

                         (GLANCES AT

                         WATCH)
          Though I'm afraid I can't give
          you much time. I've got a brief-
          ing with the Forty Committee at
          noon.

                         TONY
          What I have to say won't take long.

                         (THEN)
          And I think we'll all be happier
          if it's not on tape.
          Morehouse pauses a moment then opens a drawer and
          switches off his tape machine.

                         MOREHOUSE
          How's your health? I understand
          you got a crack on the head?

                         TONY
          I'm fine now.

                         MOREHOUSE
          You were up at your father's place
          in Tahoe?

                         TONY
          That's right.

                         (CONTINUED)

                         

                         

                         

                         

          113.

                         CONTINUED :

                         MOREHOUSE
          Has he changed much?

                         TONY
          Not as much as I'd expected.

                         MOREHOUSE
          I remember him very well -- from
          the Senate hearings on crime. Neat
          little man. Very polite, spoke in
          a soft, reasonable voice. He sat
          there with two million dollars worth
          of legal talent at his elbow and told
          the United States Government to go
          piss up a rope.

                         (SMILES)
          You had to admire the pretentious
          little bastard.

                         TONY
          We're wasting each other's time
          with this, Mister Morehouse.

                         MOREHOUSE
          All right, Son. This is your party.
          You've got five minutes.

                         (THEN)
          But before we start, I'm not going
          to listen to a lot of recriminations
          about that operation. Your people
          blew it, pure and simple. It was
          totally mishandled. A mistake from
          beginning to end.

                         TONY
          The big mistake was that I'm still
          alive.

                         MOREHOUSE
          Come again?

                         TONY
          If you'd managed to get rid of me
          down there -- or in Vegas -- you'd
          be all right. But it's too late
          now. You've lost your chance.

                         MOREHOUSE
          I don't know what the hell you're
          talking about.
                         (CONTINUED)

                         

                         

                         

                         

                         I

          .7

          114.

                         CONTINUED:

                         TONY
          If anything should happen to me --
          anything at all -- you can find out.
          The full story will be all over the
          six o'clock news.

                         MOREHOUSE
          I'd like to hear your conception of
          the full story.

                         TONY
          You were running a no-lose operation.
          If we succeeded, you were rid of
          Vidal -- if we failed, the Corleone
          family was set up to take the blame.
          Your hands were clean. I was the
          only one who could dispute your
          story and I wasn't supposed to come
          back. I've found out that my res-
          ignation from the Navy was processed
          and accepted a full week before I
          went down here ---

                         MOREHOUSE
          Stu?

                         PALMATEER
          Well, yes, Sir. His resignation did
          go through, but it was a snafu -- a
          Yeoman's mistake, that's all.

                         MOREHOUSE
          A clerical error.

                         TONY
          And I don't buy it.

                         MOREHOUSE
          I don't give a shit if you buy it
          or not. And I'll tell you some-
          thing else, Mister Adams, or what-
          ever your fucking name is, I don't
          react favorably to blackmail.

                         TONY
          It's not blackmail. It's a simple
          statement of fact. If anything
          happens to me or if the Corleone
          Family is damaged in these Senate
          Hearings, then the whistle blows
          and the whole Administration goes.

                         

                         

                         

                         

                         7

                         CONTINUED:

                         MOREHOUSE
          And what makes you think I give a
          shit about what happens to the Admin-
          istration?
          Morehouse grins without humor. Tony's eyes flicker.
          Morehouse has told something that Tony needed to know.
          Morehouse is up and coming around the desk, as he
          continues.

                         MOREHOUSE
          Whatever I did was done under dir-
          ect orders from the President of
          the United States, and I will so
          testify in open hearing. If that
          upsets some of your guinea gumbahs
          in Nevada, then so be it. The days
          are over when your father and his
          like could corrupt and intimidate
          this Nation.

                         TONY
          No. You've taken over the job.

                         MOREHOUSE
          Your five minutes are up -- now get
          the hell out of here.
          As Tony turns and goes, Morehouse glares after him,
          the glint of victory in his eyes.

                         CUT TO:

          INT. SENATE BUILDING - FOYER - DAY

          We are outside the huge Senate Caucus room. The
          double doors are open and inside we can see the
          preparations for the Hearing. The Senators are
          taking their seats -- the Press is being given the
          Press hand-out (stamped: "Embargoed until Witness
          Testifies"). Technicians are carrying cables for
          the TV cameras and lights past Lawyers and Committee
          Staff Members.
          Planted in front of all this, speaking to a TV camera,
          is Elizabeth Ann Dunne.

                         ELIZABETH
          This is Elizabeth Ann Dunne coming
          to you from outside the Senate Caucus
          Room where the Senate Select Committee
          on Intelligence is meeting this morn-
          ing ...

                         (THEN)
          Oh, Senator!
                         (CONTINUED)

                         

                         

                         

                         

                         I

                         I

          116.
          This last to the polished Senator Barthalemew of
          Pennsylvania who pauses in front of the camera.

                         ELIZABETH
          This is Senator Paul Barthalemew
          of Pennsylvania who will chair the
          Committee.

                         (THEN)
          Can you tell us the specific purpose
          of this Committee, Senator?

                         BARTHALEMEW
          This Committee has been mandated
          to address two questions: One, did
          the United States Government -- or
          any of its officials -- authorize,
          instigate or in any way abet a plot
          to assassinate the Head of a friendly
          foreign State, Armando Vidal.

                         (THEN)
          And Two, if not -- who did?

                         ELIZABETH
          Thank you, Senator.

                         0
          As Barthalemew moves into the Caucus Room, shaking his
          head, we --

                         CUT TO:
          INT. SENATE CAUCUS ROOM - DAY
          Barthalemew is pounding his gavel for order.

                         BARTHALEMEW
          Take your seats, please. Sergeant
          at Arms! Will you see that every-
          one takes his seat?
          We pan over to the door as Tony enters and finds a
          seat.

                         CUT TO:

          EXT. A WOODED AREA (POMPTON LAKES, NEW JERSEY) - DAY

          Ralph Augusto, bare to the waist, is half way up a
          hill, in a stand of trees, working with an ax. He
          has stripped off his shirt and his coat. His shirt
          and his gun are placed over a fallen tree, ten yards
          away.

                         

                         

                         

                         

          117.

                         ANOTHER ANGLE
          as Frankie Rizzi and Rocco Lampone are coming up the
          hill toward Augusto. They pause:

                         LAMPONE
          Hey Ralph?
          Augusto turns. His eyes flicker to the gun, then
          back to Rocco and Frankie. He smiles warmly:

                         AUGUSTO
          Hey Rocky. Whaddya say? Waddya
          doin' up here?

                         FRANKIE
          We just came up to say goodbye,
          Ralph.

                         LAMPONE
          For Neri and DeVito.
          Frankie and Lampone have spoken almost simultaneously,
          and as they speak, their guns are out and blasting.
          ANOTHER ANGLE - ON AUGUSTO

                         10
          As the bullets thwack into his body, he jerks, but
          doesn't go down. His cold face twists in fury as
          he is moving down toward Tony, raising the ax.

          ON FRANKIE
          as he stands his ground, blasting. Twice. Three
          times. Four times.

                         ANOTHER ANGLE
          as Augusto wavers, his face goes slack and he plunges
          into the ground at Frankie's feet. The ax has fallen
          from his hands.

                         CUT TO:
          INT. SENATE CAUCUS ROOM - DAY
          Among the Senators on the Committee, we recognize
          McKissick and possibly we will remember old Geary,
          from Nevada. Tony is an interested spectator as
          Barthalemew questions the witness, Martin Davideau.

                         0

                         

                         

                         

                         

                         I
                         CONTINUED:

                         BARTHALEMEW
          And what light can you throw on
          this matter, Mister Davideau?

                         DAVIDEAU
          I have a memo here from the Director
          of the FBI to the Director of the CIA
          with copies to the Army, Air Force,
          Navy and State Department Intelligence
          Offices. It states that one of our
          informants -- well, I'll read it.
          (reads)
          'During a recent conversation with
          several friends, underworld figure
          Sam Maatrocina stated that there
          was going to be an attempt on Armando
          Vidal's life and this attempt --
          this hit, in the argot -- was to
          be carried out by another underworld
          family.'

                         MC KISSICK
          Did this informant identify the other
          underworld family?

                         DAVIDEAU
          No Sir, he did not.

                         MC KISSICK
          Would you care to speculate --

                         GEARY
          Just a minute, Senator -- I whole-
          heartedly object to this Committee
          being used as a forum for spec-
          ulation based on an anonymous report
          of an alleged conversation. Repu-
          tations could be recklessly and irre-
          trievably damaged.

                         MC KISSICK
          Very well, very well. We wouldn't
          want to damage any of the constituency
          of my esteemed Colleague from Nevada --

                         GEARY
          Many thanks to the distinguished Senior
          Senator from our Western Sister State
          of Utah.

                         CUT TO:

                         

                         

                         

                         

                         I

          119.
          INT. MAATROCINA'S OFFICE (NEW YORK CITY) - DAY
          Sam Maatrocina is seated in his office on the thirty-
          third floor of an old downtown Manhattan office build-
          ing. The buzzer sounds.

                         MAATROCINA
          Yes?

                         SECRETARY'S VOICE
          A couple of gentlemen here from the
          Internal Revenue, Mister Maatrocina.

                         MAATROCINA

                         (SCOWLS)
          The Internal Revenue!?

                         SECRETARY
          Yes sir.

                         MAATROCINA
          All right. Send 'em in.

                         ANOTHER ANGLE
          The door buzzes open and two young, rather conserva-
          tive looking Men enter with briefcases.

                         FIRST MAN
          Mister Maatrocina?
          Maatrocina is coming around the desk:

                         MAATROCINA
          What is this? Some kind of a roust?

                         FIRST MAN
          It's about your income tax, Sir.

                         MAATROCINA
          My taxes are handled by the biggest
          firm of accountants in New York City.
          It costs me a hundred thousand doll-
          ars a year and you two assholes in
          cheap suits are going to come in here
          and --
          Sam Maatrocina breaks off. The Second Young Man has
          hit him an open-handed karate chop on the side of
          the neck. Now, before Maatrocina can speak or cry
          out, the Man has hit Maatrocina a second chop, shat-
          tering his Adam's apple.

                         

                         

                         

                         

                         I

          120.

                         ANOTHER ANGLE
          As Maatrocina goes to his knees, his eyes goggling,
          the First Young Man has crossed to the window and
          opened it.
          Now he and the Second Young Man carry the wide-eyed
          New York Don to the window and throw him out.

                         CUT TO:

          EXT. DOWNTOWN MANHATTAN STREET - DAY

          as crowds are gathering around the body of Maatrocina
          on the sidewalk, the Two Men come out of the building,
          cross the street and go around the corner. In the
          distance, sound of approaching sirens.

          EXT. THE CORNER - DAY

          as the Two Men come around the corner and get into a
          waiting car. Driving the car is Frankie Rizzi. As
          they drive off, we ---

                         CUT TO:

          INT. SENATE CAUCUS ROOM - DAY
          It is late afternoon. A Witness, General Vanderhorst,
          the Director of the CIA, is at the table. Geary is
          quizzing him.

                         GEARY
          No. No. What I'm trying to get
          from you, General, is, as Director
          of the Central Intelligence Agency,
          did you ever have any kind of order
          in writing authorizing you to assass-
          inate the head of a foreign state?

                         VANDERHORST
          No, Sir. I did not.

                         MC KISSICK
          Well, come on now. That's hardly
          the thing that would be put in
          writing, now is it?

                         GEARY
          Well, what would it be put in, if
          not writing?

                         

                         

                         

                         

                         I

          121.

                         CONTINUED:

                         MC KISSICK
          I suggest that it would be put into
          the same kind of phraseology that
          Henry used to his Barons when he
          said, 'Will no one rid me of this
          turbulent priest?' He didn't tell
          them to go kill Thomas à Becket,
          but the final result of it was
          murder in the Cathedral --

                         GEARY
          What has Thomas à Becket got to
          do with the subject at hand? --

                         MC KISSICK
          If the Distinguished Senator from
          Nevada will refrain from --
          Barthalemew is beating with the gavel on his desk.

                         BARTHALEMEW
          Gentlemen! Gentlemen!

                         CUT TO:

          EXT. THE MALL - DAY

          Tony is buying a hot dog at one of the dog-wagons on
          the Mall. He crosses and sits on a bench where Palmateer
          is sunning himself, looking at a newspaper.

                         CLOSER
          as Tony takes a bite of his hot dog.

                         TONY
          These are good. You ought to
          have one.

                         PALMATEER
          I don't have a lot of time, Tony.

                         TONY
          First, I wanted to say that although
          I'm sure that I was set up to be
          killed down there, I never thought
          you were mixed up in it.

                         PALMATEER
          Fine. Glad to hear that. Now,
          what was it that you wanted?
                         (CONTINUED)

                         

                         

                         

                         

                         I

          122.

                         CONTINUED:

                         TONY
          I hear that Morehouse is being called
          in by the Committee to testify.

                         PALMATEER
          So they tell me.

                         TONY
          Well if he does -- and I'm telling
          you this as a friend -- he's going
          to bring the roof down on himself --
          and you too.
          As Palmateer puts the newspaper down, looks at Tony:

                         TONY
          That's definite, Stu.

                         (THEN)
          I'm sorry.

                         CUT TO:

          EXT. A MARYLAND ROAD (NEAR PATUXENT) - NIGHT
          We pick up an automobile moving south along a road
          that edges Chesapeake Bay.
          is Q.

          INT. THE CAR - NIGHT

          We see that Tom Morehouse is driving. We are angling
          through the front windshield as he turns off into a
          marina parking lot.

          EXT. THE FLOATS - NIGHT

          as Morehouse comes out onto the float, then climbs
          aboard a nice little yawl, apparently his own.

                         CLOSER
          as Morehouse goes to the cabin. The snap lock has
          been unlocked and the hatchway is open. There is a
          dim light in the cabin.

                         MOREHOUSE
          Stu?

                         PALMATEER
          Down here.

                         

                         

                         

                         

          123.

          INT. CABIN - NIGHT
          as Morehouse comes down the four steps that lead into
          the cabin, then stops.

          ANOTHER ANGLE
          Palmateer and Rocco Lampone are waiting for him in the
          cabin.

                         MOREHOUSE
          What the hell is this?

          ANOTHER ANGLE
          as Rocco rises, drawing a twenty-two calibre pistol
          with silencer, and shoots Morehouse three times in
          the chest.
          As Morehouse goes down:

                         LAMPONE
          Is he dead?
          Palmateer kneels to check Morehouse.
          PALMATEER
          Yes.

                         LAMPONE
          So are you.
          Lampone had leaned down, put the gun to Palmateer's
          head and pulled the trigger.

          EXT. CHESAPEAKE BAY -- DAWN

          A small Coast Guard patrol boat moves up Chesapeake
          Bay, flat calm reflecting a pearly pink sky. As Look-
          out on the flying bridge scans the bay ahead, then
          into the speaking tube.

                         LOOKOUT
          Bridge.

          INT. WHEELHOUSE - DAWN
           As the Officer of the deck, a young Chief Bosun,

                         ANSWERS:

                         

                         

                         

                         

                         _T

          124.

                         CONTINUED:

                         BOSUN
          Bridge aye.

                         LOOKOUT'S VOICE
          That yawl looks like it's adrift.

                         ANOTHER ANGLE
          As Morehouse's sailboat drifts in the calm, its
          sails up and motionless, its tiller untended. The
          patrol boat noses up:

                         BOSUN
          Ahoy, the yawl. You all right? -
          No answer. The Bosun jumps aboard.

                         BOSUN
          Hello? Avon Lady. If anybody
          down there's doing anything they
          shouldn't, now's the time to -
          The Bosun has looked down into the cabin. He breaks
          off and turns back to the Patrol boat.

                         BOSUN
          Get on the horn to base. We got
          two bodies here.

                         CUT TO:
          INT. SENATE CAUCUS ROOM - DAY
          Arne Grundellius is at the witness table.

                         GRUNDELLIUS
          And in conclusion, I'd like to say
          that I believe these hearings have
          served a great, good purpose. In
          spite of the fears that the hear-
          ings would do harm to the fabric of
          democracy, our Nation's stronger to-
          day in the knowledge that we do not
          export revolution or use murder as
          an extension of diplomacy.

                         ANOTHER ANGLE
          as Grundellius finishes, Geary and one or two others
          rise, applauding.

                         

                         

                         

                         

          125.

                         CONTINUED:

                         GEARY
          Hear. Hear.
          ANOTHER ANGLE - THE AUDIENCE
          Tony and Elizabeth are together. Elizabeth is applaud-
          ing Grundellius. Tony looks over at her.

                         CUT TO:

          INT. THE LOBBY (SENATE BUILDING) = DAY

          The meeting has broken up. Spectators, Legislators,
          Newsmen and Women, Staff and Committee Members are
          leaving or standing in knots, gossiping.
          We pick up Elizabeth and Tony as they cross out, pausing
          to speak with Senator Geary and Grundellius who have
          stopped to chat.

                         ELIZABETH
          Mister Secretary, Senator Geary, I'd
          like to present Tony Adams.
          There are general greetings, and then, as they move
          toward the doors.

                         GEARY
          I believe we have mutual friends
          in Nevada, Mister Adams.

                         TONY
          Yes Sir. I believe so.

                         GEARY
          If there's ever anything I can do
          you come see me.

                         TONY
          I might just take you up on that,
          Senator.
          As they exit ----

                         CUT TO:
          EXT. WASHINGTON D.C. STREET
          It is sunset. Tony and Elizabeth are walking toward
          the capital building, outlined against a pink sky.
          As they walk toward it we begin to hear the Godfather

                         THEME
          SLOW DISSOLVE

                         

                         

                         

                         

          126.

          EXT. CORLEONE COMPOUND - NIGHT

          We pick up the headlights of a car coming up the
          long approach driveway toward us. It comes past
          the gate, past the kennels for the guard dogs, past
          the guest houses and finally up to the main house
          where it stops.
          The front door to the house opens and Michael stands
          silhouetted against the block of yellow light. Tony
          gets out of the car and comes up to him. They embrace
          briefly and move into the house together.
          As the door closes behind them, we start to move up
          and back. The sound of the single trumpet can still
          be heard, playing slowly and sadly, the notes faintly
          resonant as if echoing through the narrow streets of
          some old hill village in Sicily.
          We have pulled up and up and up until everything is
          darkness, as we ---

          FADE OUT

          THE END


================================================ FILE: samples/go/decent/dbg/debug.go ================================================ // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package dbg import ( "fmt" "github.com/attic-labs/noms/go/d" "log" "os" "strconv" ) var ( Filepath = "/tmp/noms-dbg.log" lg = NewLogger(Filepath) ) func NewLogger(fp string) *log.Logger { f, err := os.OpenFile(fp, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0644) d.PanicIfError(err) pid := strconv.FormatInt(int64(os.Getpid()), 10) return log.New(f, pid+": ", 0644) } func GetLogger() *log.Logger { return lg } func SetLogger(newLg *log.Logger) { lg = newLg } func Debug(s string, args ...interface{}) { s1 := fmt.Sprintf(s, args...) lg.Println(s1) } func BoxF(s string, args ...interface{}) func() { s1 := fmt.Sprintf(s, args...) Debug("starting %s", s1) f := func() { Debug("finished %s", s1) } return f } ================================================ FILE: samples/go/decent/ipfs-chat/main.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "fmt" "log" "os" "os/signal" "runtime" "syscall" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/ipfs" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/samples/go/decent/dbg" "github.com/attic-labs/noms/samples/go/decent/lib" "github.com/ipfs/go-ipfs/core" "github.com/jroimartin/gocui" kingpin "gopkg.in/alecthomas/kingpin.v2" ) func main() { // allow short (-h) help kingpin.CommandLine.HelpFlag.Short('h') clientCmd := kingpin.Command("client", "runs the ipfs-chat client UI") clientTopic := clientCmd.Flag("topic", "IPFS pubsub topic to publish and subscribe to").Default("ipfs-chat").String() username := clientCmd.Flag("username", "username to sign in as").String() nodeIdx := clientCmd.Flag("node-idx", "a single digit to be used as last digit in all port values: api, gateway and swarm (must be 0-9 inclusive)").Default("-1").Int() clientDS := clientCmd.Arg("dataset", "the dataset spec to store chat data in").Required().String() importCmd := kingpin.Command("import", "imports data into a chat") importDir := importCmd.Flag("dir", "directory that contains data to import").Default("./data").ExistingDir() importDS := importCmd.Arg("dataset", "the dataset spec to import chat data to").Required().String() daemonCmd := kingpin.Command("daemon", "runs a daemon that simulates filecoin, eagerly storing all chunks for a chat") daemonTopic := daemonCmd.Flag("topic", "IPFS pubsub topic to publish and subscribe to").Default("ipfs-chat").String() daemonInterval := daemonCmd.Flag("interval", "amount of time to wait before publishing state to network").Default("5s").Duration() daemonNodeIdx := daemonCmd.Flag("node-idx", "a single digit to be used as last digit in all port values: api, gateway and swarm (must be 0-9 inclusive)").Default("-1").Int() daemonDS := 
daemonCmd.Arg("dataset", "the dataset spec indicating ipfs repo to use").Required().String() kingpin.CommandLine.Help = "A demonstration of using Noms to build a scalable multiuser collaborative application." expandRLimit() switch kingpin.Parse() { case "client": cInfo := lib.ClientInfo{ Topic: *clientTopic, Username: *username, Idx: *nodeIdx, IsDaemon: false, Delegate: lib.IPFSEventDelegate{}, } runClient(*clientDS, cInfo) case "import": lib.RunImport(*importDir, *importDS) case "daemon": cInfo := lib.ClientInfo{ Topic: *daemonTopic, Username: "daemon", Interval: *daemonInterval, Idx: *daemonNodeIdx, IsDaemon: true, Delegate: lib.IPFSEventDelegate{}, } runDaemon(*daemonDS, cInfo) } } func runClient(ipfsSpec string, cInfo lib.ClientInfo) { dbg.SetLogger(lib.NewLogger(cInfo.Username)) sp, err := spec.ForDataset(ipfsSpec) d.CheckErrorNoUsage(err) if !isIPFS(sp.Protocol) { fmt.Println("ipfs-chat requires an 'ipfs' dataset") os.Exit(1) } node, cs := initIPFSChunkStore(sp, cInfo.Idx) db := datas.NewDatabase(cs) // Get the head of specified dataset. ds := db.GetDataset(sp.Path.Dataset) ds, err = lib.InitDatabase(ds) d.PanicIfError(err) events := make(chan lib.ChatEvent, 1024) t := lib.CreateTermUI(events) defer t.Close() d.PanicIfError(t.Layout()) t.ResetAuthors(ds) t.UpdateMessages(ds, nil, nil) go lib.ProcessChatEvents(node, ds, events, t, cInfo) go lib.ReceiveMessages(node, events, cInfo) if err := t.Gui.MainLoop(); err != nil && err != gocui.ErrQuit { dbg.Debug("mainloop has exited, err:", err) log.Panicln(err) } } func runDaemon(ipfsSpec string, cInfo lib.ClientInfo) { dbg.SetLogger(log.New(os.Stdout, "", 0)) sp, err := spec.ForDataset(ipfsSpec) d.CheckErrorNoUsage(err) if !isIPFS(sp.Protocol) { fmt.Println("ipfs-chat requires an 'ipfs' dataset") os.Exit(1) } // Create/Open a new network chunkstore node, cs := initIPFSChunkStore(sp, cInfo.Idx) db := datas.NewDatabase(cs) // Get the head of specified dataset. 
ds := db.GetDataset(sp.Path.Dataset) ds, err = lib.InitDatabase(ds) d.PanicIfError(err) events := make(chan lib.ChatEvent, 1024) handleSIGQUIT(events) go lib.ReceiveMessages(node, events, cInfo) lib.ProcessChatEvents(node, ds, events, nil, cInfo) } func handleSIGQUIT(events chan<- lib.ChatEvent) { sigChan := make(chan os.Signal) go func() { for range sigChan { stacktrace := make([]byte, 1024*1024) length := runtime.Stack(stacktrace, true) dbg.Debug(string(stacktrace[:length])) events <- lib.ChatEvent{EventType: lib.QuitEvent} } }() signal.Notify(sigChan, os.Interrupt) signal.Notify(sigChan, syscall.SIGQUIT) } // IPFS can use a lot of file decriptors. There are several bugs in the IPFS // repo about this and plans to improve. For the time being, we bump the limits // for this process. func expandRLimit() { var rLimit syscall.Rlimit err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit) d.Chk.NoError(err, "Unable to query file rlimit: %s", err) if rLimit.Cur < rLimit.Max { rLimit.Max = 64000 rLimit.Cur = 64000 err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rLimit) d.Chk.NoError(err, "Unable to increase number of open files limit: %s", err) } err = syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit) d.Chk.NoError(err) err = syscall.Getrlimit(8, &rLimit) d.Chk.NoError(err, "Unable to query thread rlimit: %s", err) if rLimit.Cur < rLimit.Max { rLimit.Max = 64000 rLimit.Cur = 64000 err = syscall.Setrlimit(8, &rLimit) d.Chk.NoError(err, "Unable to increase number of threads limit: %s", err) } err = syscall.Getrlimit(8, &rLimit) d.Chk.NoError(err) } func initIPFSChunkStore(sp spec.Spec, nodeIdx int) (*core.IpfsNode, chunks.ChunkStore) { // recreate database so that we can have control of chunkstore's ipfs node node := ipfs.OpenIPFSRepo(sp.DatabaseName, nodeIdx) cs := ipfs.ChunkStoreFromIPFSNode(sp.DatabaseName, sp.Protocol == "ipfs-local", node, 1) return node, cs } func isIPFS(protocol string) bool { return protocol == "ipfs" || protocol == "ipfs-local" } 
================================================ FILE: samples/go/decent/lib/datapager.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "fmt" "strings" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/types" ) type dataPager struct { dataset datas.Dataset msgKeyChan chan types.String doneChan chan struct{} msgMap types.Map terms []string } func NewDataPager(ds datas.Dataset, mkChan chan types.String, doneChan chan struct{}, msgs types.Map, terms []string) *dataPager { return &dataPager{ dataset: ds, msgKeyChan: mkChan, doneChan: doneChan, msgMap: msgs, terms: terms, } } func (dp *dataPager) Close() { dp.doneChan <- struct{}{} } func (dp *dataPager) Next() (string, bool) { msgKey := <-dp.msgKeyChan if msgKey == "" { return "", false } nm := dp.msgMap.Get(msgKey) var m Message err := marshal.Unmarshal(nm, &m) if err != nil { return fmt.Sprintf("ERROR: %s", err.Error()), true } s1 := fmt.Sprintf("%s: %s", m.Author, m.Body) s2 := highlightTerms(s1, dp.terms) return s2, true } func (dp *dataPager) Prepend(lines []string, target int) ([]string, bool) { new := []string{} m, ok := dp.Next() if !ok { return lines, false } for ; ok && len(new) < target; m, ok = dp.Next() { new1 := strings.Split(m, "\n") new = append(new1, new...) } return append(new, lines...), true } ================================================ FILE: samples/go/decent/lib/event.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package lib

import (
	"context"
	"fmt"
	"time"

	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/datas"
	"github.com/attic-labs/noms/go/hash"
	"github.com/attic-labs/noms/go/ipfs"
	"github.com/attic-labs/noms/go/merge"
	"github.com/attic-labs/noms/go/spec"
	"github.com/attic-labs/noms/go/types"
	"github.com/attic-labs/noms/go/util/math"
	"github.com/attic-labs/noms/samples/go/decent/dbg"
	"github.com/ipfs/go-ipfs/core"
)

// Kinds of ChatEvent handled by ProcessChatEvents.
const (
	InputEvent  ChatEventType = "input"
	SearchEvent ChatEventType = "search"
	SyncEvent   ChatEventType = "sync"
	QuitEvent   ChatEventType = "quit"
)

// ClientInfo carries the per-process settings that are passed to the event
// and pubsub functions in this package.
type ClientInfo struct {
	Topic    string
	Username string
	Interval time.Duration
	Idx      int
	IsDaemon bool
	Dir      string
	Spec     spec.Spec
	Delegate EventDelegate
}

// ChatEventType names the kind of a ChatEvent.
type ChatEventType string

// ChatEvent is one item on the events channel: a kind plus a string payload.
type ChatEvent struct {
	EventType ChatEventType
	Event     string
}

// EventDelegate abstracts the transport-specific parts of syncing: how
// message data is generated and parsed, and how received data is pinned.
type EventDelegate interface {
	PinBlocks(node *core.IpfsNode, sourceDB, sinkDB datas.Database, sourceCommit types.Value)
	SourceCommitFromMsgData(db datas.Database, msgData string) (datas.Database, types.Value)
	HashFromMsgData(msgData string) (hash.Hash, error)
	GenMessageData(cInfo ClientInfo, h hash.Hash) string
}

// ProcessChatEvents reads events from the event channel and processes them
// sequentially. If ClientInfo.IsDaemon is true, it also publishes the current
// head of the dataset continuously.
func ProcessChatEvents(node *core.IpfsNode, ds datas.Dataset, events chan ChatEvent, t *TermUI, cInfo ClientInfo) { stopChan := make(chan struct{}) if cInfo.IsDaemon { go func() { tickChan := time.NewTicker(cInfo.Interval).C for { select { case <-stopChan: break case <-tickChan: Publish(node, cInfo, ds.HeadRef().TargetHash()) } } }() } for event := range events { switch event.EventType { case SyncEvent: ds = processHash(t, node, ds, event.Event, cInfo) Publish(node, cInfo, ds.HeadRef().TargetHash()) case InputEvent: ds = processInput(t, node, ds, event.Event, cInfo) Publish(node, cInfo, ds.HeadRef().TargetHash()) case SearchEvent: processSearch(t, node, ds, event.Event, cInfo) case QuitEvent: dbg.Debug("QuitEvent received, stopping program") stopChan <- struct{}{} return } } } // processHash processes msgs published by other chat nodes and does the work to // integrate new data into this nodes local database and display it as needed. func processHash(t *TermUI, node *core.IpfsNode, ds datas.Dataset, msgData string, cInfo ClientInfo) datas.Dataset { h, err := cInfo.Delegate.HashFromMsgData(msgData) d.PanicIfError(err) defer dbg.BoxF("processHash, msgData: %s, hash: %s, cid: %s", msgData, h, ipfs.NomsHashToCID(h))() sinkDB := ds.Database() d.PanicIfFalse(ds.HasHead()) headRef := ds.HeadRef() if h == headRef.TargetHash() { dbg.Debug("received hash same as current head, nothing to do") return ds } dbg.Debug("reading value for hash: %s", h) sourceDB, sourceCommit := cInfo.Delegate.SourceCommitFromMsgData(sinkDB, msgData) if sourceCommit == nil { dbg.Debug("FAILED to read value for hash: %s", h) return ds } sourceRef := types.NewRef(sourceCommit) _, isP2P := cInfo.Delegate.(P2PEventDelegate) if cInfo.IsDaemon || isP2P { cInfo.Delegate.PinBlocks(node, sourceDB, sinkDB, sourceCommit) } dbg.Debug("Finding common ancestor for merge, sourceRef: %s, headRef: %s", sourceRef.TargetHash(), headRef.TargetHash()) a, ok := datas.FindCommonAncestor(sourceRef, headRef, sinkDB) if !ok 
{ dbg.Debug("no common ancestor, cannot merge update!") return ds } dbg.Debug("Checking if source commit is ancestor") if a.Equals(sourceRef) { dbg.Debug("source commit was ancestor, nothing to do") return ds } if a.Equals(headRef) { dbg.Debug("fast-forward to source commit") ds, err := sinkDB.SetHead(ds, sourceRef) d.Chk.NoError(err) if !cInfo.IsDaemon { t.UpdateMessagesFromSync(ds) } return ds } dbg.Debug("We have a mergeable commit") left := ds.HeadValue() right := sourceCommit.(types.Struct).Get("value") parent := a.TargetValue(sinkDB).(types.Struct).Get("value") dbg.Debug("Starting three-way commit") merged, err := merge.ThreeWay(left, right, parent, sinkDB, nil, nil) if err != nil { dbg.Debug("could not merge received data: " + err.Error()) return ds } dbg.Debug("setting new datasetHead on localDB") newCommit := datas.NewCommit(merged, types.NewSet(sinkDB, ds.HeadRef(), sourceRef), types.EmptyStruct) commitRef := sinkDB.WriteValue(newCommit) dbg.Debug("wrote new commit: %s", commitRef.TargetHash()) ds, err = sinkDB.SetHead(ds, commitRef) if err != nil { dbg.Debug("call to db.SetHead on failed, err: %s", err) } dbg.Debug("set new head ref: %s on ds.ID: %s", commitRef.TargetHash(), ds.ID()) newH := ds.HeadRef().TargetHash() dbg.Debug("merged commit, dataset: %s, head: %s, cid: %s", ds.ID(), newH, ipfs.NomsHashToCID(newH)) if cInfo.IsDaemon { cInfo.Delegate.PinBlocks(node, sourceDB, sinkDB, newCommit) } else { t.UpdateMessagesFromSync(ds) } return ds } // processInput adds a new msg (entered through the UI) updates it's dataset. func processInput(t *TermUI, node *core.IpfsNode, ds datas.Dataset, msg string, cInfo ClientInfo) datas.Dataset { defer dbg.BoxF("processInput, msg: %s", msg)() t.InSearch = false if msg != "" { var err error ds, err = AddMessage(msg, cInfo.Username, time.Now(), ds) d.PanicIfError(err) } t.UpdateMessagesAsync(ds, nil, nil) return ds } // updates the UI to display search results. 
func processSearch(t *TermUI, node *core.IpfsNode, ds datas.Dataset, terms string, cInfo ClientInfo) { defer dbg.BoxF("processSearch")() if terms == "" { return } t.InSearch = true searchTerms := TermsFromString(terms) searchIds := SearchIndex(ds, searchTerms) t.UpdateMessagesAsync(ds, &searchIds, searchTerms) return } // recurses over the chunks originating at 'h' and pins them to the IPFS repo. func pinBlocks(node *core.IpfsNode, h hash.Hash, db datas.Database, depth, cnt int) (maxDepth, newCnt int) { maxDepth, newCnt = depth, cnt cid := ipfs.NomsHashToCID(h) _, pinned, err := node.Pinning.IsPinned(cid) d.Chk.NoError(err) if pinned { return } ctx, cancel := context.WithCancel(context.Background()) defer cancel() v := db.ReadValue(h) d.Chk.NotNil(v) v.WalkRefs(func(r types.Ref) { var newDepth int newDepth, newCnt = pinBlocks(node, r.TargetHash(), db, depth+1, newCnt) maxDepth = math.MaxInt(newDepth, maxDepth) }) n, err := node.DAG.Get(ctx, cid) d.Chk.NoError(err) err = node.Pinning.Pin(ctx, n, false) d.Chk.NoError(err) newCnt++ return } type IPFSEventDelegate struct{} func (d IPFSEventDelegate) PinBlocks(node *core.IpfsNode, sourceDB, sinkDB datas.Database, sourceCommit types.Value) { h := sourceCommit.Hash() dbg.Debug("Starting pinBlocks") depth, cnt := pinBlocks(node, h, sinkDB, 0, 0) dbg.Debug("Finished pinBlocks, depth: %d, cnt: %d", depth, cnt) node.Pinning.Flush() } func (d IPFSEventDelegate) SourceCommitFromMsgData(db datas.Database, msgData string) (datas.Database, types.Value) { h := hash.Parse(msgData) v := db.ReadValue(h) return db, v } func (d IPFSEventDelegate) HashFromMsgData(msgData string) (hash.Hash, error) { var err error h, ok := hash.MaybeParse(msgData) if !ok { err = fmt.Errorf("Failed to parse hash from msgData: %s", msgData) } return h, err } func (d IPFSEventDelegate) GenMessageData(cInfo ClientInfo, h hash.Hash) string { return h.String() } type P2PEventDelegate struct{} func (d P2PEventDelegate) PinBlocks(node *core.IpfsNode, sourceDB, 
sinkDB datas.Database, sourceCommit types.Value) { sourceRef := types.NewRef(sourceCommit) datas.Pull(sourceDB, sinkDB, sourceRef, nil) } func (d P2PEventDelegate) SourceCommitFromMsgData(db datas.Database, msgData string) (datas.Database, types.Value) { sp, _ := spec.ForPath(msgData) v := sp.GetValue() return sp.GetDatabase(), v } func (d P2PEventDelegate) HashFromMsgData(msgData string) (hash.Hash, error) { sp, err := spec.ForPath(msgData) return sp.Path.Hash, err } func (d P2PEventDelegate) GenMessageData(cInfo ClientInfo, h hash.Hash) string { return fmt.Sprintf("%s::#%s", cInfo.Spec, h) } ================================================ FILE: samples/go/decent/lib/importer.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "errors" "fmt" "os" "path/filepath" "regexp" "sort" "strings" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/merge" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/datetime" "golang.org/x/net/html" ) var ( character = "" msgs = []Message{} ) func RunImport(dir, dsSpec string) error { filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { if path == dir { return nil } if !strings.HasSuffix(info.Name(), ".html") { return nil } fmt.Println("importing:", path) f, err := os.Open(path) d.Chk.NoError(err) n, err := html.Parse(f) d.Chk.NoError(err) extractDialog(n) return nil }) if len(msgs) == 0 { return errors.New("Failed to import any data") } fmt.Println("Imported", len(msgs), "messages") sp, err := spec.ForDataset(dsSpec) d.CheckErrorNoUsage(err) ds := sp.GetDataset() ds, err = InitDatabase(ds) d.PanicIfError(err) db := ds.Database() fmt.Println("Creating msg map") 
kvPairs := []types.Value{} for _, msg := range msgs { kvPairs = append(kvPairs, types.String(msg.ID()), marshal.MustMarshal(db, msg)) } m := types.NewMap(db, kvPairs...) fmt.Println("Creating index") ti := NewTermIndex(db, types.NewMap(db)).Edit() for _, msg := range msgs { terms := GetTerms(msg) ti.InsertAll(terms, types.String(msg.ID())) } termDocs := ti.Value().TermDocs fmt.Println("Creating users") users := topUsers(msgs) fmt.Println("Docs:", termDocs.Len(), "Users:", len(users)) root := Root{Messages: m, Index: termDocs, Users: users} nroot := marshal.MustMarshal(db, root) if ds.HasHead() { left := ds.HeadValue() parent := marshal.MustMarshal(db, Root{ Index: types.NewMap(db), Messages: types.NewMap(db), }) fmt.Println("Merging data") nroot, err = merge.ThreeWay(left, nroot, parent, db, nil, nil) fmt.Println("Merging complete") d.Chk.NoError(err) } fmt.Println("Committing data") _, err = db.CommitValue(ds, nroot) return err } func extractDialog(n *html.Node) { if c := characterName(n); c != "" { //fmt.Println("Character:", character) character = c return } if character != "" && n.Type == html.TextNode { //fmt.Println("Dialog:", strings.TrimSpace(n.Data)) msg := Message{ Ordinal: uint64(len(msgs)), Author: character, Body: strings.TrimSpace(n.Data), ClientTime: datetime.Now(), } msgs = append(msgs, msg) character = "" } for c := n.FirstChild; c != nil; c = c.NextSibling { extractDialog(c) } } func characterName(n *html.Node) string { if n.Type != html.ElementNode || n.Data != "b" || n.FirstChild == nil { return "" } if hasSpaces, _ := regexp.MatchString(`^\s+[^\s]`, n.FirstChild.Data); !hasSpaces { return "" } return strings.TrimSpace(n.FirstChild.Data) } type cpair struct { character string cnt int } func topUsers(msgs []Message) []string { userpat := regexp.MustCompile(`^[a-zA-Z][a-zA-Z\s]*\d*$`) usermap := map[string]int{} for _, msg := range msgs { name := strings.TrimSpace(msg.Author) if userpat.MatchString(name) { usermap[name] += 1 } } pairs := []cpair{} 
for name, cnt := range usermap { if len(name) > 1 && !strings.HasPrefix(name, "ANOTHER") { pairs = append(pairs, cpair{character: strings.ToLower(name), cnt: cnt}) } } // sort descending by cnt sort.Slice(pairs, func(i, j int) bool { return pairs[j].cnt < pairs[i].cnt }) users := []string{} for i, p := range pairs { if i >= 30 { break } users = append(users, p.character) } sort.Strings(users) return users } ================================================ FILE: samples/go/decent/lib/logger.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "fmt" "log" "os" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/samples/go/decent/dbg" ) func NewLogger(username string) *log.Logger { f, err := os.OpenFile(dbg.Filepath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0644) d.PanicIfError(err) prefix := fmt.Sprintf("%d-%s: ", os.Getpid(), username) return log.New(f, prefix, 0644) } ================================================ FILE: samples/go/decent/lib/model.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "fmt" "regexp" "strings" "time" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/datetime" "github.com/attic-labs/noms/samples/go/decent/dbg" ) type Root struct { // Map // Keys are strings like: , // This scheme allows: // - map is naturally sorted in the right order // - conflicts will generally be avoided // - messages are editable Messages types.Map Index types.Map Users []string `noms:",set"` } type Message struct { Ordinal uint64 Author string Body string ClientTime datetime.DateTime } func (m Message) ID() string { return fmt.Sprintf("%020x/%s", m.ClientTime.UnixNano(), m.Author) } func AddMessage(body string, author string, clientTime time.Time, ds datas.Dataset) (datas.Dataset, error) { defer dbg.BoxF("AddMessage, body: %s", body)() root, err := getRoot(ds) if err != nil { return datas.Dataset{}, err } db := ds.Database() nm := Message{ Author: author, Body: body, ClientTime: datetime.DateTime{clientTime}, Ordinal: root.Messages.Len(), } root.Messages = root.Messages.Edit().Set(types.String(nm.ID()), marshal.MustMarshal(db, nm)).Map() IndexNewMessage(db, &root, nm) newRoot := marshal.MustMarshal(db, root) ds, err = db.CommitValue(ds, newRoot) return ds, err } func InitDatabase(ds datas.Dataset) (datas.Dataset, error) { if ds.HasHead() { return ds, nil } db := ds.Database() root := Root{ Index: types.NewMap(db), Messages: types.NewMap(db), } return db.CommitValue(ds, marshal.MustMarshal(db, root)) } func GetAuthors(ds datas.Dataset) []string { r, err := getRoot(ds) d.PanicIfError(err) return r.Users } func IndexNewMessage(vrw types.ValueReadWriter, root *Root, m Message) { defer dbg.BoxF("IndexNewMessage")() ti := NewTermIndex(vrw, root.Index) id := types.String(m.ID()) root.Index = 
ti.Edit().InsertAll(GetTerms(m), id).Value().TermDocs root.Users = append(root.Users, m.Author) } func SearchIndex(ds datas.Dataset, search []string) types.Map { root, err := getRoot(ds) d.PanicIfError(err) idx := root.Index ti := NewTermIndex(ds.Database(), idx) ids := ti.Search(search) dbg.Debug("search for: %s, returned: %d", strings.Join(search, " "), ids.Len()) return ids } var ( punctPat = regexp.MustCompile("[[:punct:]]+") wsPat = regexp.MustCompile("\\s+") ) func TermsFromString(s string) []string { s1 := punctPat.ReplaceAllString(strings.TrimSpace(s), " ") terms := wsPat.Split(s1, -1) clean := []string{} for _, t := range terms { if t == "" { continue } clean = append(clean, strings.ToLower(t)) } return clean } func GetTerms(m Message) []string { terms := TermsFromString(m.Body) terms = append(terms, TermsFromString(m.Author)...) return terms } func ListMessages(ds datas.Dataset, searchIds *types.Map, doneChan chan struct{}) (msgMap types.Map, mc chan types.String, err error) { //dbg.Debug("##### listMessages: entered") root, err := getRoot(ds) db := ds.Database() if err != nil { return types.NewMap(db), nil, err } msgMap = root.Messages mc = make(chan types.String) done := false go func() { <-doneChan done = true <-mc //dbg.Debug("##### listMessages: exiting 'done' goroutine") }() go func() { keyMap := msgMap if searchIds != nil { keyMap = *searchIds } i := uint64(0) for ; i < keyMap.Len() && !done; i++ { key, _ := keyMap.At(keyMap.Len() - i - 1) mc <- key.(types.String) } //dbg.Debug("##### listMessages: exiting 'for loop' goroutine, examined: %d", i) close(mc) }() return } func getRoot(ds datas.Dataset) (Root, error) { defer dbg.BoxF("getRoot")() db := ds.Database() root := Root{ Messages: types.NewMap(db), Index: types.NewMap(db), } // TODO: It would be nice if Dataset.MaybeHeadValue() or HeadValue() // would return just , and it would be nil if not there, so you // could chain calls. 
if !ds.HasHead() { return root, nil } err := marshal.Unmarshal(ds.HeadValue(), &root) if err != nil { return Root{}, err } return root, nil } ================================================ FILE: samples/go/decent/lib/model_test.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "testing" "time" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/util/datetime" "github.com/stretchr/testify/assert" ) func TestBasics(t *testing.T) { a := assert.New(t) db := datas.NewDatabase(chunks.NewMemoryStoreFactory().CreateStore("")) ds := db.GetDataset("foo") ml, err := getAllMessages(ds) a.NoError(err) a.Equal(0, len(ml)) ds, err = AddMessage("body1", "aa", time.Unix(0, 0), ds) a.NoError(err) ml, err = getAllMessages(ds) a.NoError(err) expected := []Message{ Message{ Author: "aa", Body: "body1", ClientTime: datetime.DateTime{time.Unix(0, 0)}, Ordinal: 0, }, } a.Equal(expected, ml) ds, err = AddMessage("body2", "bob", time.Unix(1, 0), ds) a.NoError(err) ml, err = getAllMessages(ds) expected = append( []Message{ Message{ Author: "bob", Body: "body2", ClientTime: datetime.DateTime{time.Unix(1, 0)}, Ordinal: 1, }, }, expected..., ) a.NoError(err) a.Equal(expected, ml) } func getAllMessages(ds datas.Dataset) (r []Message, err error) { doneChan := make(chan struct{}) mm, keys, _ := ListMessages(ds, nil, doneChan) for k := range keys { mv := mm.Get(k) var m Message marshal.MustUnmarshal(mv, &m) r = append(r, m) } doneChan <- struct{}{} return r, nil } ================================================ FILE: samples/go/decent/lib/pubsub.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // 
+build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "context" "encoding/json" "sync" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/attic-labs/noms/samples/go/decent/dbg" "github.com/ipfs/go-ipfs/core" "github.com/mr-tron/base58/base58" ) var ( PubsubUser = "default" seenHash = map[hash.Hash]bool{} seenHashMutex = sync.Mutex{} ) func lockSeenF() func() { seenHashMutex.Lock() return func() { seenHashMutex.Unlock() } } // RecieveMessages listens for messages sent by other chat nodes. It filters out // any msgs that have already been received and adds events to teh events channel // for any msgs that it hasn't seen yet. func ReceiveMessages(node *core.IpfsNode, events chan ChatEvent, cInfo ClientInfo) { sub, err := node.Floodsub.Subscribe(cInfo.Topic) d.Chk.NoError(err) listenForAndHandleMessage := func() { msg, err := sub.Next(context.Background()) d.PanicIfError(err) sender := base58.Encode(msg.From) msgMap := map[string]string{} err = json.Unmarshal(msg.Data, &msgMap) if err != nil { dbg.Debug("ReceiveMessages: received non-json msg: %s from: %s, error: %s", msg.Data, sender, err) return } msgData := msgMap["data"] h, err := cInfo.Delegate.HashFromMsgData(msgData) if err != nil { dbg.Debug("ReceiveMessages: received unknown msg: %s from: %s", msgData, sender) return } defer lockSeenF()() if !seenHash[h] { events <- ChatEvent{EventType: SyncEvent, Event: msgData} seenHash[h] = true dbg.Debug("got msgData: %s from: %s(%s)", msgData, sender, msgMap["user"]) } } dbg.Debug("start listening for msgs on channel: %s", cInfo.Topic) for { listenForAndHandleMessage() } panic("unreachable") } // Publish asks the delegate to format a hash/ClientInfo into a suitable msg // and publishes that using IPFS pubsub. 
func Publish(node *core.IpfsNode, cInfo ClientInfo, h hash.Hash) { defer func() { if r := recover(); r != nil { dbg.Debug("Publish failed, error: %s", r) } }() msgData := cInfo.Delegate.GenMessageData(cInfo, h) m, err := json.Marshal(map[string]string{"user": cInfo.Username, "data": msgData}) if err != nil { } d.PanicIfError(err) dbg.Debug("publishing to topic: %s, msg: %s", cInfo.Topic, m) node.Floodsub.Publish(cInfo.Topic, append(m, []byte("\r\n")...)) defer lockSeenF()() seenHash[h] = true } ================================================ FILE: samples/go/decent/lib/term_index.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "sync" "github.com/attic-labs/noms/go/types" ) type TermIndex struct { TermDocs types.Map vrw types.ValueReadWriter } func NewTermIndex(vrw types.ValueReadWriter, TermDocs types.Map) TermIndex { return TermIndex{TermDocs, vrw} } func (ti TermIndex) Edit() *TermIndexEditor { return &TermIndexEditor{ti.TermDocs.Edit(), ti.vrw} } func (ti TermIndex) Search(terms []string) types.Map { seen := make(map[string]struct{}, len(terms)) iters := make([]types.SetIterator, 0, len(terms)) wg := sync.WaitGroup{} idx := 0 for _, t := range terms { if _, ok := seen[t]; ok { continue } seen[t] = struct{}{} iters = append(iters, nil) i := idx t := t wg.Add(1) go func() { ts := ti.TermDocs.Get(types.String(t)) if ts != nil { iter := ts.(types.Set).Iterator() iters[i] = iter } wg.Done() }() idx++ } wg.Wait() var si types.SetIterator for _, iter := range iters { if iter == nil { return types.NewMap(ti.vrw) // at least one term had no hits } if si == nil { si = iter // first iter continue } si = types.NewIntersectionIterator(si, iter) } ch := make(chan types.Value) rch := types.NewStreamingMap(ti.vrw, ch) for next := 
si.Next(); next != nil; next = si.Next() { ch <- next ch <- types.Bool(true) } close(ch) return <-rch } type TermIndexEditor struct { terms *types.MapEditor vrw types.ValueReadWriter } // Builds a new TermIndex func (te *TermIndexEditor) Value() TermIndex { return TermIndex{te.terms.Map(), te.vrw} } // Indexes |v| by |term| func (te *TermIndexEditor) Insert(term string, v types.Value) *TermIndexEditor { tv := types.String(term) hitSet := te.terms.Get(tv) if hitSet == nil { hitSet = types.NewSet(te.vrw) } hsEd, ok := hitSet.(*types.SetEditor) if !ok { hsEd = hitSet.(types.Set).Edit() te.terms.Set(tv, hsEd) } hsEd.Insert(v) return te } // Indexes |v| by each unique term in |terms| (tolerates duplicate terms) func (te *TermIndexEditor) InsertAll(terms []string, v types.Value) *TermIndexEditor { visited := map[string]struct{}{} for _, term := range terms { if _, ok := visited[term]; ok { continue } visited[term] = struct{}{} te.Insert(term, v) } return te } // TODO: te.Remove ================================================ FILE: samples/go/decent/lib/term_index_test.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "strings" "testing" "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) func TestRun(t *testing.T) { a := assert.New(t) storage := &chunks.MemoryStorage{} vs := types.NewValueStore(storage.NewView()) defer vs.Close() docs := []struct { terms string id int }{ {"foo bar baz", 1}, {"foo baz", 2}, {"baz bat boo", 3}, } indexEditor := NewTermIndex(vs, types.NewMap(vs)).Edit() for _, doc := range docs { indexEditor.InsertAll(strings.Split(doc.terms, " "), types.Number(doc.id)) } index := indexEditor.Value() getMap := func(keys ...int) types.Map { m := types.NewMap(vs).Edit() for _, k := range keys { m.Set(types.Number(k), types.Bool(true)) } return m.Map() } test := func(search string, expect types.Map) { actual := index.Search(strings.Split(search, " ")) a.True(expect.Equals(actual)) } test("foo", getMap(1, 2)) test("baz", getMap(1, 2, 3)) test("bar baz", getMap(1)) test("boo", getMap(3)) test("blarg", getMap()) } ================================================ FILE: samples/go/decent/lib/termui.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package lib import ( "fmt" "regexp" "runtime" "strings" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/math" "github.com/attic-labs/noms/samples/go/decent/dbg" "github.com/jroimartin/gocui" ) const ( allViews = "" usersView = "users" messageView = "messages" inputView = "input" linestofetch = 50 searchPrefix = "/s" quitPrefix = "/q" ) type TermUI struct { Gui *gocui.Gui InSearch bool lines []string dp *dataPager } var ( viewNames = []string{usersView, messageView, inputView} firstLayout = true ) func CreateTermUI(events chan ChatEvent) *TermUI { g, err := gocui.NewGui(gocui.Output256) d.PanicIfError(err) g.Highlight = true g.SelFgColor = gocui.ColorGreen g.Cursor = true relayout := func(g *gocui.Gui) error { return layout(g) } g.SetManagerFunc(relayout) termUI := new(TermUI) termUI.Gui = g d.PanicIfError(g.SetKeybinding(allViews, gocui.KeyF1, gocui.ModNone, debugInfo(termUI))) d.PanicIfError(g.SetKeybinding(allViews, gocui.KeyCtrlC, gocui.ModNone, quit)) d.PanicIfError(g.SetKeybinding(allViews, gocui.KeyCtrlC, gocui.ModAlt, quitWithStack)) d.PanicIfError(g.SetKeybinding(allViews, gocui.KeyTab, gocui.ModNone, nextView)) d.PanicIfError(g.SetKeybinding(messageView, gocui.KeyArrowUp, gocui.ModNone, arrowUp(termUI))) d.PanicIfError(g.SetKeybinding(messageView, gocui.KeyArrowDown, gocui.ModNone, arrowDown(termUI))) d.PanicIfError(g.SetKeybinding(inputView, gocui.KeyEnter, gocui.ModNone, func(g *gocui.Gui, v *gocui.View) (err error) { defer func() { v.Clear() v.SetCursor(0, 0) msgView, err := g.View(messageView) d.PanicIfError(err) msgView.Title = "messages" msgView.Autoscroll = true }() buf := strings.TrimSpace(v.Buffer()) if strings.HasPrefix(buf, searchPrefix) { events <- ChatEvent{EventType: SearchEvent, Event: strings.TrimSpace(buf[len(searchPrefix):])} return } if 
strings.HasPrefix(buf, quitPrefix) { err = gocui.ErrQuit return } events <- ChatEvent{EventType: InputEvent, Event: buf} return })) return termUI } func (t *TermUI) Close() { dbg.Debug("Closing gui") t.Gui.Close() } func (t *TermUI) UpdateMessagesFromSync(ds datas.Dataset) { if t.InSearch || !t.textScrolledToEnd() { t.Gui.Execute(func(g *gocui.Gui) (err error) { updateViewTitle(g, messageView, "messages (NEW!)") return }) } else { t.UpdateMessagesAsync(ds, nil, nil) } } func (t *TermUI) Layout() error { return layout(t.Gui) } func layout(g *gocui.Gui) error { maxX, maxY := g.Size() if v, err := g.SetView(usersView, 0, 0, 25, maxY-1); err != nil { if err != gocui.ErrUnknownView { return err } v.Title = usersView v.Wrap = false v.Editable = false } if v, err := g.SetView(messageView, 25, 0, maxX-1, maxY-2-1); err != nil { if err != gocui.ErrUnknownView { return err } v.Title = messageView v.Editable = false v.Wrap = true v.Autoscroll = true return nil } if v, err := g.SetView(inputView, 25, maxY-2-1, maxX-1, maxY-1); err != nil { if err != gocui.ErrUnknownView { return err } v.Wrap = true v.Editable = true v.Autoscroll = true } if firstLayout { firstLayout = false g.SetCurrentView(inputView) dbg.Debug("started up") } return nil } func (t *TermUI) UpdateMessages(ds datas.Dataset, filterIds *types.Map, terms []string) error { defer dbg.BoxF("updateMessages")() t.ResetAuthors(ds) v, err := t.Gui.View(messageView) d.PanicIfError(err) v.Clear() t.lines = []string{} v.SetOrigin(0, 0) _, winHeight := v.Size() if t.dp != nil { t.dp.Close() } doneChan := make(chan struct{}) msgMap, msgKeyChan, err := ListMessages(ds, filterIds, doneChan) d.PanicIfError(err) t.dp = NewDataPager(ds, msgKeyChan, doneChan, msgMap, terms) t.lines, _ = t.dp.Prepend(t.lines, math.MaxInt(linestofetch, winHeight+10)) for _, s := range t.lines { fmt.Fprintf(v, "%s\n", s) } return nil } func (t *TermUI) ResetAuthors(ds datas.Dataset) { v, err := t.Gui.View(usersView) d.PanicIfError(err) v.Clear() for _, 
u := range GetAuthors(ds) { fmt.Fprintln(v, u) } } func (t *TermUI) UpdateMessagesAsync(ds datas.Dataset, sids *types.Map, terms []string) { t.Gui.Execute(func(_ *gocui.Gui) error { err := t.UpdateMessages(ds, sids, terms) d.PanicIfError(err) return nil }) } func (t *TermUI) scrollView(v *gocui.View, dy int) { // Get the size and position of the view. lineCnt := len(t.lines) _, windowHeight := v.Size() ox, oy := v.Origin() cx, cy := v.Cursor() // maxCy will either be the height of the screen - 1, or in the case that // the there aren't enough lines to fill the screen, it will be the // lineCnt - origin newCy := cy + dy maxCy := math.MinInt(lineCnt-oy, windowHeight-1) // If the newCy doesn't require scrolling, then just move the cursor. if newCy >= 0 && newCy < maxCy { v.MoveCursor(cx, dy, false) return } // If the cursor is already at the bottom of the screen and there are no // lines left to scroll up, then we're at the bottom. if newCy >= maxCy && oy >= lineCnt-windowHeight { // Set autoscroll to normal again. 
v.Autoscroll = true } else { // The cursor is already at the bottom or top of the screen so scroll // the text v.Autoscroll = false v.SetOrigin(ox, oy+dy) } } func quit(_ *gocui.Gui, _ *gocui.View) error { dbg.Debug("QUITTING #####") return gocui.ErrQuit } func quitWithStack(_ *gocui.Gui, _ *gocui.View) error { dbg.Debug("QUITTING WITH STACK") stacktrace := make([]byte, 1024*1024) length := runtime.Stack(stacktrace, true) dbg.Debug(string(stacktrace[:length])) return gocui.ErrQuit } func arrowUp(t *TermUI) func(*gocui.Gui, *gocui.View) error { return func(_ *gocui.Gui, v *gocui.View) error { lineCnt := len(t.lines) ox, oy := v.Origin() if oy == 0 { var ok bool t.lines, ok = t.dp.Prepend(t.lines, linestofetch) if ok { v.Clear() for _, s := range t.lines { fmt.Fprintf(v, "%s\n", s) } c1 := len(t.lines) v.SetOrigin(ox, c1-lineCnt) } } t.scrollView(v, -1) return nil } } func arrowDown(t *TermUI) func(*gocui.Gui, *gocui.View) error { return func(_ *gocui.Gui, v *gocui.View) error { t.scrollView(v, 1) return nil } } func debugInfo(t *TermUI) func(*gocui.Gui, *gocui.View) error { return func(g *gocui.Gui, _ *gocui.View) error { msgView, _ := g.View(messageView) w, h := msgView.Size() dbg.Debug("info, window size:(%d, %d), lineCnt: %d", w, h, len(t.lines)) cx, cy := msgView.Cursor() ox, oy := msgView.Origin() dbg.Debug("info, origin: (%d,%d), cursor: (%d,%d)", ox, oy, cx, cy) dbg.Debug("info, view buffer:\n%s", highlightTerms(viewBuffer(msgView), t.dp.terms)) return nil } } func viewBuffer(v *gocui.View) string { buf := strings.TrimSpace(v.ViewBuffer()) if len(buf) > 0 && buf[len(buf)-1] != byte('\n') { buf = buf + "\n" } return buf } func nextView(g *gocui.Gui, v *gocui.View) (err error) { nextName := nextViewName(v.Name()) if _, err = g.SetCurrentView(nextName); err != nil { return } _, err = g.SetViewOnTop(nextName) return } func nextViewName(currentView string) string { for i, viewname := range viewNames { if currentView == viewname { return 
viewNames[(i+1)%len(viewNames)] } } return viewNames[0] } func (t *TermUI) textScrolledToEnd() bool { v, err := t.Gui.View(messageView) if err != nil { // doubt this will ever happen, if it does just assume we're at bottom return true } _, oy := v.Origin() _, h := v.Size() lc := len(t.lines) dbg.Debug("textScrolledToEnd, oy: %d, h: %d, lc: %d, lc-oy: %d, res: %t", oy, h, lc, lc-oy, lc-oy <= h) return lc-oy <= h } func updateViewTitle(g *gocui.Gui, viewname, title string) (err error) { v, err := g.View(viewname) if err != nil { return } v.Title = title return } var bgColors, fgColors = genColors() func genColors() ([]string, []string) { bg, fg := []string{}, []string{} for i := 1; i <= 9; i++ { // skip dark blue & white if i != 4 && i != 7 { bg = append(bg, fmt.Sprintf("\x1b[48;5;%dm\x1b[30m%%s\x1b[0m", i)) fg = append(fg, fmt.Sprintf("\x1b[38;5;%dm%%s\x1b[0m", i)) } } return bg, fg } func colorTerm(color int, s string, background bool) string { c := fgColors[color] if background { c = bgColors[color] } return fmt.Sprintf(c, s) } func highlightTerms(s string, terms []string) string { for i, t := range terms { color := i % len(fgColors) re := regexp.MustCompile(fmt.Sprintf("(?i)%s", regexp.QuoteMeta(t))) s = re.ReplaceAllStringFunc(s, func(s string) string { return colorTerm(color, s, false) }) } return s } ================================================ FILE: samples/go/decent/p2p-chat/README.md ================================================ This demo application is the simplest p2p chat app you could build using Noms. 
Basic idea: - Every node runs a Noms HTTP server (port controlled by --port) flag - Every node broadcasts its current commit and IP/port continuously - Every node continuously sync/merges with every other node (note that due to content addressing, most of these syncs will immediately exit) ================================================ FILE: samples/go/decent/p2p-chat/main.go ================================================ // See: https://github.com/attic-labs/noms/issues/3808 // +build ignore // Copyright 2017 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "fmt" "log" "net" "os" "os/signal" "path" "syscall" "github.com/attic-labs/noms/go/config" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/ipfs" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/util/profile" "github.com/attic-labs/noms/samples/go/decent/dbg" "github.com/attic-labs/noms/samples/go/decent/lib" "github.com/jroimartin/gocui" kingpin "gopkg.in/alecthomas/kingpin.v2" ) func main() { // allow short (-h) help kingpin.CommandLine.HelpFlag.Short('h') clientCmd := kingpin.Command("client", "runs the ipfs-chat client UI") clientTopic := clientCmd.Flag("topic", "IPFS pubsub topic to publish and subscribe to").Default("noms-chat-p2p").String() username := clientCmd.Flag("username", "username to sign in as").Required().String() nodeIdx := clientCmd.Flag("node-idx", "a single digit to be used as last digit in all port values: api, gateway and swarm (must be 0-9 inclusive)").Default("-1").Int() clientDir := clientCmd.Arg("path", "local directory to store data in").Required().ExistingDir() importCmd := kingpin.Command("import", "imports data into a chat") importSrc := importCmd.Flag("dir", "directory that contains data to import").Default("../data").ExistingDir() importDir := importCmd.Arg("path", "local directory to store data 
in").Required().ExistingDir() kingpin.CommandLine.Help = "A demonstration of using Noms to build a scalable multiuser collaborative application." switch kingpin.Parse() { case "client": cInfo := lib.ClientInfo{ Topic: *clientTopic, Username: *username, Idx: *nodeIdx, IsDaemon: false, Dir: *clientDir, Delegate: lib.P2PEventDelegate{}, } runClient(cInfo) case "import": err := lib.RunImport(*importSrc, fmt.Sprintf("%s/noms::chat", *importDir)) d.PanicIfError(err) } } func runClient(cInfo lib.ClientInfo) { dbg.SetLogger(lib.NewLogger(cInfo.Username)) var err error httpPort := 8000 + cInfo.Idx sp, err := spec.ForDatabase(fmt.Sprintf("http://%s:%d", getIP(), httpPort)) d.PanicIfError(err) cInfo.Spec = sp <-runServer(path.Join(cInfo.Dir, "noms"), httpPort) db := cInfo.Spec.GetDatabase() ds := db.GetDataset("chat") ds, err = lib.InitDatabase(ds) d.PanicIfError(err) node := ipfs.OpenIPFSRepo(path.Join(cInfo.Dir, "ipfs"), cInfo.Idx) events := make(chan lib.ChatEvent, 1024) t := lib.CreateTermUI(events) defer t.Close() d.PanicIfError(t.Layout()) t.ResetAuthors(ds) t.UpdateMessages(ds, nil, nil) go lib.ProcessChatEvents(node, ds, events, t, cInfo) go lib.ReceiveMessages(node, events, cInfo) if err := t.Gui.MainLoop(); err != nil && err != gocui.ErrQuit { dbg.Debug("mainloop has exited, err:", err) log.Panicln(err) } } func getIP() string { ifaces, err := net.Interfaces() d.PanicIfError(err) for _, i := range ifaces { addrs, err := i.Addrs() d.PanicIfError(err) for _, addr := range addrs { switch v := addr.(type) { case *net.IPNet: if !v.IP.IsLoopback() { ip := v.IP.To4() if ip != nil { return v.IP.String() } } } } } d.Panic("notreached") return "" } func runServer(atPath string, port int) (ready chan struct{}) { ready = make(chan struct{}) _ = os.Mkdir(atPath, 0755) cfg := config.NewResolver() cs, err := cfg.GetChunkStore(atPath) d.CheckError(err) server := datas.NewRemoteDatabaseServer(cs, port) server.Ready = func() { ready <- struct{}{} } // Shutdown server gracefully so 
that profile may be written c := make(chan os.Signal, 1) signal.Notify(c, os.Interrupt) signal.Notify(c, syscall.SIGTERM) go func() { <-c server.Stop() }() go func() { d.Try(func() { defer profile.MaybeStartProfile().Stop() server.Run() }) }() return } ================================================ FILE: samples/go/nomdex/Readme.md ================================================ # Nomdex Nomdex demonstrates how Noms maps can be used to index values in a database and provides a simple query language to search for objects. ## Description This program experiments with using ordinary Noms Maps as indexes. It leverages the fact that Maps in Noms are implemented by prolly-trees which are similar to B-Trees in many important ways that make them ideal for use as indexes. They are balanced, sorted, require relatively few accesses to find any leaf node and efficient to update. ###Building Indexes Nomdex constructs indexes as Maps that are keyed by either Strings or Numbers. The values in the index are sets of objects. The following command can be used to build an index: ```shell nomdex up --in-path --by --out-ds ``` The ***'in-path'*** argument must be a ValueSpec(see [Spelling In Noms](../../../doc/spelling.md#spelling-values)) that designates the root of an object hierarchy to be scanned for "indexable" objects. The ***'by'*** argument must be a relative path. Nomdex traverses every value reachable from 'in-path' and attempts to resolve this relative ***'by'*** path from it. Any value that has a String, Number, or Bool index using the relative attribute as it's key. The ***'out-ds'*** argument specifies a dataset name that will be used to store the new index. In addition, there are arguments that allow values to be transformed before using them as keys in the index by applying regex expressions functions. Consult to the help text and code to see how those can be used. ### Queries in Nomdex Once an index is built, it can be queried against using the nomdex find command. 
For example, given a database that contains structs of the following type representing cities:

```go
struct Row {
  City: String,
  State: String,
  GeoPos: struct {
    Latitude: Number,
    Longitude: Number,
  }
}
```

The following commands could be used to build indexes on the City, State, Latitude and Longitude attributes.

```shell
nomdex up --in-path http://localhost:8000::cities --by .City --out-ds by-name
nomdex up --in-path http://localhost:8000::cities --by .State --out-ds by-state
nomdex up --in-path http://localhost:8000::cities --by .GeoPos.Latitude --out-ds by-lat
nomdex up --in-path http://localhost:8000::cities --by .GeoPos.Longitude --out-ds by-lon
```

Once these indexes are created, the following queries could be made using the find command:

```shell
// find all cities in California
nomdex find --db http://localhost:8000 'by-state = "California"'

// find all cities whose name begins with A, B, or C
nomdex find --db http://localhost:8000 'by-name >= "A" and by-name < "D"'

// Find all tropical cities whose name begins with A, B, or C
nomdex find --db http://localhost:8000 '(by-name >= "A" and by-name < "D") and (by-lat >= -23.5 and by-lat <= 23.5)'
```

The nomdex query language is simple: it consists of comparison expressions which take the form of '*indexName comparisonOperator constantValue*'. Index names are the dataset given as the ***'out-ds'*** argument to the *up* command. Comparison operators can be one of: <, <=, >, >=, =, !=. Constants are either String values which are quoted: "hi, I'm a string constant", or Numbers which consist of digits and an optional decimal point and minus sign: 1, -1, 2.3, -3.2.

In addition, comparison expressions can be combined using "and" and "or". Parentheses can, and should, be used to express the order in which evaluation should take place.

Note: nomdex is not a complete query system. Its purpose is only to illustrate the fact that Noms maps have all the necessary properties to be used as indexes.
A complete query system would have many additional features and the ability to optimize queries in an intelligent way. ================================================ FILE: samples/go/nomdex/expr.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "bytes" "fmt" "io" "sort" "github.com/attic-labs/noms/go/types" ) type expr interface { ranges() queryRangeSlice dbgPrintTree(w io.Writer, level int) indexName() string iterator(im *indexManager) types.SetIterator } // logExpr represents a logical 'and' or 'or' expression between two other expressions. // e.g. logExpr would represent the and/or expressions in this query: // (index1 > 0 and index1 < 9) or (index1 > 100 and index < 109) type logExpr struct { op boolOp expr1 expr expr2 expr idxName string } type compExpr struct { idxName string op compOp v1 types.Value } func (le logExpr) indexName() string { return le.idxName } func (le logExpr) iterator(im *indexManager) types.SetIterator { if le.idxName != "" { return unionizeIters(iteratorsFromRanges(im.indexes[le.idxName], le.ranges())) } i1 := le.expr1.iterator(im) i2 := le.expr2.iterator(im) var iter types.SetIterator switch le.op { case and: if i1 == nil || i2 == nil { return nil } iter = types.NewIntersectionIterator(le.expr1.iterator(im), le.expr2.iterator(im)) case or: if i1 == nil { return i2 } if i2 == nil { return i1 } iter = types.NewUnionIterator(le.expr1.iterator(im), le.expr2.iterator(im)) } return iter } func (le logExpr) ranges() (ranges queryRangeSlice) { rslice1 := le.expr1.ranges() rslice2 := le.expr2.ranges() rslice := queryRangeSlice{} switch le.op { case and: if len(rslice1) == 0 || len(rslice2) == 0 { return rslice } for _, r1 := range rslice1 { for _, r2 := range rslice2 { rslice = append(rslice, r1.and(r2)...) 
} } sort.Sort(rslice) return rslice case or: if len(rslice1) == 0 { return rslice2 } if len(rslice2) == 0 { return rslice1 } for _, r1 := range rslice1 { for _, r2 := range rslice2 { rslice = append(rslice, r1.or(r2)...) } } sort.Sort(rslice) return rslice } return queryRangeSlice{} } func (le logExpr) dbgPrintTree(w io.Writer, level int) { fmt.Fprintf(w, "%*s%s\n", 2*level, "", le.op) if le.expr1 != nil { le.expr1.dbgPrintTree(w, level+1) } if le.expr2 != nil { le.expr2.dbgPrintTree(w, level+1) } } func (re compExpr) indexName() string { return re.idxName } func iteratorsFromRange(index types.Map, rd queryRange) []types.SetIterator { first := true iterators := []types.SetIterator{} index.IterFrom(rd.lower.value, func(k, v types.Value) bool { if first && rd.lower.value != nil && !rd.lower.include && rd.lower.value.Equals(k) { return false } if rd.upper.value != nil { if !rd.upper.include && rd.upper.value.Equals(k) { return true } if rd.upper.value.Less(k) { return true } } s := v.(types.Set) iterators = append(iterators, s.Iterator()) return false }) return iterators } func iteratorsFromRanges(index types.Map, ranges queryRangeSlice) []types.SetIterator { iterators := []types.SetIterator{} for _, r := range ranges { iterators = append(iterators, iteratorsFromRange(index, r)...) 
} return iterators } func unionizeIters(iters []types.SetIterator) types.SetIterator { if len(iters) == 0 { return nil } if len(iters) <= 1 { return iters[0] } unionIters := []types.SetIterator{} var iter0 types.SetIterator for i, iter := range iters { if i%2 == 0 { iter0 = iter } else { unionIters = append(unionIters, types.NewUnionIterator(iter0, iter)) iter0 = nil } } if iter0 != nil { unionIters = append(unionIters, iter0) } return unionizeIters(unionIters) } func (re compExpr) iterator(im *indexManager) types.SetIterator { index := im.indexes[re.idxName] iters := iteratorsFromRanges(index, re.ranges()) return unionizeIters(iters) } func (re compExpr) ranges() (ranges queryRangeSlice) { var r queryRange switch re.op { case equals: e := bound{value: re.v1, include: true} r = queryRange{lower: e, upper: e} case gt: r = queryRange{lower: bound{re.v1, false, 0}, upper: bound{nil, true, 1}} case gte: r = queryRange{lower: bound{re.v1, true, 0}, upper: bound{nil, true, 1}} case lt: r = queryRange{lower: bound{nil, true, -1}, upper: bound{re.v1, false, 0}} case lte: r = queryRange{lower: bound{nil, true, -1}, upper: bound{re.v1, true, 0}} case ne: return queryRangeSlice{ {lower: bound{nil, true, -1}, upper: bound{re.v1, false, 0}}, {lower: bound{re.v1, false, 0}, upper: bound{nil, true, 1}}, } } return queryRangeSlice{r} } func (re compExpr) dbgPrintTree(w io.Writer, level int) { buf := bytes.Buffer{} types.WriteEncodedValue(&buf, re.v1) fmt.Fprintf(w, "%*s%s %s %s\n", 2*level, "", re.idxName, re.op, buf.String()) } ================================================ FILE: samples/go/nomdex/nomdex.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "fmt" "os" "github.com/attic-labs/kingpin" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/util/profile" "github.com/attic-labs/noms/go/util/verbose" ) func main() { registerUpdate() registerFind() verbose.RegisterVerboseFlags(kingpin.CommandLine) profile.RegisterProfileFlags(kingpin.CommandLine) switch kingpin.Parse() { case "up": runUpdate() case "find": runFind() } } func printError(err error, msgAndArgs ...interface{}) bool { if err != nil { err := d.Unwrap(err) switch len(msgAndArgs) { case 0: fmt.Fprintf(os.Stderr, "error: %s\n", err) case 1: fmt.Fprintf(os.Stderr, "%s%s\n", msgAndArgs[0], err) default: format, ok := msgAndArgs[0].(string) if ok { s1 := fmt.Sprintf(format, msgAndArgs[1:]...) fmt.Fprintf(os.Stderr, "%s%s\n", s1, err) } else { fmt.Fprintf(os.Stderr, "error: %s\n", err) } } } return err != nil } ================================================ FILE: samples/go/nomdex/nomdex_find.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "fmt" "io" "github.com/attic-labs/kingpin" "github.com/attic-labs/noms/go/config" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/outputpager" ) var longFindHelp = `'nomdex find' retrieves and prints objects that satisfy the 'query' argument. Indexes are built using the 'nomdex up' command. For information about building indexes, see: nomdex up -h Objects that have been indexed can be quickly found using the nomdex query language. 
For example, consider objects with the following type: struct Person { name String, geopos struct GeoPos { latitude Number, longitude Number, } } Objects of this type can be indexed on the name, latitude and longitude fields with the following commands: nomdex up --in-path ~/nomsdb::people.value --by .name --out-ds by-name nomdex up --in-path ~/nomsdb::people.value --by .geopos.latitude --out-ds by-lat nomdex up --in-path ~/nomsdb::people.value --by .geopos.longitude --out-ds by-lng The following query could be used to find all people with an address near the equator: nomdex find 'by-lat >= -1.0 and by-lat <= 1.0' We could also get a list of all people who live near the equator whose name begins with "A": nomdex find '(by-name >= "A" and by-name < "B") and (by-lat >= -1.0 and by-lat <= 1.0)' The query language is simple. It currently supports the following relational operators: <, <=, >, >=, =, != Relational expressions are always of the form: e.g. personId >= 2000. Indexes are the name given by the --out-ds argument in the 'nomdex up' command. Constants are either "strings" (in quotes) or numbers (e.g. 3, 3000, -2, -2.5, 3.147, etc). Relational expressions can be combined using the "and" and "or" operators. Parentheses can (and should) be used to ensure that the evaluation is done in the desired order. 
` var dbPath = "" var query = "" func registerFind() { cmd := kingpin.Command("find", "Search an index") cmd.Flag("db", "Database containing index").Required().StringVar(&dbPath) cmd.Arg("query", "query to evalute").Required().StringVar(&query) outputpager.RegisterOutputpagerFlags(cmd) } func runFind() int { cfg := config.NewResolver() db, err := cfg.GetDatabase(dbPath) if printError(err, "Unable to open database\n\terror: ") { return 1 } defer db.Close() im := &indexManager{db: db, indexes: map[string]types.Map{}} expr, err := parseQuery(query, im) if err != nil { fmt.Printf("err: %s\n", err) return 1 } pgr := outputpager.Start() defer pgr.Stop() iter := expr.iterator(im) cnt := 0 if iter != nil { for v := iter.Next(); v != nil; v = iter.Next() { types.WriteEncodedValue(pgr.Writer, v) fmt.Fprintf(pgr.Writer, "\n") cnt++ } } fmt.Fprintf(pgr.Writer, "Found %d objects\n", cnt) return 0 } func printObjects(w io.Writer, index types.Map, ranges queryRangeSlice) { cnt := 0 first := true printObjectForRange := func(index types.Map, r queryRange) { index.IterFrom(r.lower.value, func(k, v types.Value) bool { if first && r.lower.value != nil && !r.lower.include && r.lower.value.Equals(k) { return false } if r.upper.value != nil { if !r.upper.include && r.upper.value.Equals(k) { return true } if r.upper.value.Less(k) { return true } } s := v.(types.Set) s.IterAll(func(v types.Value) { types.WriteEncodedValue(w, v) fmt.Fprintf(w, "\n") cnt++ }) return false }) } for _, r := range ranges { printObjectForRange(index, r) } fmt.Fprintf(w, "Found %d objects\n", cnt) } func openIndex(idxName string, im *indexManager) error { if _, hasIndex := im.indexes[idxName]; hasIndex { return nil } ds := im.db.GetDataset(idxName) commit, ok := ds.MaybeHead() if !ok { return fmt.Errorf("index '%s' not found", idxName) } index, ok := commit.Get(datas.ValueField).(types.Map) if !ok { return fmt.Errorf("Value of commit at '%s' is not a valid index", idxName) } // Todo: make this type be Map, Set> 
once Issue #2326 gets resolved and // IsSubtype() returns the correct value. typ := types.MakeMapType( types.MakeUnionType(types.StringType, types.NumberType), types.ValueType) if !types.IsValueSubtypeOf(index, typ) { return fmt.Errorf("%s does not point to a suitable index type:", idxName) } im.indexes[idxName] = index return nil } ================================================ FILE: samples/go/nomdex/nomdex_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "regexp" "testing" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/marshal" "github.com/attic-labs/noms/go/nbs" "github.com/attic-labs/noms/go/spec" "github.com/attic-labs/noms/go/util/clienttest" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) type TestObj struct { Key int Fname string Lname string Gender string Age int } type testSuite struct { clienttest.ClientTestSuite } func TestNomdex(t *testing.T) { suite.Run(t, &testSuite{}) } func makeTestDb(s *testSuite, dsId string) datas.Database { db := datas.NewDatabase(nbs.NewLocalStore(s.DBDir, clienttest.DefaultMemTableSize)) l1 := []TestObj{ {1, "will", "smith", "m", 40}, {2, "lana", "turner", "f", 91}, {3, "john", "wayne", "m", 86}, {4, "johnny", "depp", "m", 50}, {5, "merrill", "streep", "f", 60}, {6, "rob", "courdry", "m", 45}, {7, "bruce", "lee", "m", 72}, {8, "bruce", "willis", "m", 36}, {9, "luis", "bunuel", "m", 100}, {10, "andy", "sandberg", "m", 32}, {11, "walter", "coggins", "m", 28}, {12, "seth", "rogan", "m", 29}, } m1 := map[string]TestObj{ "lg": {13, "lady", "gaga", "f", 39}, "ss": {14, "sam", "smith", "m", 28}, "rp": {15, "robert", "plant", "m", 69}, "ml": {16, "meat", "loaf", "m", 65}, "gf": {17, "glenn", "frey", "m", 60}, "jr": {18, "joey", "ramone", "m", 55}, "rc": {19, "ray", "charles", "m", 72}, "bk": {20, "bb", 
"king", "m", 77}, "b": {21, "beck", "", "m", 38}, "md": {22, "miles", "davis", "m", 82}, "rd": {23, "roger", "daltry", "m", 62}, "jf": {24, "john", "fogerty", "m", 60}, } m := map[string]interface{}{"actors": l1, "musicians": m1} v, err := marshal.Marshal(db, m) s.NoError(err) _, err = db.CommitValue(db.GetDataset(dsId), v) s.NoError(err) return db } func (s *testSuite) TestNomdex() { dsId := "data" db := makeTestDb(s, dsId) s.NotNil(db) db.Close() fnameIdx := "fname-idx" dataSpec := spec.CreateValueSpecString("nbs", s.DBDir, dsId) dbSpec := spec.CreateDatabaseSpecString("nbs", s.DBDir) stdout, stderr := s.MustRun(main, []string{"up", "--out-ds", fnameIdx, "--in-path", dataSpec, "--by", ".fname"}) s.Contains(stdout, "Indexed 24 objects") s.Equal("", stderr) genderIdx := "gender-idx" stdout, stderr = s.MustRun(main, []string{"up", "--out-ds", genderIdx, "--in-path", dataSpec, "--by", ".gender"}) s.Contains(stdout, "Indexed 24 objects") s.Equal("", stderr) stdout, stderr = s.MustRun(main, []string{"find", "--db", dbSpec, `fname-idx = "lady"`}) s.Contains(stdout, "Found 1 objects") s.Equal("", stderr) stdout, stderr = s.MustRun(main, []string{"find", "--db", dbSpec, `fname-idx = "lady" and gender-idx = "f"`}) s.Contains(stdout, "Found 1 objects") s.Equal("", stderr) stdout, stderr = s.MustRun(main, []string{"find", "--db", dbSpec, `fname-idx != "lady" and gender-idx != "m"`}) s.Contains(stdout, "Found 2 objects") s.Equal("", stderr) stdout, stderr = s.MustRun(main, []string{"find", "--db", dbSpec, `fname-idx != "lady" and fname-idx != "john"`}) s.Contains(stdout, "Found 21 objects") s.Equal("", stderr) stdout, stderr = s.MustRun(main, []string{"find", "--db", dbSpec, `fname-idx != "lady" or gender-idx != "f"`}) s.Contains(stdout, "Found 23 objects") s.Equal("", stderr) } func TestTransform(t *testing.T) { assert := assert.New(t) tcs := [][]string{ []string{`"01/02/2003"`, "\"(\\d{2})/(\\d{2})/(\\d{4})\"", "$3/$2/$1", "2003/02/01"}, } for _, tc := range tcs { base, 
regex, replace, expected := tc[0], tc[1], tc[2], tc[3] testRe := regexp.MustCompile(regex) result := testRe.ReplaceAllString(base, replace) assert.Equal(expected, result) } tcs = [][]string{ []string{"343 STATE ST\nROCHESTER, NY 14650\n(43.161276, -77.619386)", "43.161276", "-77.619386"}, []string{"TWO EMBARCADERO CENTER\nPROMENADE LEVEL SAN FRANCISCO, CA 94111\n", "", ""}, } findLatRe := regexp.MustCompile("(?s)\\(([\\d.]+)") findLngRe := regexp.MustCompile("(?s)(-?[\\d.]+)\\)") for _, tc := range tcs { base, expectedLat, expectedLng := tc[0], tc[1], tc[2] lat := findLatRe.FindStringSubmatch(base) assert.True(len(lat) == 0 && expectedLat == "" || (len(lat) == 2 && expectedLat == lat[1])) lng := findLngRe.FindStringSubmatch(base) assert.True(len(lng) == 0 && expectedLng == "" || (len(lng) == 2 && expectedLng == lng[1])) } } ================================================ FILE: samples/go/nomdex/nomdex_update.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "fmt" "regexp" "strconv" "sync" "sync/atomic" "github.com/attic-labs/kingpin" humanize "github.com/dustin/go-humanize" "github.com/attic-labs/noms/go/config" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/datas" "github.com/attic-labs/noms/go/hash" "github.com/attic-labs/noms/go/types" "github.com/attic-labs/noms/go/util/profile" "github.com/attic-labs/noms/go/util/status" ) var ( inPathArg = "" outDsArg = "" relPathArg = "" txRegexArg = "" txReplaceArg = "" txConvertArg = "" ) var longUpHelp = `'nomdex up' builds indexes that are useful for rapidly accessing objects. This sample tool can index objects based on any string or number attribute of that object. The 'up' command works by scanning all the objects reachable from the --in-path command line argument. 
It tests the object to determine if there is a string or number value reachable by applying the --by path argument to the object. If so, the object is added to the index under that value. For example, if there are objects in the database that contain a personId and a gender field, 'nomdex up' can scan all the objects in a given dataset and build an index on the specified field with the following commands: nomdex up --in-path .value --by .gender --out-ds gender-index nomdex up --in-path .value --by .address.city --out-ds personId-index The previous commands can be understood as follows. The first command updates or builds an index by scanning all the objects that are reachable from |in-path| that have a string or number value reachable using |by| and stores the root of the resulting index in a dataset specified by |out-ds|. Notice that the --in-path argument has a value of '.value'. The '.value' is not strictly necessary but it's normally useful when indexing. Since datasets generally point to Commit objects in Noms, they usually have parents which are previous versions of the data. If you add .value to the end of the dataset, only the most recent version of the data will be indexed. Without the '.value' all objects in all previous commits will also be indexed which is most often not what is expected. There are three additional commands that can be useful for transforming the value being indexed: * tx-replace: used to modify behavior of tx-regex, see below * tx-regex: the behavior for this argument depends on whether a tx-replace argument is present. If so, the go routine "regexp.ReplaceAllString() is called: txRe := regex.MustCompile(|tx-regex|) txRe.ReplaceAllString(|index value|, |tx-replace| If tx-replace is not present then the following call is made on each value: txRe := regex.MustCompile(|tx-regex|) regex.FindStringSubmatch(|index value|) *tx-convert: attempts to convert the index value to the type specified. 
Currently the only value accepted for this arg is 'number' The resulting indexes can be used by the 'nomdex find command' for help on that see: nomdex find -h ` func registerUpdate() { cmd := kingpin.Command("up", "Build/update an index.") cmd.Flag("in-path", "a value to search for items to index within").Required().StringVar(&inPathArg) cmd.Flag("out-ds", "name of dataset to save the results to").Required().StringVar(&outDsArg) cmd.Flag("by", "a path relative to all the items in to index by").Required().StringVar(&relPathArg) cmd.Flag("tx-regex", "perform a string transformation on value before putting it in index").StringVar(&txRegexArg) cmd.Flag("tx-replace", "replace values matched by tx-regex").StringVar(&txReplaceArg) cmd.Flag("tx-convert", "convert the result of a tx regex/replace to this type (only does 'number' currently)").StringVar(&txConvertArg) } type StreamingSetEntry struct { valChan chan<- types.Value setChan <-chan types.Set } type IndexMap map[types.Value]StreamingSetEntry type Index struct { m IndexMap indexedCnt int64 seenCnt int64 mutex sync.Mutex } func runUpdate() int { defer profile.MaybeStartProfile().Stop() cfg := config.NewResolver() db, rootObject, err := cfg.GetPath(inPathArg) d.Chk.NoError(err) if rootObject == nil { fmt.Printf("Object not found: %s\n", inPathArg) return 1 } outDs := db.GetDataset(outDsArg) relPath, err := types.ParsePath(relPathArg) if printError(err, "Error parsing -by value\n\t") { return 1 } gb := types.NewGraphBuilder(db, types.MapKind) addElementsToGraphBuilder(gb, db, rootObject, relPath) indexMap := gb.Build().(types.Map) outDs, err = db.Commit(outDs, indexMap, datas.CommitOptions{}) d.Chk.NoError(err) fmt.Printf("Committed index with %d entries to dataset: %s\n", indexMap.Len(), outDsArg) return 0 } func addElementsToGraphBuilder(gb *types.GraphBuilder, db datas.Database, rootObject types.Value, relPath types.Path) { typeCacheMutex := sync.Mutex{} typeCache := map[hash.Hash]bool{} var txRe *regexp.Regexp if 
txRegexArg != "" { var err error txRe, err = regexp.Compile(txRegexArg) d.CheckError(err) } index := Index{m: IndexMap{}} types.WalkValues(rootObject, db, func(v types.Value) bool { typ := types.TypeOf(v) typeCacheMutex.Lock() hasPath, ok := typeCache[typ.Hash()] typeCacheMutex.Unlock() if !ok || hasPath { pathResolved := false tv := relPath.Resolve(v, db) if tv != nil { index.addToGraphBuilder(gb, tv, v, txRe) pathResolved = true } if !ok { typeCacheMutex.Lock() typeCache[typ.Hash()] = pathResolved typeCacheMutex.Unlock() } } return false }) status.Done() } func (idx *Index) addToGraphBuilder(gb *types.GraphBuilder, k, v types.Value, txRe *regexp.Regexp) { atomic.AddInt64(&idx.seenCnt, 1) if txRe != nil { k1 := types.EncodedValue(k) k2 := "" if txReplaceArg != "" { k2 = txRe.ReplaceAllString(string(k1), txReplaceArg) } else { matches := txRe.FindStringSubmatch(string(k1)) if len(matches) > 0 { k2 = matches[len(matches)-1] } } if txConvertArg == "number" { if k2 == "" { return } n, err := strconv.ParseFloat(k2, 64) if err != nil { fmt.Println("error converting to number: ", err) return } k = types.Number(n) } else { k = types.String(k2) } } atomic.AddInt64(&idx.indexedCnt, 1) gb.SetInsert(types.ValueSlice{k}, v) status.Printf("Found %s objects, Indexed %s objects", humanize.Comma(idx.seenCnt), humanize.Comma(idx.indexedCnt)) } ================================================ FILE: samples/go/nomdex/parser.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// qScanner wraps a text/scanner.Scanner and adds a single token of lookahead.
// While a token is buffered by Peek, |peeked| is true and |peekedToken| and
// |peekedText| hold that token and its text.
type qScanner struct {
	s           scanner.Scanner
	peekedToken rune
	peekedText  string
	peeked      bool
}

// Scan returns the next token. A token buffered by Peek is handed out first;
// otherwise the underlying scanner is advanced.
func (qs *qScanner) Scan() rune {
	if !qs.peeked {
		return qs.s.Scan()
	}
	qs.peeked = false
	return qs.peekedToken
}

// Peek returns the next token without consuming it. Repeated calls return the
// same token until Scan is called.
func (qs *qScanner) Peek() rune {
	if qs.peeked {
		return qs.peekedToken
	}
	qs.peekedToken = qs.s.Scan()
	qs.peekedText = qs.s.TokenText()
	qs.peeked = true
	return qs.peekedToken
}

// TokenText returns the text of the buffered token if there is one, and the
// text of the underlying scanner's most recent token otherwise.
func (qs *qScanner) TokenText() string {
	if qs.peeked {
		return qs.peekedText
	}
	return qs.s.TokenText()
}

// Pos returns the current position of the underlying scanner.
func (qs *qScanner) Pos() scanner.Position {
	return qs.s.Pos()
}
strings.ContainsRune(identChars, r) } errorFunc := func(s *scanner.Scanner, msg string) { d.PanicIfError(fmt.Errorf("%s, pos: %s\n", msg, s.Pos())) } var s scanner.Scanner s.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanStrings | scanner.SkipComments s.Init(strings.NewReader(query)) s.IsIdentRune = isIdentRune s.Error = errorFunc qs := qScanner{s: s} return &qs } func (qs *qScanner) parseExpr(level int, im *indexManager) expr { tok := qs.Scan() switch tok { case '(': expr1 := qs.parseExpr(level+1, im) tok := qs.Scan() if tok != ')' { d.PanicIfError(fmt.Errorf("missing ending paren for expr")) } else { tok = qs.Peek() if tok == ')' { return expr1 } tok = qs.Scan() text := qs.TokenText() switch { case tok == scanner.Ident && isBoolOp(text): op := boolOp(text) expr2 := qs.parseExpr(level+1, im) return logExpr{op: op, expr1: expr1, expr2: expr2, idxName: idxNameIfSame(expr1, expr2)} case tok == scanner.EOF: return expr1 default: d.PanicIfError(fmt.Errorf("extra text found at end of expr, tok: %d, text: %s", int(tok), qs.TokenText())) } } case scanner.Ident: err := openIndex(qs.TokenText(), im) d.PanicIfError(err) expr1 := qs.parseCompExpr(level+1, qs.TokenText(), im) tok := qs.Peek() switch tok { case ')': return expr1 case rune(scanner.Ident): _ = qs.Scan() text := qs.TokenText() if isBoolOp(text) { op := boolOp(text) expr2 := qs.parseExpr(level+1, im) return logExpr{op: op, expr1: expr1, expr2: expr2, idxName: idxNameIfSame(expr1, expr2)} } else { d.PanicIfError(fmt.Errorf("expected boolean op, found: %s, level: %d", text, level)) } case rune(scanner.EOF): return expr1 default: _ = qs.Scan() } default: d.PanicIfError(fmt.Errorf("unexpected token in expr: %s, %d", qs.TokenText(), tok)) } return logExpr{} } func (qs *qScanner) parseCompExpr(level int, indexName string, im *indexManager) compExpr { qs.Scan() text := qs.TokenText() if !isCompOp(text) { d.PanicIfError(fmt.Errorf("expected relop token but found: '%s'", text)) } op := compOp(text) value := 
qs.parseValExpr() return compExpr{indexName, op, value} } func (qs *qScanner) parseValExpr() types.Value { tok := qs.Scan() text := qs.TokenText() isNeg := false if tok == '-' { isNeg = true tok = qs.Scan() text = qs.TokenText() } switch tok { case scanner.String: if isNeg { d.PanicIfError(fmt.Errorf("expected number after '-', found string: %s", text)) } return valueFromString(text) case scanner.Float: f, _ := strconv.ParseFloat(text, 64) if isNeg { f = -f } return types.Number(f) case scanner.Int: i, _ := strconv.ParseInt(text, 10, 64) if isNeg { i = -i } return types.Number(i) } d.PanicIfError(fmt.Errorf("expected value token, found: '%s'", text)) return nil // for compiler } func valueFromString(t string) types.Value { l := len(t) if l < 2 && t[0] == '"' && t[l-1] == '"' { d.PanicIfError(fmt.Errorf("Unable to get value from token: %s", t)) } return types.String(t[1 : l-1]) } func isCompOp(s string) bool { for _, op := range compOps { if s == string(op) { return true } } return false } func isBoolOp(s string) bool { for _, op := range boolOps { if s == string(op) { return true } } return false } func idxNameIfSame(expr1, expr2 expr) string { if expr1.indexName() == expr2.indexName() { return expr1.indexName() } return "" } ================================================ FILE: samples/go/nomdex/parser_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// TestPeek checks that Peek reports the upcoming token without consuming it:
// for every expected token, Peek and the Scan that follows must return the
// same token, and TokenText must return the same text after either call.
func TestPeek(t *testing.T) {
	assert := assert.New(t)

	s := NewQueryScanner(`_ < "one"`)
	// Expected tokens in input order, ending with EOF.
	scannerResults := []scannerResult{
		{tok: int('_'), text: "_"},
		{tok: scanner.Ident, text: "<"},
		{tok: scanner.String, text: `"one"`},
		{tok: scanner.EOF, text: ""},
	}

	for _, sr := range scannerResults {
		assert.Equal(sr.tok, int(s.Peek()))
		assert.Equal(sr.text, s.TokenText())
		assert.Equal(sr.tok, int(s.Scan()))
		assert.Equal(sr.text, s.TokenText())
	}
}
types.Number(2015)} re2 := compExpr{"index1", gte, types.Number(2020)} re3 := compExpr{"index1", lte, types.Number(2022)} re4 := compExpr{"index1", lt, types.Number(-2030)} re5 := compExpr{"index1", ne, types.Number(3.5)} re6 := compExpr{"index1", ne, types.Number(-3500.4536632)} re7 := compExpr{"index1", ne, types.String("whassup")} queries := []parseResult{ {`index1 = 2015`, re1}, {`(index1 = 2015 )`, re1}, {`(((index1 = 2015 ) ))`, re1}, {`index1 = 2015 or index1 >= 2020`, logExpr{or, re1, re2, "index1"}}, {`(index1 = 2015) or index1 >= 2020`, logExpr{or, re1, re2, "index1"}}, {`index1 = 2015 or (index1 >= 2020)`, logExpr{or, re1, re2, "index1"}}, {`(index1 = 2015 or index1 >= 2020)`, logExpr{or, re1, re2, "index1"}}, {`(index1 = 2015 or index1 >= 2020) and index1 <= 2022`, logExpr{and, logExpr{or, re1, re2, "index1"}, re3, "index1"}}, {`index1 = 2015 or index1 >= 2020 and index1 <= 2022`, logExpr{or, re1, logExpr{and, re2, re3, "index1"}, "index1"}}, {`index1 = 2015 or index1 >= 2020 and index1 <= 2022 or index1 < -2030`, logExpr{or, re1, logExpr{and, re2, logExpr{or, re3, re4, "index1"}, "index1"}, "index1"}}, {`(index1 = 2015 or index1 >= 2020) and (index1 <= 2022 or index1 < -2030)`, logExpr{and, logExpr{or, re1, re2, "index1"}, logExpr{or, re3, re4, "index1"}, "index1"}}, {`index1 != 3.5`, re5}, {`index1 != -3500.4536632`, re6}, {`index1 != "whassup"`, re7}, } storage := &chunks.MemoryStorage{} db := datas.NewDatabase(storage.NewView()) _, err := db.CommitValue(db.GetDataset("index1"), types.NewMap(db, types.String("one"), types.NewSet(db, types.String("two")))) assert.NoError(err) im := &indexManager{db: db, indexes: map[string]types.Map{}} for _, pr := range queries { expr, err := parseQuery(pr.query, im) assert.NoError(err) assert.Equal(pr.ex, expr, "bad query: %s", pr.query) } badQueries := []string{ `sdfsd = 2015`, `index1 = "unfinished string`, `index1 and 2015`, `index1 < `, `index1 < 2015 and ()`, `index1 < 2015 an index1 > 2016`, `(index1 < 2015) 
what`, `(index1< 2015`, `(badIndexName < 2015)`, } im1 := &indexManager{db: db, indexes: map[string]types.Map{}} for _, q := range badQueries { expr, err := parseQuery(q, im1) assert.Error(err) assert.Nil(expr) } } ================================================ FILE: samples/go/nomdex/query_range.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "bytes" "fmt" "io" "sort" "github.com/attic-labs/noms/go/types" ) type bound struct { value types.Value include bool infinity int8 } func (b bound) isLessThanOrEqual(o bound) (res bool) { return b.equals(o) || b.isLessThan(o) } func (b bound) isLessThan(o bound) (res bool) { if b.infinity < o.infinity { return true } if b.infinity > o.infinity { return false } if b.infinity == o.infinity && b.infinity != 0 { return false } if b.value.Less(o.value) { return true } if b.value.Equals(o.value) { if !b.include && o.include { return true } } return false } func (b bound) isGreaterThanOrEqual(o bound) (res bool) { return !b.isLessThan(o) } func (b bound) isGreaterThan(o bound) (res bool) { return !b.equals(o) || !b.isLessThan(o) } func (b bound) equals(o bound) bool { return b.infinity == o.infinity && b.include == o.include && (b.value == nil && o.value == nil || (b.value != nil && o.value != nil && b.value.Equals(o.value))) } func (b bound) String() string { var s1 string if b.value == nil { s1 = "" } else { buf := bytes.Buffer{} types.WriteEncodedValue(&buf, b.value) s1 = buf.String() } return fmt.Sprintf("bound{v: %s, include: %t, infinity: %d}", s1, b.include, b.infinity) } func (b bound) minValue(o bound) (res bound) { if b.isLessThan(o) { return b } return o } func (b bound) maxValue(o bound) (res bound) { if b.isLessThan(o) { return o } return b } type queryRange struct { lower bound upper bound } func (r queryRange) and(o queryRange) (rangeDescs 
queryRangeSlice) { if !r.intersects(o) { return []queryRange{} } lower := r.lower.maxValue(o.lower) upper := r.upper.minValue(o.upper) return []queryRange{{lower, upper}} } func (r queryRange) or(o queryRange) (rSlice queryRangeSlice) { if r.intersects(o) { v1 := r.lower.minValue(o.lower) v2 := r.upper.maxValue(o.upper) return queryRangeSlice{queryRange{v1, v2}} } rSlice = queryRangeSlice{r, o} sort.Sort(rSlice) return rSlice } func (r queryRange) intersects(o queryRange) (res bool) { if r.lower.isGreaterThanOrEqual(o.lower) && r.lower.isLessThanOrEqual(o.upper) { return true } if r.upper.isGreaterThanOrEqual(o.lower) && r.upper.isLessThanOrEqual(o.upper) { return true } if o.lower.isGreaterThanOrEqual(r.lower) && o.lower.isLessThanOrEqual(r.upper) { return true } if o.upper.isGreaterThanOrEqual(r.lower) && o.upper.isLessThanOrEqual(r.upper) { return true } return false } func (r queryRange) String() string { return fmt.Sprintf("queryRange{lower: %s, upper: %s", r.lower, r.upper) } // queryRangeSlice defines the sort.Interface. This implementation sorts queryRanges by // the lower bound in ascending order. type queryRangeSlice []queryRange func (rSlice queryRangeSlice) Len() int { return len(rSlice) } func (rSlice queryRangeSlice) Swap(i, j int) { rSlice[i], rSlice[j] = rSlice[j], rSlice[i] } func (rSlice queryRangeSlice) Less(i, j int) bool { return !rSlice[i].lower.equals(rSlice[j].lower) && rSlice[i].lower.isLessThanOrEqual(rSlice[j].lower) } func (rSlice queryRangeSlice) dbgPrint(w io.Writer) { for i, rd := range rSlice { if i == 0 { fmt.Fprintf(w, "\n#################\n") } fmt.Fprintf(w, "queryRange %d: %s\n", i, rd) } if len(rSlice) > 0 { fmt.Fprintf(w, "\n") } } ================================================ FILE: samples/go/nomdex/query_range_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package main import ( "testing" "github.com/attic-labs/noms/go/types" "github.com/stretchr/testify/assert" ) const nilHolder = -1000000 var ( r1 = qr(2, true, 5, true) r2 = qr(0, true, 8, true) r3 = qr(0, true, 3, true) r4 = qr(3, true, 8, true) r5 = qr(0, true, 1, true) r6 = qr(6, true, 10, true) r7 = qr(nilHolder, true, 10, true) r8 = qr(3, true, nilHolder, true) r10 = qr(2, true, 5, false) r11 = qr(5, true, 10, true) ) func newBound(i int, include bool, infinity int) bound { var v types.Value if i != nilHolder { v = types.Number(i) } return bound{value: v, include: include, infinity: int8(infinity)} } func qr(lower int, lowerIncl bool, upper int, upperIncl bool) queryRange { lowerInf := 0 if lower == nilHolder { lowerInf = -1 } upperInf := 0 if upper == nilHolder { upperInf = 1 } return queryRange{newBound(lower, lowerIncl, lowerInf), newBound(upper, upperIncl, upperInf)} } func TestRangeIntersects(t *testing.T) { assert := assert.New(t) assert.True(r1.intersects(r2)) assert.True(r1.intersects(r3)) assert.True(r1.intersects(r4)) assert.True(r2.intersects(r1)) assert.True(r1.intersects(r7)) assert.True(r1.intersects(r8)) assert.True(r3.intersects(r4)) assert.True(r3.intersects(r4)) assert.False(r1.intersects(r5)) assert.False(r1.intersects(r6)) assert.False(r10.intersects(r11)) } func TestRangeAnd(t *testing.T) { assert := assert.New(t) assert.Empty(r1.and(r5)) assert.Empty(r1.and(r6)) assert.Equal(r1, r1.and(r2)[0]) assert.Equal(r1, r2.and(r1)[0]) expected := qr(3, true, 5, true) assert.Equal(expected, r1.and(r4)[0]) } func TestRangeOr(t *testing.T) { assert := assert.New(t) assert.Equal(r2, r1.or(r2)[0]) expected := qr(0, true, 5, true) assert.Equal(expected, r1.or(r3)[0]) expectedSlice := queryRangeSlice{r5, r1} assert.Equal(expectedSlice, r1.or(r5)) assert.Equal(expectedSlice, r5.or(r1)) } func TestIsLessThan(t *testing.T) { assert := assert.New(t) 
assert.True(newBound(1, true, 0).isLessThanOrEqual(newBound(2, true, 0))) assert.False(newBound(2, true, 0).isLessThanOrEqual(newBound(1, true, 0))) assert.True(newBound(1, true, 0).isLessThanOrEqual(newBound(1, true, 0))) assert.True(newBound(1, false, 0).isLessThanOrEqual(newBound(2, false, 0))) assert.False(newBound(2, false, 0).isLessThanOrEqual(newBound(1, false, 0))) assert.True(newBound(1, false, 0).isLessThanOrEqual(newBound(1, false, 0))) assert.False(newBound(1, true, 0).isLessThanOrEqual(newBound(1, false, 0))) assert.True(newBound(1, false, 0).isLessThanOrEqual(newBound(1, true, 0))) assert.True(newBound(nilHolder, true, -1).isLessThanOrEqual(newBound(1, true, 0))) assert.False(newBound(1, false, 0).isLessThanOrEqual(newBound(nilHolder, true, -1))) } func TestIsGreaterThan(t *testing.T) { assert := assert.New(t) assert.True(newBound(2, true, 0).isGreaterThanOrEqual(newBound(1, true, 0))) assert.False(newBound(1, true, 0).isGreaterThanOrEqual(newBound(2, true, 0))) assert.True(newBound(1, true, 0).isGreaterThanOrEqual(newBound(1, true, 0))) assert.True(newBound(2, false, 0).isGreaterThanOrEqual(newBound(1, false, 0))) assert.False(newBound(1, false, 0).isGreaterThanOrEqual(newBound(2, false, 0))) assert.True(newBound(1, false, 0).isGreaterThanOrEqual(newBound(1, false, 0))) assert.True(newBound(1, true, 0).isGreaterThanOrEqual(newBound(1, false, 0))) assert.False(newBound(1, false, 0).isGreaterThanOrEqual(newBound(2, true, 0))) assert.True(newBound(nilHolder, true, 1).isGreaterThanOrEqual(newBound(1, true, 0))) assert.False(newBound(1, true, 0).isGreaterThanOrEqual(newBound(nilHolder, true, 1))) } func TestMinValue(t *testing.T) { assert := assert.New(t) ve1 := newBound(5, false, 0) ve2 := newBound(5, true, 0) ve3 := newBound(nilHolder, true, -1) ve4 := newBound(nilHolder, true, 1) assert.Equal(ve1, ve1.minValue(ve2)) assert.Equal(ve3, ve1.minValue(ve3)) assert.Equal(ve1, ve1.minValue(ve4)) } func TestMaxValue(t *testing.T) { assert := assert.New(t) ve1 
:= newBound(5, false, 0) ve2 := newBound(5, true, 0) ve3 := newBound(nilHolder, true, -1) ve4 := newBound(nilHolder, true, 1) assert.Equal(ve2, ve1.maxValue(ve2)) assert.Equal(ve1, ve1.maxValue(ve3)) assert.Equal(ve4, ve1.maxValue(ve4)) } ================================================ FILE: tools/file/file.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package file import ( "fmt" "io" "os" "path/filepath" "runtime" "github.com/attic-labs/noms/go/d" ) // DumbCopy copies the contents of a regular file at srcPath (following symlinks) to a new regular file at dstPath. New file is created with same mode. func DumbCopy(srcPath, dstPath string) { chkClose := func(c io.Closer) { d.PanicIfError(c.Close()) } info, err := os.Stat(srcPath) d.PanicIfError(err) if info.IsDir() { d.PanicIfError(ErrNoCopyDir) } src, err := os.Open(srcPath) d.PanicIfError(err) defer chkClose(src) dst, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, info.Mode()) d.PanicIfError(err) defer chkClose(dst) _, err = io.Copy(dst, src) d.PanicIfError(err) } // MyDir returns the directory in which the file containing the calling source code resides. func MyDir() string { _, path, _, ok := runtime.Caller(1) if !ok { d.Panic("Should have been able to get Caller.") } return filepath.Dir(path) } var ErrNoCopyDir = fmt.Errorf("attempted to copy a directory") ================================================ FILE: tools/file/file_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. 
// Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package file import ( "io/ioutil" "os" "path/filepath" "testing" "github.com/attic-labs/noms/go/d" "github.com/stretchr/testify/suite" ) const ( contents = "hey" ) func TestSerialRunnerTestSuite(t *testing.T) { suite.Run(t, &FileTestSuite{}) } type FileTestSuite struct { suite.Suite dir, src, exc string } func (suite *FileTestSuite) SetupTest() { var err error suite.dir, err = ioutil.TempDir(os.TempDir(), "") suite.NoError(err) suite.src = filepath.Join(suite.dir, "srcfile") suite.exc = filepath.Join(suite.dir, "excfile") suite.NoError(ioutil.WriteFile(suite.src, []byte(contents), 0644)) suite.NoError(ioutil.WriteFile(suite.exc, []byte(contents), 0755)) } func (suite *FileTestSuite) TearDownTest() { os.Remove(suite.dir) } func (suite *FileTestSuite) TestCopyFile() { dst := filepath.Join(suite.dir, "dstfile") test := func(src string, mode int) { DumbCopy(src, dst) info, err := os.Stat(src) suite.NoError(err) suite.Equal(mode, int(info.Mode())) out, err := ioutil.ReadFile(dst) suite.NoError(err) suite.Equal(contents, string(out)) } test(suite.src, 0644) test(suite.exc, 0755) } func (suite *FileTestSuite) TestCopyLink() { link := filepath.Join(suite.dir, "link") suite.NoError(os.Symlink(suite.src, link)) dst := filepath.Join(suite.dir, "dstfile") DumbCopy(link, dst) info, err := os.Lstat(dst) suite.NoError(err) suite.True(info.Mode().IsRegular()) out, err := ioutil.ReadFile(dst) suite.NoError(err) suite.Equal(contents, string(out)) } func (suite *FileTestSuite) TestNoCopyDir() { dir, err := ioutil.TempDir(suite.dir, "") suite.NoError(err) dst := filepath.Join(suite.dir, "dst") suite.Error(d.Try(func() { DumbCopy(dir, dst) })) } ================================================ FILE: tools/licensify.py ================================================ #!/usr/bin/python # Copyright 2016 Attic Labs, Inc. All rights reserved. 
licenseRows = [
    'Copyright 2016 Attic Labs, Inc. All rights reserved.',
    'Licensed under the Apache License, version 2.0:',
    'http://www.apache.org/licenses/LICENSE-2.0',
]

# Comment syntax per file extension, as a 3-tuple:
# (opening line, prefix put before each license row, closing line).
# An empty opening/closing string means the language has no block delimiters.
comment_markers = {
    'go': ('', '// ', ''),
    'js': ('', '// ', ''),
    'py': ('', '# ', ''),
    # NOTE(review): this entry had lost its markers (it was the bare string
    # '', which cannot be unpacked into three values). The HTML comment
    # delimiters and two-space row prefix are a reconstruction -- confirm
    # against the headers already present in the repository's .html files.
    'html': ('<!--', '  ', '-->'),
    'css': ('/**', ' * ', ' */'),
}


def main():
    '''Rewrites the license header of every eligible file listed by git.'''
    # universal_newlines makes check_output return text on Python 2 and 3.
    listing = subprocess.check_output(['git', 'ls-files'],
                                      universal_newlines=True)
    for n in listing.split('\n'):
        if n == '' or n.startswith('vendor/') or n.endswith('.min.js'):
            continue
        _, ext = os.path.splitext(n)
        if ext == '':
            continue
        ext = ext[1:]
        pattern = buildLicensePattern(ext)
        if pattern is not None:
            with open(n, 'r+') as f:
                processFile(f, ext, pattern)


def processFile(f, ext, pattern):
    '''Updates the license block in file |f|.

    |f| must be open for reading and writing and positioned at its start.
    The file is only rewritten when the header actually changes.
    '''
    content = f.read()
    # Compute the new text before touching the file so that a regex failure
    # cannot leave it truncated.
    replacement = re.sub(pattern, getLicense(ext), content)
    if replacement == content:
        return
    f.seek(0)
    f.truncate()
    f.write(replacement)


def buildLicensePattern(ext):
    '''
    Builds a regex pattern that matches license blocks in files with
    extension |ext|.

    Returns None when |ext| has no entry in comment_markers. The pattern is
    anchored at the start of the file and exposes three named groups
    (shebang, doctype, flow) that getLicense() re-emits around the header.
    '''
    markers = comment_markers.get(ext)
    if markers is None:
        return None

    (first, mark, last) = [re.escape(m) for m in markers]

    # The first line must include the copyright string to avoid picking up
    # random other comment blocks at head of file.
    head = mark + r'Copyright \d+ (The Noms Authors|Attic Labs).*\n'
    body = '(' + mark + r'.*\n)*'
    prefix = first + r'\n' if first != '' else ''
    suffix = last + r'\n' if last != '' else ''

    # We want to make sure shebang files stay at head of file.
    shebang = r'(?P<shebang>\#\!.+\n+|)'
    # Allow flow annotations
    flow = r'(?P<flow>// @flow\n+|)'
    # Doctype header
    doctype = r'(?P<doctype><\!doctype html>\n+|)'

    return ('^' + shebang + doctype +
            '(' + prefix + head + body + suffix + r'\n)?' + flow)


def getLicense(ext):
    '''Gets the license block for files with extension |ext|.

    The result is a re.sub() replacement template: it refers to the named
    groups defined by buildLicensePattern(), so the two must stay in sync.
    Raises KeyError when |ext| has no entry in comment_markers.
    '''
    (first, mark, last) = comment_markers[ext]
    result = '\n'.join([mark + line for line in licenseRows])
    if first != '':
        result = first + '\n' + result
    if last != '':
        result = result + '\n' + last
    return r'\g<shebang>' + r'\g<doctype>' + result + '\n\n' + r'\g<flow>'


if __name__ == '__main__':
    main()
// runnerFn runs one noms command. db is the database spec given on the command
// line; ds is the full dataset spec ("<db>::<dataset>") built in main.
type runnerFn func(db, ds string)

// runner pairs a runnerFn with the label printed before each run.
type runner struct {
	name string
	fn   runnerFn
}

func main() {
	rand.Seed(time.Now().UnixNano() + bestEffortGetIP())

	if len(os.Args) != 2 {
		fmt.Println("Usage: loadtest <db>")
		os.Exit(-1)
	}

	db := os.Args[1]

	rs := []runner{
		{"diff", runDiff},
		{"log diff", runLogDiff},
		{"log show", runLogShow},
		{"show", runShow},
		{"sync", runSync},
	}

	// streamDs never closes its channel, so this loop runs until the process
	// is killed.
	for ds := range streamDs(db) {
		start := time.Now()
		r := rs[rand.Intn(len(rs))]
		fmt.Println(time.Now().Format(time.Stamp), r.name, db, ds)
		r.fn(db, fmt.Sprintf("%s::%s", db, ds))
		fmt.Println(" took", time.Since(start).String())
	}
}

// bestEffortGetIP returns the first non-loopback IPv4 interface address as a
// big-endian integer, or 0 if none can be determined. It is only used to
// perturb the random seed.
func bestEffortGetIP() (asNum int64) {
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return 0
	}
	for _, a := range addrs {
		ipnet, ok := a.(*net.IPNet)
		if !ok || ipnet.IP.IsLoopback() {
			continue
		}
		if v4 := ipnet.IP.To4(); v4 != nil {
			return int64(binary.BigEndian.Uint32(v4))
		}
	}
	return 0
}

// runDiff diffs the dataset head against its parent commit, if it has one.
func runDiff(db, ds string) {
	if parent := getParent(db, ds); parent != "" {
		call(nil, "noms", "diff", ds, parent)
	} else {
		fmt.Println(" (no parent, cannot diff)")
	}
}

// runLogDiff runs "noms log" on the dataset.
func runLogDiff(db, ds string) {
	call(nil, "noms", "log", ds)
}

// runLogShow runs "noms log --show-value" on the dataset.
func runLogShow(db, ds string) {
	call(nil, "noms", "log", "--show-value", ds)
}

// runShow runs "noms show" on the dataset, skipping datasets whose name ends
// in "/raw".
func runShow(db, ds string) {
	if strings.HasSuffix(ds, "/raw") {
		fmt.Println(" (skipping raw file, blobs are too slow)")
	} else {
		call(nil, "noms", "show", ds)
	}
}

// runSync syncs the dataset into a throwaway local database under a temp
// directory, which is removed afterwards.
func runSync(db, ds string) {
	dir, err := ioutil.TempDir("", "loadtest")
	if err != nil {
		fmt.Fprintln(os.Stderr, " ERROR: failed to create temp directory:", err.Error())
		return
	}
	defer os.RemoveAll(dir)

	// Try to sync to parent, then from parent to head.
	// If there isn't a parent then just sync head.
	syncDs := fmt.Sprintf("ldb:%s::sync", dir)
	if parent := getParent(db, ds); parent != "" {
		call(nil, "noms", "sync", parent, syncDs)
	}
	call(nil, "noms", "sync", ds, syncDs)
}

// getParent returns a "<db>::#<hash>" spec for the commit listed second in
// "noms log -n 2 --oneline <ds>", or "" when the log has no second row (or
// the command failed and produced no output).
func getParent(db, ds string) string {
	buf := &bytes.Buffer{}
	call(buf, "noms", "log", "-n", "2", "--oneline", ds)
	return parentRef(db, buf.String())
}

// parentRef extracts the parent commit spec from two-row oneline log output.
func parentRef(db, logOutput string) string {
	// Output will look like:
	// abc (Parent def)
	// def (Parent None)
	// We could use the first line and grab the Parent value from there, but it could also be Merge,
	// and it might be None, so easier to just get the 2nd row.
	rows := strings.Split(strings.TrimSpace(logOutput), "\n")
	if len(rows) < 2 {
		return ""
	}
	fields := strings.Fields(rows[1])
	if len(fields) == 0 {
		return ""
	}
	return fmt.Sprintf("%s::#%s", db, fields[0])
}

// call runs name with arg, echoing the command line first. The child's stdout
// goes to stdout (discarded when nil) and its stderr to this process's
// stderr. A failure is reported on stderr and also returned.
func call(stdout io.Writer, name string, arg ...string) error {
	cmd := exec.Command(name, arg...)
	fmt.Println(" >", name, strings.Join(arg, " "))
	cmd.Stdout = stdout
	cmd.Stderr = os.Stderr
	err := cmd.Run()
	if err != nil {
		fmt.Fprintf(os.Stderr, " ERROR: %s\n", err.Error())
	}
	return err
}

// streamDs lists the datasets in db once via "noms ds" and returns a channel
// that yields a randomly chosen dataset name forever. It exits the process
// when the listing fails or is empty.
func streamDs(db string) <-chan string {
	buf := &bytes.Buffer{}
	err := call(buf, "noms", "ds", db)
	if err != nil {
		fmt.Fprintln(os.Stderr, " ERROR: failed to get datasets")
		os.Exit(-1)
	}

	out := strings.Trim(buf.String(), " \n")
	if out == "" {
		fmt.Fprintln(os.Stderr, " ERROR: no datasets at", db)
		os.Exit(-1)
	}

	datasets := strings.Split(out, "\n")

	ch := make(chan string)
	go func() {
		for {
			ch <- datasets[rand.Intn(len(datasets))]
		}
	}()
	return ch
}
def Peers(me, dstDir):
    """Peers copies the peers of me into dstDir.

    Peers looks for files, directories and symlinks next to me and copies them
    (with the same basenames) to dstDir, which is presumed to exist.

    Symlinks are recreated with the same link text rather than followed, so a
    dangling symlink is copied as a dangling symlink. Anything that resolves
    to the same file as me is skipped. Raises Exception for an entry that is
    neither a symlink, a regular file nor a directory.
    """
    myDir = os.path.dirname(os.path.abspath(me))
    for basename in os.listdir(myDir):
        src = os.path.join(myDir, basename)
        dst = os.path.join(dstDir, basename)

        # samefile() stats both paths and raises on a dangling symlink; such
        # a link cannot be me, so skip the comparison for it.
        dangling = os.path.islink(src) and not os.path.exists(src)
        if not dangling and os.path.samefile(me, src):
            continue

        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, dst)
        elif os.path.isfile(src):
            shutil.copy2(src, dst)
        elif os.path.isdir(src):
            shutil.copytree(src, dst)
        else:
            raise Exception("Unknown file type at " + src)
@contextmanager
def pushd(path):
    """Context manager that runs its body with |path| as the working directory.

    The previous working directory is restored when the body finishes,
    including when it raises.
    """
    currentDir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Without the finally, an exception in the body would leave the
        # process in |path| for all subsequent code.
        os.chdir(currentDir)
os.path.join(staging_dir, from_dir) if not os.path.exists(to_dir): os.makedirs(to_dir) yield (f, to_dir) def rename_with_hash(f, to_dir, rename_dict): with open(f) as fh: sha = hashlib.sha256() sha.update(fh.read()) digest = sha.hexdigest() basename = os.path.basename(f) name, ext = os.path.splitext(basename) new_name = '%s.%s%s' % (name, digest[:20], ext) rename_dict[basename] = new_name shutil.move(os.path.join(to_dir, basename), os.path.join(to_dir, new_name)) def GlobCopier(*globs, **kwargs): ''' Returns a function that copies files defined by globs into a staging dir. Arguments: - Zero or more globs used to determine which files to copy. Keyword arguments: - rename (bool) - If True then the files gets renamed to name.%%hash.ext - index_file (str) - If present then this file is copied to the staging dir and its content is updated where the paths to the files are updated to the renamed file paths. ''' exclude = ('webpack.config.js',) rename = 'rename' in kwargs and kwargs['rename'] def stage(staging_dir): if rename: rename_dict = dict() for f, to_dir in run_globs(staging_dir, globs, exclude): shutil.copy2(f, to_dir) if rename: rename_with_hash(f, to_dir, rename_dict) # Update index_file and write it to to_dir. if 'index_file' not in kwargs: return index_file = kwargs['index_file'] from_dir, name = os.path.split(index_file) to_dir = os.path.join(staging_dir, from_dir) with open(index_file, 'r') as f: data = f.read() if rename: for old_name, new_name in rename_dict.iteritems(): r = re.compile(r'\b%s\b' % re.escape(old_name)) data = r.sub(new_name, data) with open(os.path.join(to_dir, name), 'w') as f: f.write(data) return stage def _dir_path(arg): normalized = os.path.realpath(os.path.abspath(arg)) if os.path.exists(normalized) and not os.path.isdir(normalized): raise ValueError(arg + ' is not a path to a directory.') return normalized def _is_sub_dir(subdir, directory): # Need the path-sep at the end to ensure that commonprefix returns the correct result below. 
directory = os.path.join(os.path.realpath(directory), '') subdir = os.path.realpath(subdir) # return true, if the common prefix of both is equal to directory e.g. /a/b/c/d.rst and # directory is /a/b, the common prefix is /a/b return os.path.commonprefix([subdir, directory]) == directory ================================================ FILE: tools/noms/staging_test.py ================================================ #!/usr/bin/python # Copyright 2016 Attic Labs, Inc. All rights reserved. # Licensed under the Apache License, version 2.0: # http://www.apache.org/licenses/LICENSE-2.0 import os, os.path, shutil, tempfile, unittest import staging class TestStaging(unittest.TestCase): def setUp(self): self.tempdir = os.path.realpath(tempfile.mkdtemp()) self.nested = tempfile.mkdtemp(dir=self.tempdir) def tearDown(self): shutil.rmtree(self.tempdir, ignore_errors=True) def test_Nested(self): self.assertTrue(staging._is_sub_dir(self.nested, self.tempdir)) def test_NotNested(self): otherNested = tempfile.mkdtemp(dir=self.tempdir) self.assertFalse(staging._is_sub_dir(self.nested, otherNested)) def test_DotDotNotReallyNested(self): notReallyNested = os.path.join(self.tempdir, 'foo', os.pardir, 'bar') self.assertFalse(staging._is_sub_dir(self.nested, notReallyNested)) def test_LinkNotReallyNested(self): otherNested = tempfile.mkdtemp(dir=self.tempdir) linkName = os.path.join(self.nested, 'link') os.symlink(otherNested, linkName) self.assertFalse(staging._is_sub_dir(linkName, self.nested)) def test_DirPath(self): linkName = os.path.join(self.tempdir, 'link') os.symlink(self.nested, linkName) norm = staging._dir_path(linkName) self.assertEqual(self.nested, norm) def test_DirPathFails(self): f = tempfile.NamedTemporaryFile(dir=self.tempdir) try: staging._dir_path(f.name) except ValueError: pass def test_GlobCopier(self): files = ( 'a.js', 'b.js', 'c.html', 'd.css', 'webpack.config.js', 'x/aa.js', 'x/bb.js', 'x/dd.css', 'x/webpack.config.js', 'x/xx/aaa.js', 'x/xx/bbb.js', 
'x/xx/webpack.config.js', 'x/yy/aaa.js', 'x/yy/bbb.js', 'x/yy/webpack.config.js', 'y/aaaa.js', 'y/bbbb.js', 'y/webpack.config.js', 'y/xxx/a5.js', 'y/xxx/b5.js', 'y/xxx/webpack.config.js', 'z/a6.js', 'z/b6.js', 'z/webpack.config.js', ) for d in ('x/xx', 'x/yy', 'y/xxx', 'z'): os.makedirs(os.path.join(self.tempdir, d)) for name in files: with open(os.path.join(self.tempdir, name), 'w') as f: f.write('hi') cwd = os.getcwd() try: os.chdir(self.tempdir) staging.GlobCopier('*.js', 'c.html', 'x/*.js', 'x/xx/*', 'y/*', 'y/*')(self.nested) finally: os.chdir(cwd) self.assertEqual(sorted(['a.js', 'b.js', 'c.html', 'x', 'y']), sorted(os.listdir(self.nested))) self.assertEqual(sorted(['aa.js', 'bb.js', 'xx']), sorted(os.listdir(os.path.join(self.nested, 'x')))) self.assertEqual(sorted(['aaa.js', 'bbb.js']), sorted(os.listdir(os.path.join(self.nested, 'x/xx')))) self.assertEqual(sorted(['aaaa.js', 'bbbb.js']), sorted(os.listdir(os.path.join(self.nested, 'y')))) def test_GlobCopierWithRename(self): files = ( 'a.js', 'b.js', 'c.html', 'd.css', 'webpack.config.js', 'x/aa.js', 'x/bb.js', 'x/dd.css', 'x/webpack.config.js', 'x/xx/aaa.js', 'x/xx/bbb.js', 'x/xx/webpack.config.js', 'x/yy/aaa.js', 'x/yy/bbb.js', 'x/yy/webpack.config.js', 'y/aaaa.js', 'y/bbbb.js', 'y/webpack.config.js', 'y/xxx/a5.js', 'y/xxx/b5.js', 'y/xxx/webpack.config.js', 'z/a6.js', 'z/b6.js', 'z/webpack.config.js', ) with open(os.path.join(self.tempdir, 'index.html'), 'w') as f: f.write('index.html') for d in ('x/xx', 'x/yy', 'y/xxx', 'z'): os.makedirs(os.path.join(self.tempdir, d)) for name in files: with open(os.path.join(self.tempdir, name), 'w') as f: f.write('hi! 
class LinkError(Exception):
    """Raised when forcing a symlink fails for a non-OS reason."""


def Force(source, linkName):
    """
    Force makes linkName a symlink to source unless linkName is a directory.

    If nothing exists at linkName the symlink is simply created. An existing
    symlink or regular file at linkName is removed first. Anything else that
    exists there (such as a directory) is left alone and LinkError is raised.
    """
    if os.path.lexists(linkName):
        replaceable = os.path.islink(linkName) or os.path.isfile(linkName)
        if not replaceable:
            raise LinkError("Refusing to clobber " + linkName)
        os.remove(linkName)
    os.symlink(source, linkName)
// Env is a map of env vars, mapping key string to value string.
type Env map[string]string

// toStrings returns the current process environment as "KEY=value" strings
// with the entries of e appended after it, followed by a GOROOT entry when e
// does not set one. e itself is never modified and may be nil.
func (e Env) toStrings() (out []string) {
	out = os.Environ()
	for n, v := range e {
		out = append(out, fmt.Sprintf("%s=%s", n, v))
	}
	// Sadly, it seems like we need to force-set GOROOT in the environment to
	// handle some funky runtime environments (e.g. on our Travis setup).
	// It is appended to the result rather than stored in e so the caller's
	// map is left untouched.
	if _, overridden := e["GOROOT"]; !overridden {
		out = append(out, fmt.Sprintf("GOROOT=%s", runtime.GOROOT()))
	}
	return
}
func Serial(stdout, stderr io.Writer, env Env, dir, filename string, args ...string) bool { success := true err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { if os.IsNotExist(err) { // Some programs like npm create temporary log files which confuse filepath.Walk. return nil } if err != nil { d.Panic("Failed directory traversal at %s", path) } if !info.IsDir() && filepath.Base(path) == filename { scriptAndArgs := append([]string{filepath.Base(path)}, args...) runErr := runEnvDir(stdout, stderr, env, filepath.Dir(path), "python", scriptAndArgs...) if runErr != nil { success = false fmt.Fprintf(stderr, "Running %s failed with %v\n", path, runErr) } } return nil }) d.PanicIfError(err) return success } ================================================ FILE: tools/runner/serial_test.go ================================================ // Copyright 2016 Attic Labs, Inc. All rights reserved. // Licensed under the Apache License, version 2.0: // http://www.apache.org/licenses/LICENSE-2.0 package runner import ( "bytes" "fmt" "io" "io/ioutil" "os" "path/filepath" "runtime" "strings" "testing" "github.com/stretchr/testify/suite" ) const ( boilerplate = ` from __future__ import print_function import os, sys %s ` buildFileBasename = "build.py" ) func TestSerialRunnerTestSuite(t *testing.T) { suite.Run(t, &SerialRunnerTestSuite{}) } type SerialRunnerTestSuite struct { suite.Suite dir string index int } func (suite *SerialRunnerTestSuite) SetupTest() { var err error suite.dir, err = ioutil.TempDir(os.TempDir(), "") suite.NoError(err) } func (suite *SerialRunnerTestSuite) TearDownTest() { os.Remove(suite.dir) } func (suite *SerialRunnerTestSuite) TestForceRunInDir() { scriptPath := filepath.Join(suite.dir, buildFileBasename) suite.makeTestBuildFile(scriptPath, []string{"print(os.getcwd(), file=sys.stdout)"}) old := os.Stdout // keep backup of the real stdout r, w, err := os.Pipe() suite.NoError(err) os.Stdout = w defer func() { os.Stdout = old }() 
defer r.Close() outC := make(chan string) // copy the output in a separate goroutine so printing can't block indefinitely go func() { buf := &bytes.Buffer{} io.Copy(buf, r) outC <- buf.String() }() ForceRunInDir(suite.dir, nil, "python", scriptPath) w.Close() out := strings.TrimSpace(<-outC) actualSuiteDir, err := filepath.EvalSymlinks(suite.dir) suite.NoError(err) suite.Equal(actualSuiteDir, out) } func (suite *SerialRunnerTestSuite) TestRunInDir() { scriptPath := filepath.Join(suite.dir, buildFileBasename) suite.makeTestBuildFile(scriptPath, []string{ "print(os.getcwd(), file=sys.stdout)", "print('error', file=sys.stderr)", }) stdout := &bytes.Buffer{} stderr := &bytes.Buffer{} RunInDir(stdout, stderr, suite.dir, "python", scriptPath) actualSuiteDir, err := filepath.EvalSymlinks(suite.dir) suite.NoError(err) suite.Equal(actualSuiteDir, strings.TrimSpace(string(stdout.Bytes()))) suite.Equal("error", strings.TrimSpace(string(stderr.Bytes()))) } func (suite *SerialRunnerTestSuite) TestEnvVars() { makeEnvVarPrintBuildFile := func(path, varname string) { fmtStatement := fmt.Sprintf(`print(os.environ['%s'], file=sys.stdout)`, varname) suite.makeTestBuildFile(path, []string{fmtStatement}) } type testCase struct { path, varname, expected string } env := Env{ "PATH": os.Getenv("PATH"), "GOPATH": os.Getenv("GOPATH"), "NOMS_CHECKOUT_PATH": "/where/noms/is", "ATTIC_CHECKOUT_PATH": "/where/attic/is", } tests := []testCase{} for n, v := range env { tc := testCase{suite.uniqueBuildFile(), n, v} makeEnvVarPrintBuildFile(tc.path, tc.varname) tests = append(tests, tc) } gorootTestCase := testCase{suite.uniqueBuildFile(), "GOROOT", runtime.GOROOT()} makeEnvVarPrintBuildFile(gorootTestCase.path, gorootTestCase.varname) tests = append(tests, gorootTestCase) log := &bytes.Buffer{} if suite.True(Serial(log, log, env, suite.dir, buildFileBasename), "Serial() should have succeeded! 
logs:\n%s", string(log.Bytes())) { logText := string(log.Bytes()) for _, tc := range tests { suite.Contains(logText, tc.expected) } } } func (suite *SerialRunnerTestSuite) TestFailure() { type testCase struct { path, expected string } tests := []testCase{ {suite.uniqueBuildFile(), "Scoobaz"}, {suite.uniqueBuildFile(), "at the disco"}, } goodOne := testCase{suite.uniqueBuildFile(), "All's well"} suite.makeTestBuildFile(tests[0].path, []string{"Scoobaz() # Won't compile."}) suite.makeTestBuildFile(tests[1].path, []string{`assert(False, "at the disco") # Won't run.`}) suite.makeTestBuildFile(goodOne.path, []string{fmt.Sprintf(`print "%s"`, goodOne.expected)}) log := &bytes.Buffer{} suite.False(Serial(log, log, Env{}, suite.dir, buildFileBasename)) logText := string(log.Bytes()) suite.Contains(logText, tests[0].expected) suite.Contains(logText, tests[1].expected) } func (suite *SerialRunnerTestSuite) uniqueBuildFile() string { suite.index++ return filepath.Join(suite.dir, fmt.Sprintf("%d", suite.index), buildFileBasename) } func (suite *SerialRunnerTestSuite) makeTestBuildFile(path string, statements []string) { buf := &bytes.Buffer{} fmt.Fprintf(buf, boilerplate, strings.Join(statements, "\n")) err := os.MkdirAll(filepath.Dir(path), 0777) suite.NoError(err) err = ioutil.WriteFile(path, buf.Bytes(), 0755) suite.NoError(err) }