Repository: couchbase/vellum Branch: master Commit: abd0418dd6c7 Files: 121 Total size: 517.3 KB Directory structure: gitextract_4hy_b8n1/ ├── .github/ │ └── workflows/ │ └── tests.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── automaton.go ├── builder.go ├── builder_test.go ├── cmd/ │ └── vellum/ │ ├── cmd/ │ │ ├── dot.go │ │ ├── dump.go │ │ ├── fuzzy.go │ │ ├── grep.go │ │ ├── info.go │ │ ├── map.go │ │ ├── range.go │ │ ├── root.go │ │ ├── set.go │ │ └── svg.go │ └── main.go ├── common.go ├── common_test.go ├── data/ │ └── words-1000.txt ├── decoder_v1.go ├── decoder_v1_test.go ├── docs/ │ └── format.md ├── encoder_v1.go ├── encoder_v1_test.go ├── encoding.go ├── example_test.go ├── fst.go ├── fst_iterator.go ├── fst_iterator_test.go ├── go.mod ├── go.sum ├── levenshtein/ │ ├── LICENSE │ ├── README.md │ ├── alphabet.go │ ├── alphabet_test.go │ ├── benchmark_test.go │ ├── dfa.go │ ├── dfa_test.go │ ├── levenshtein.go │ ├── levenshtein_nfa.go │ ├── levenshtein_test.go │ └── parametric_dfa.go ├── merge_iterator.go ├── merge_iterator_test.go ├── pack.go ├── pack_test.go ├── regexp/ │ ├── compile.go │ ├── compile_test.go │ ├── dfa.go │ ├── inst.go │ ├── regexp.go │ ├── regexp_test.go │ ├── sparse.go │ └── sparse_test.go ├── registry.go ├── registry_test.go ├── transducer.go ├── utf8/ │ ├── utf8.go │ └── utf8_test.go ├── vellum.go ├── vellum_mmap.go ├── vellum_nommap.go ├── vellum_test.go ├── vendor/ │ ├── github.com/ │ │ ├── edsrzf/ │ │ │ └── mmap-go/ │ │ │ ├── LICENSE │ │ │ ├── mmap.go │ │ │ ├── mmap_unix.go │ │ │ ├── mmap_windows.go │ │ │ ├── msync_netbsd.go │ │ │ └── msync_unix.go │ │ ├── inconshreveable/ │ │ │ └── mousetrap/ │ │ │ ├── LICENSE │ │ │ ├── trap_others.go │ │ │ ├── trap_windows.go │ │ │ └── trap_windows_1.4.go │ │ ├── spf13/ │ │ │ ├── cobra/ │ │ │ │ ├── LICENSE.txt │ │ │ │ ├── bash_completions.go │ │ │ │ ├── cobra.go │ │ │ │ ├── command.go │ │ │ │ ├── command_notwin.go │ │ │ │ ├── command_win.go │ │ │ │ └── doc/ │ │ │ │ ├── man_docs.go │ │ │ │ 
├── md_docs.go │ │ │ │ ├── util.go │ │ │ │ └── yaml_docs.go │ │ │ └── pflag/ │ │ │ ├── LICENSE │ │ │ ├── bool.go │ │ │ ├── bool_slice.go │ │ │ ├── count.go │ │ │ ├── duration.go │ │ │ ├── flag.go │ │ │ ├── float32.go │ │ │ ├── float64.go │ │ │ ├── golangflag.go │ │ │ ├── int.go │ │ │ ├── int32.go │ │ │ ├── int64.go │ │ │ ├── int8.go │ │ │ ├── int_slice.go │ │ │ ├── ip.go │ │ │ ├── ip_slice.go │ │ │ ├── ipmask.go │ │ │ ├── ipnet.go │ │ │ ├── string.go │ │ │ ├── string_array.go │ │ │ ├── string_slice.go │ │ │ ├── uint.go │ │ │ ├── uint16.go │ │ │ ├── uint32.go │ │ │ ├── uint64.go │ │ │ ├── uint8.go │ │ │ └── uint_slice.go │ │ └── willf/ │ │ └── bitset/ │ │ ├── LICENSE │ │ ├── bitset.go │ │ ├── popcnt.go │ │ ├── popcnt_amd64.go │ │ ├── popcnt_amd64.s │ │ └── popcnt_generic.go │ └── manifest ├── writer.go └── writer_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/tests.yml ================================================ on: push: branches: - master pull_request: name: Tests jobs: test: strategy: matrix: go-version: [1.13.x, 1.14.x] platform: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: - name: Install Go uses: actions/setup-go@v1 with: go-version: ${{ matrix.go-version }} - name: Checkout code uses: actions/checkout@v2 - name: Test run: | go version go test -race ./... ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to Vellum We look forward to your contributions, but ask that you first review these guidelines. ### Sign the CLA As Vellum is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). 
The Vellum project does not use this code review tool but it is still used to track acceptance of the contributor license agreements. ### Submitting a Pull Request All types of contributions are welcome, but please keep the following in mind: - If you're planning a large change, you should really discuss it in a github issue first. This helps avoid duplicate effort and spending time on something that may not be merged. - Existing tests should continue to pass, new tests for the contribution are nice to have. - All code should have gone through `go fmt` - All code should pass `go vet` ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # ![vellum](docs/logo.png) vellum # NOTE: active development of the vellum library has moved to [https://github.com/blevesearch/vellum](https://github.com/blevesearch/vellum) This repository will remain as is to support previous Couchbase builds. [![Tests](https://github.com/couchbase/vellum/workflows/Tests/badge.svg?branch=master&event=push)](https://github.com/couchbase/vellum/actions?query=workflow%3ATests+event%3Apush+branch%3Amaster) [![Coverage Status](https://coveralls.io/repos/github/couchbase/vellum/badge.svg?branch=master)](https://coveralls.io/github/couchbase/vellum?branch=master) [![GoDoc](https://godoc.org/github.com/couchbase/vellum?status.svg)](https://godoc.org/github.com/couchbase/vellum) [![Go Report Card](https://goreportcard.com/badge/github.com/couchbase/vellum)](https://goreportcard.com/report/github.com/couchbase/vellum) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) A Go library implementing an FST (finite state transducer) capable of: - mapping between keys ([]byte) and a value (uint64) - enumerating keys in lexicographic order Some additional goals of this implementation: - bounded memory use while building the FST - streaming out FST data while building - mmap FST runtime to support very large FSTs 
(optional) ## Usage ### Building an FST To build an FST, create a new builder using the `New()` method. This method takes an `io.Writer` as an argument. As the FST is being built, data will be streamed to the writer as soon as possible. With this builder you **MUST** insert keys in lexicographic order. Inserting keys out of order will result in an error. After inserting the last key into the builder, you **MUST** call `Close()` on the builder. This will flush all remaining data to the underlying writer. In memory: ```go var buf bytes.Buffer builder, err := vellum.New(&buf, nil) if err != nil { log.Fatal(err) } ``` To disk: ```go f, err := os.Create("/tmp/vellum.fst") if err != nil { log.Fatal(err) } builder, err := vellum.New(f, nil) if err != nil { log.Fatal(err) } ``` **MUST** insert keys in lexicographic order: ```go err = builder.Insert([]byte("cat"), 1) if err != nil { log.Fatal(err) } err = builder.Insert([]byte("dog"), 2) if err != nil { log.Fatal(err) } err = builder.Insert([]byte("fish"), 3) if err != nil { log.Fatal(err) } err = builder.Close() if err != nil { log.Fatal(err) } ``` ### Using an FST After closing the builder, the data can be used to instantiate an FST. If the data was written to disk, you can use the `Open()` method to mmap the file. If the data is already in memory, or you wish to load/mmap the data yourself, you can instantiate the FST with the `Load()` method. 
Load in memory: ```go fst, err := vellum.Load(buf.Bytes()) if err != nil { log.Fatal(err) } ``` Open from disk: ```go fst, err := vellum.Open("/tmp/vellum.fst") if err != nil { log.Fatal(err) } ``` Get key/value: ```go val, exists, err = fst.Get([]byte("dog")) if err != nil { log.Fatal(err) } if exists { fmt.Printf("contains dog with val: %d\n", val) } else { fmt.Printf("does not contain dog") } ``` Iterate key/values: ```go itr, err := fst.Iterator(startKeyInclusive, endKeyExclusive) for err == nil { key, val := itr.Current() fmt.Printf("contains key: %s val: %d", key, val) err = itr.Next() } if err != nil { log.Fatal(err) } ``` ### How does the FST get built? A full example of the implementation is beyond the scope of this README, but let's consider a small example where we want to insert 3 key/value pairs. First we insert "are" with the value 4. ![step1](docs/demo1.png) Next, we insert "ate" with the value 2. ![step2](docs/demo2.png) Notice how the values associated with the transitions were adjusted so that by summing them while traversing we still get the expected value. At this point, we see that state 5 looks like state 3, and state 4 looks like state 2. But, we cannot yet combine them because future inserts could change this. Now, we insert "see" with value 3. Once it has been added, we now know that states 5 and 4 can no longer change. Since they are identical to 3 and 2, we replace them. ![step3](docs/demo3.png) Again, we see that states 7 and 8 appear to be identical to 2 and 3. Having inserted our last key, we call `Close()` on the builder. ![step4](docs/demo4.png) Now, states 7 and 8 can safely be replaced with 2 and 3. For additional information, see the references at the bottom of this document. ### What does the serialized format look like? We've broken out a separate document on the [vellum disk format v1](docs/format.md). ### What if I want to use this on a system that doesn't have mmap? 
The mmap library itself is guarded with system/architecture build tags, but we've also added an additional build tag in vellum. If you'd like to Open() a file based representation of an FST, but not use mmap, you can build the library with the `nommap` build tag. NOTE: if you do this, the entire FST will be read into memory. ### Can I use this with Unicode strings? Yes, however this implementation is only aware of the byte representation you choose. In order to find matches, you must work with some canonical byte representation of the string. In the future, some encoding-aware traversals may be possible on top of the lower-level byte transitions. ### How did this library come to be? In my work on the [Bleve](https://github.com/blevesearch/bleve) project I became aware of the power of the FST for many search-related tasks. The obvious starting point for such a thing in Go was the [mafsa](https://github.com/smartystreets/mafsa) project. While working with mafsa I encountered some issues. First, it did not stream data to disk while building. Second, it chose to use a rune as the fundamental unit of transition in the FST, but I felt using a byte would be more powerful in the end. My hope is that higher-level encoding-aware traversals will be possible when necessary. Finally, as I reported bugs and submitted PRs I learned that the mafsa project was mainly a research project and no longer being maintained. I wanted to build something that could be used in production. As the project advanced more and more techniques from the [BurntSushi/fst](https://github.com/BurntSushi/fst) were adapted to our implementation. ### Are there tools to work with vellum files? Under the cmd/vellum subdirectory, there's a command-line tool which features subcommands that can allow you to create, inspect and query vellum files. ### How can I generate a state transition diagram from a vellum file? 
The vellum command-line tool has a "dot" subcommand that can emit graphviz dot output data from an input vellum file. The dot file can in turn be converted into an image using graphviz tools. Example... $ vellum dot myFile.vellum > output.dot $ dot -Tpng output.dot -o output.png ## Related Work Much credit goes to two existing projects: - [mafsa](https://github.com/smartystreets/mafsa) - [BurntSushi/fst](https://github.com/BurntSushi/fst) Most of the original implementation here started with my digging into the internals of mafsa. As the implementation progressed, I continued to borrow ideas/approaches from the BurntSushi/fst library as well. For a great introduction to this topic, please read the blog post [Index 1,600,000,000 Keys with Automata and Rust](http://blog.burntsushi.net/transducers/) ================================================ FILE: automaton.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package vellum // Automaton represents the general contract of a byte-based finite automaton type Automaton interface { // Start returns the start state Start() int // IsMatch returns true if and only if the state is a match IsMatch(int) bool // CanMatch returns true if and only if it is possible to reach a match // in zero or more steps CanMatch(int) bool // WillAlwaysMatch returns true if and only if the current state matches // and will always match no matter what steps are taken WillAlwaysMatch(int) bool // Accept returns the next state given the input to the specified state Accept(int, byte) int } // AutomatonContains implements an generic Contains() method which works // on any implementation of Automaton func AutomatonContains(a Automaton, k []byte) bool { i := 0 curr := a.Start() for a.CanMatch(curr) && i < len(k) { curr = a.Accept(curr, k[i]) if curr == noneAddr { break } i++ } if i != len(k) { return false } return a.IsMatch(curr) } // AlwaysMatch is an Automaton implementation which always matches type AlwaysMatch struct{} // Start returns the AlwaysMatch start state func (m *AlwaysMatch) Start() int { return 0 } // IsMatch always returns true func (m *AlwaysMatch) IsMatch(int) bool { return true } // CanMatch always returns true func (m *AlwaysMatch) CanMatch(int) bool { return true } // WillAlwaysMatch always returns true func (m *AlwaysMatch) WillAlwaysMatch(int) bool { return true } // Accept returns the next AlwaysMatch state func (m *AlwaysMatch) Accept(int, byte) int { return 0 } // creating an alwaysMatchAutomaton to avoid unnecessary repeated allocations. var alwaysMatchAutomaton = &AlwaysMatch{} ================================================ FILE: builder.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "bytes" "io" ) var defaultBuilderOpts = &BuilderOpts{ Encoder: 1, RegistryTableSize: 10000, RegistryMRUSize: 2, } // A Builder is used to build a new FST. When possible data is // streamed out to the underlying Writer as soon as possible. type Builder struct { unfinished *unfinishedNodes registry *registry last []byte len int lastAddr int encoder encoder opts *BuilderOpts builderNodePool *builderNodePool } const noneAddr = 1 const emptyAddr = 0 // NewBuilder returns a new Builder which will stream out the // underlying representation to the provided Writer as the set is built. func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) { if opts == nil { opts = defaultBuilderOpts } builderNodePool := &builderNodePool{} rv := &Builder{ unfinished: newUnfinishedNodes(builderNodePool), registry: newRegistry(builderNodePool, opts.RegistryTableSize, opts.RegistryMRUSize), builderNodePool: builderNodePool, opts: opts, lastAddr: noneAddr, } var err error rv.encoder, err = loadEncoder(opts.Encoder, w) if err != nil { return nil, err } err = rv.encoder.start() if err != nil { return nil, err } return rv, nil } func (b *Builder) Reset(w io.Writer) error { b.unfinished.Reset() b.registry.Reset() b.lastAddr = noneAddr b.encoder.reset(w) b.last = nil b.len = 0 err := b.encoder.start() if err != nil { return err } return nil } // Insert the provided value to the set being built. // NOTE: values must be inserted in lexicographical order. 
func (b *Builder) Insert(key []byte, val uint64) error { // ensure items are added in lexicographic order if bytes.Compare(key, b.last) < 0 { return ErrOutOfOrder } if len(key) == 0 { b.len = 1 b.unfinished.setRootOutput(val) return nil } prefixLen, out := b.unfinished.findCommonPrefixAndSetOutput(key, val) b.len++ err := b.compileFrom(prefixLen) if err != nil { return err } b.copyLastKey(key) b.unfinished.addSuffix(key[prefixLen:], out) return nil } func (b *Builder) copyLastKey(key []byte) { if b.last == nil { b.last = make([]byte, 0, 64) } else { b.last = b.last[:0] } b.last = append(b.last, key...) } // Close MUST be called after inserting all values. func (b *Builder) Close() error { err := b.compileFrom(0) if err != nil { return err } root := b.unfinished.popRoot() rootAddr, err := b.compile(root) if err != nil { return err } return b.encoder.finish(b.len, rootAddr) } func (b *Builder) compileFrom(iState int) error { addr := noneAddr for iState+1 < len(b.unfinished.stack) { var node *builderNode if addr == noneAddr { node = b.unfinished.popEmpty() } else { node = b.unfinished.popFreeze(addr) } var err error addr, err = b.compile(node) if err != nil { return nil } } b.unfinished.topLastFreeze(addr) return nil } func (b *Builder) compile(node *builderNode) (int, error) { if node.final && len(node.trans) == 0 && node.finalOutput == 0 { return 0, nil } found, addr, entry := b.registry.entry(node) if found { return addr, nil } addr, err := b.encoder.encodeState(node, b.lastAddr) if err != nil { return 0, err } b.lastAddr = addr entry.addr = addr return addr, nil } type unfinishedNodes struct { stack []*builderNodeUnfinished // cache allocates a reasonable number of builderNodeUnfinished // objects up front and tries to keep reusing them // because the main data structure is a stack, we assume the // same access pattern, and don't track items separately // this means calls get() and pushXYZ() must be paired, // as well as calls put() and popXYZ() cache 
[]builderNodeUnfinished builderNodePool *builderNodePool } func (u *unfinishedNodes) Reset() { u.stack = u.stack[:0] for i := 0; i < len(u.cache); i++ { u.cache[i] = builderNodeUnfinished{} } u.pushEmpty(false) } func newUnfinishedNodes(p *builderNodePool) *unfinishedNodes { rv := &unfinishedNodes{ stack: make([]*builderNodeUnfinished, 0, 64), cache: make([]builderNodeUnfinished, 64), builderNodePool: p, } rv.pushEmpty(false) return rv } // get new builderNodeUnfinished, reusing cache if possible func (u *unfinishedNodes) get() *builderNodeUnfinished { if len(u.stack) < len(u.cache) { return &u.cache[len(u.stack)] } // full now allocate a new one return &builderNodeUnfinished{} } // return builderNodeUnfinished, clearing it for reuse func (u *unfinishedNodes) put() { if len(u.stack) >= len(u.cache) { return // do nothing, not part of cache } u.cache[len(u.stack)] = builderNodeUnfinished{} } func (u *unfinishedNodes) findCommonPrefixAndSetOutput(key []byte, out uint64) (int, uint64) { var i int for i < len(key) { if i >= len(u.stack) { break } var addPrefix uint64 if !u.stack[i].hasLastT { break } if u.stack[i].lastIn == key[i] { commonPre := outputPrefix(u.stack[i].lastOut, out) addPrefix = outputSub(u.stack[i].lastOut, commonPre) out = outputSub(out, commonPre) u.stack[i].lastOut = commonPre i++ } else { break } if addPrefix != 0 { u.stack[i].addOutputPrefix(addPrefix) } } return i, out } func (u *unfinishedNodes) pushEmpty(final bool) { next := u.get() next.node = u.builderNodePool.Get() next.node.final = final u.stack = append(u.stack, next) } func (u *unfinishedNodes) popRoot() *builderNode { l := len(u.stack) var unfinished *builderNodeUnfinished u.stack, unfinished = u.stack[:l-1], u.stack[l-1] rv := unfinished.node u.put() return rv } func (u *unfinishedNodes) popFreeze(addr int) *builderNode { l := len(u.stack) var unfinished *builderNodeUnfinished u.stack, unfinished = u.stack[:l-1], u.stack[l-1] unfinished.lastCompiled(addr) rv := unfinished.node u.put() 
return rv } func (u *unfinishedNodes) popEmpty() *builderNode { l := len(u.stack) var unfinished *builderNodeUnfinished u.stack, unfinished = u.stack[:l-1], u.stack[l-1] rv := unfinished.node u.put() return rv } func (u *unfinishedNodes) setRootOutput(out uint64) { u.stack[0].node.final = true u.stack[0].node.finalOutput = out } func (u *unfinishedNodes) topLastFreeze(addr int) { last := len(u.stack) - 1 u.stack[last].lastCompiled(addr) } func (u *unfinishedNodes) addSuffix(bs []byte, out uint64) { if len(bs) == 0 { return } last := len(u.stack) - 1 u.stack[last].hasLastT = true u.stack[last].lastIn = bs[0] u.stack[last].lastOut = out for _, b := range bs[1:] { next := u.get() next.node = u.builderNodePool.Get() next.hasLastT = true next.lastIn = b next.lastOut = 0 u.stack = append(u.stack, next) } u.pushEmpty(true) } type builderNodeUnfinished struct { node *builderNode lastOut uint64 lastIn byte hasLastT bool } func (b *builderNodeUnfinished) lastCompiled(addr int) { if b.hasLastT { transIn := b.lastIn transOut := b.lastOut b.hasLastT = false b.lastOut = 0 b.node.trans = append(b.node.trans, transition{ in: transIn, out: transOut, addr: addr, }) } } func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) { if b.node.final { b.node.finalOutput = outputCat(prefix, b.node.finalOutput) } for i := range b.node.trans { b.node.trans[i].out = outputCat(prefix, b.node.trans[i].out) } if b.hasLastT { b.lastOut = outputCat(prefix, b.lastOut) } } type builderNode struct { finalOutput uint64 trans []transition final bool // intrusive linked list next *builderNode } // reset resets the receiver builderNode to a re-usable state. 
func (n *builderNode) reset() { n.final = false n.finalOutput = 0 for i := range n.trans { n.trans[i] = emptyTransition } n.trans = n.trans[:0] n.next = nil } func (n *builderNode) equiv(o *builderNode) bool { if n.final != o.final { return false } if n.finalOutput != o.finalOutput { return false } if len(n.trans) != len(o.trans) { return false } for i, ntrans := range n.trans { otrans := o.trans[i] if ntrans.in != otrans.in { return false } if ntrans.addr != otrans.addr { return false } if ntrans.out != otrans.out { return false } } return true } var emptyTransition = transition{} type transition struct { out uint64 addr int in byte } func outputPrefix(l, r uint64) uint64 { if l < r { return l } return r } func outputSub(l, r uint64) uint64 { return l - r } func outputCat(l, r uint64) uint64 { return l + r } // builderNodePool pools builderNodes using a singly linked list. // // NB: builderNode lifecylce is described by the following interactions - // +------------------------+ +----------------------+ // | Unfinished Nodes | Transfer once | Registry | // |(not frozen builderNode)|-----builderNode is ------->| (frozen builderNode) | // +------------------------+ marked frozen +----------------------+ // ^ | // | | // | Put() // | Get() on +-------------------+ when // +-new char--------| builderNode Pool |<-----------evicted // +-------------------+ type builderNodePool struct { head *builderNode } func (p *builderNodePool) Get() *builderNode { if p.head == nil { return &builderNode{} } head := p.head p.head = p.head.next return head } func (p *builderNodePool) Put(v *builderNode) { if v == nil { return } v.reset() v.next = p.head p.head = v } ================================================ FILE: builder_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "bufio" "io/ioutil" "math/rand" "os" "sort" "testing" ) func init() { thousandTestWords, _ = loadWords("data/words-1000.txt") } // this simple test case only has a shared final state // it also tests out of order insert func TestBuilderSimple(t *testing.T) { b, err := New(ioutil.Discard, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } // add our first string err = b.Insert([]byte("jul"), 0) if err != nil { t.Errorf("got error inserting string: %v", err) } // expect len to be 1 if b.len != 1 { t.Errorf("expected node count to be 1, got %v", b.len) } // try to add a value out of order (not allowed) err = b.Insert([]byte("abc"), 0) if err == nil { t.Errorf("expected err, got nil") } // add a second string err = b.Insert([]byte("mar"), 0) if err != nil { t.Errorf("got error inserting string: %v", err) } // expect len to grow by 1 if b.len != 2 { t.Errorf("expected node count to be 2, got %v", b.len) } // now close the builder err = b.Close() if err != nil { t.Errorf("got error closing set builder: %v", err) } } func TestBuilderSharedPrefix(t *testing.T) { b, err := New(ioutil.Discard, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } // add our first string err = b.Insert([]byte("car"), 0) if err != nil { t.Errorf("got error inserting string: %v", err) } // expect len to be 1 if b.len != 1 { t.Errorf("expected node count to be 1, got %v", b.len) } // add a second string err = b.Insert([]byte("cat"), 0) if err != nil { t.Errorf("got error inserting string: %v", err) } // expect len to be 2 if b.len != 2 
// loadWords reads the file at path and returns its lines, in order, with the
// line terminators stripped. Empty lines are kept as empty strings.
//
// It returns a nil slice and a non-nil error if the file cannot be opened,
// if scanning fails (for example on a line longer than the scanner's token
// limit), or if closing the file fails. The file is closed on every path.
func loadWords(path string) (rv []string, err error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer func() {
		// report a close failure only when nothing else went wrong first
		if cerr := file.Close(); cerr != nil && err == nil {
			rv, err = nil, cerr
		}
	}()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		// Text allocates a fresh string, so it is safe to retain
		rv = append(rv, scanner.Text())
	}
	if err = scanner.Err(); err != nil {
		return nil, err
	}
	return rv, nil
}
package cmd import ( "fmt" "io" "os" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) var dotCmd = &cobra.Command{ Use: "dot", Short: "Dot prints the contents of this vellum FST file in the dot format", Long: `Dot prints the contents of this vellum FST file in the dot format.`, PreRunE: func(cmd *cobra.Command, args []string) error { if len(args) < 1 { return fmt.Errorf("path is required") } return nil }, RunE: func(cmd *cobra.Command, args []string) error { fst, err := vellum.Open(args[0]) if err != nil { return err } return dotToWriter(fst, os.Stdout) }, } func dotToWriter(fst *vellum.FST, w io.Writer) error { _, err := fmt.Fprint(w, dotHeader) if err != nil { return err } err = fst.Debug(func(n int, state interface{}) error { if d, ok := state.(dotStringer); ok { _, err = fmt.Fprintf(w, "%s", d.DotString(n)) if err != nil { return err } } return nil }) if err != nil { return err } _, err = fmt.Fprint(w, dotFooter) if err != nil { return err } return nil } const dotHeader = ` digraph automaton { labelloc="l"; labeljust="l"; rankdir="LR"; ` const dotFooter = `} ` type dotStringer interface { DotString(int) string } func init() { RootCmd.AddCommand(dotCmd) } ================================================ FILE: cmd/vellum/cmd/dump.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
// debugPrint writes state to standard output in its default format,
// followed by a newline. The state number n is not used, and the returned
// error is always nil.
func debugPrint(n int, state interface{}) error {
	fmt.Println(state)
	return nil
}
package cmd import ( "fmt" "github.com/couchbase/vellum" "github.com/couchbase/vellum/levenshtein" "github.com/spf13/cobra" ) var query string var distance int var fuzzyCmd = &cobra.Command{ Use: "fuzzy", Short: "Fuzzy runs a fuzzy query over the contents of this vellum FST file", Long: `Fuzzy runs a fuzzy query over the contents of this vellum FST file.`, PreRunE: func(cmd *cobra.Command, args []string) error { if len(args) < 1 { return fmt.Errorf("path is required") } if len(args) > 1 { query = args[1] } return nil }, RunE: func(cmd *cobra.Command, args []string) error { fst, err := vellum.Open(args[0]) if err != nil { return err } lb, err := levenshtein.NewLevenshteinAutomatonBuilder(uint8(distance), false) if err != nil { return err } fuzzy, err := lb.BuildDfa(query, uint8(distance)) if err != nil { return err } var startKeyB, endKeyB []byte if startKey != "" { startKeyB = []byte(startKey) } if endKey != "" { endKeyB = []byte(endKey) } itr, err := fst.Search(fuzzy, startKeyB, endKeyB) for err == nil { key, val := itr.Current() fmt.Printf("%s - %d\n", key, val) err = itr.Next() } return nil }, } func init() { RootCmd.AddCommand(fuzzyCmd) fuzzyCmd.Flags().StringVar(&startKey, "start", "", "start key inclusive") fuzzyCmd.Flags().StringVar(&endKey, "end", "", "end key inclusive") fuzzyCmd.Flags().IntVar(&distance, "distance", 1, "edit distance in Unicode codepoints") } ================================================ FILE: cmd/vellum/cmd/grep.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package cmd import ( "fmt" "github.com/couchbase/vellum" "github.com/couchbase/vellum/regexp" "github.com/spf13/cobra" ) var grepCmd = &cobra.Command{ Use: "grep", Short: "Grep runs regular expression searches over the contents of this " + "vellum FST file.", Long: `Grep runs regular expression searches over the contents of this ` + `vellum FST file.`, PreRunE: func(cmd *cobra.Command, args []string) error { if len(args) < 1 { return fmt.Errorf("path is required") } if len(args) > 1 { query = args[1] } return nil }, RunE: func(cmd *cobra.Command, args []string) error { fst, err := vellum.Open(args[0]) if err != nil { return err } r, err := regexp.New(query) if err != nil { return err } var startKeyB, endKeyB []byte if startKey != "" { startKeyB = []byte(startKey) } if endKey != "" { endKeyB = []byte(endKey) } itr, err := fst.Search(r, startKeyB, endKeyB) for err == nil { key, val := itr.Current() fmt.Printf("%s - %d\n", key, val) err = itr.Next() } return nil }, } func init() { RootCmd.AddCommand(grepCmd) grepCmd.Flags().StringVar(&startKey, "start", "", "start key inclusive") grepCmd.Flags().StringVar(&endKey, "end", "", "end key inclusive") } ================================================ FILE: cmd/vellum/cmd/info.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package cmd import ( "fmt" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) var infoCmd = &cobra.Command{ Use: "info", Short: "Prints info about this vellum FST file", Long: `Prints info about this vellum FST file.`, PreRunE: func(cmd *cobra.Command, args []string) error { if len(args) < 1 { return fmt.Errorf("path is required") } return nil }, RunE: func(cmd *cobra.Command, args []string) error { fst, err := vellum.Open(args[0]) if err != nil { return err } fmt.Printf("version: %d\n", fst.Version()) fmt.Printf("length: %d\n", fst.Len()) return nil }, } func init() { RootCmd.AddCommand(infoCmd) } ================================================ FILE: cmd/vellum/cmd/map.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package cmd import ( "encoding/csv" "fmt" "io" "log" "os" "strconv" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) var mapCmd = &cobra.Command{ Use: "map", Short: "Map builds a new FST from a CSV file containing key,val pairs", Long: `Map builds a new FST from a CSV file containing key,val pairs.`, PreRunE: func(cmd *cobra.Command, args []string) error { if len(args) < 1 { return fmt.Errorf("source and target paths are required") } if len(args) < 2 { return fmt.Errorf("target path is required") } return nil }, RunE: func(cmd *cobra.Command, args []string) error { if !sorted { return fmt.Errorf("only sorted input supported at this time") } file, err := os.Open(args[0]) if err != nil { log.Fatal(err) } defer file.Close() f, err := os.Create(args[1]) if err != nil { return err } b, err := vellum.New(f, nil) if err != nil { return err } reader := csv.NewReader(file) reader.FieldsPerRecord = 2 var record []string record, err = reader.Read() for err == nil { var v uint64 v, err = strconv.ParseUint(record[1], 10, 64) if err != nil { return err } err = b.Insert([]byte(record[0]), v) if err != nil { return err } record, err = reader.Read() } if err != io.EOF { return err } err = b.Close() if err != nil { return err } return nil }, } func init() { RootCmd.AddCommand(mapCmd) mapCmd.Flags().BoolVar(&sorted, "sorted", false, "input already sorted") } ================================================ FILE: cmd/vellum/cmd/range.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package cmd import ( "fmt" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) var startKey string var endKey string var rangeCmd = &cobra.Command{ Use: "range", Short: "Range iterates over the contents of this vellum FST file", Long: `Range iterates over the contents of this vellum FST file. You can optionally specify start/end keys after the filename.`, PreRunE: func(cmd *cobra.Command, args []string) error { if len(args) < 1 { return fmt.Errorf("path is required") } return nil }, RunE: func(cmd *cobra.Command, args []string) error { fst, err := vellum.Open(args[0]) if err != nil { return err } var startKeyB, endKeyB []byte if startKey != "" { startKeyB = []byte(startKey) } if endKey != "" { endKeyB = []byte(endKey) } itr, err := fst.Iterator(startKeyB, endKeyB) for err == nil { key, val := itr.Current() fmt.Printf("%s - %d\n", key, val) err = itr.Next() } return nil }, } func init() { RootCmd.AddCommand(rangeCmd) rangeCmd.Flags().StringVar(&startKey, "start", "", "start key inclusive") rangeCmd.Flags().StringVar(&endKey, "end", "", "end key inclusive") } ================================================ FILE: cmd/vellum/cmd/root.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package cmd import ( "fmt" "net/http" "os" "github.com/spf13/cobra" ) var expvarBind string // RootCmd represents the base command when called without any subcommands var RootCmd = &cobra.Command{ Use: "vellum", Short: "A utility to work with vellum FST files", Long: `A utility to work with vellum FST files.`, PersistentPreRunE: func(cmd *cobra.Command, args []string) error { if expvarBind != "" { go http.ListenAndServe(expvarBind, nil) } return nil }, } // Execute adds all child commands to the root command sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. func Execute() { if err := RootCmd.Execute(); err != nil { fmt.Println(err) os.Exit(-1) } } func init() { RootCmd.PersistentFlags().StringVar(&expvarBind, "expvar", "", "bind address for expvar, default none") } ================================================ FILE: cmd/vellum/cmd/set.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package cmd import ( "bufio" "fmt" "log" "os" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) var sorted bool var setCmd = &cobra.Command{ Use: "set", Short: "Set builds a new FST from a file containing new-line separated values", Long: `Set builds a new FST from a file containing new-line separated values.`, PreRunE: func(cmd *cobra.Command, args []string) error { if len(args) < 1 { return fmt.Errorf("source and target paths are required") } if len(args) < 2 { return fmt.Errorf("target path is required") } return nil }, RunE: func(cmd *cobra.Command, args []string) error { if !sorted { return fmt.Errorf("only sorted input supported at this time") } file, err := os.Open(args[0]) if err != nil { log.Fatal(err) } defer file.Close() f, err := os.Create(args[1]) if err != nil { return err } b, err := vellum.New(f, nil) if err != nil { return err } scanner := bufio.NewScanner(file) for scanner.Scan() { word := append([]byte(nil), scanner.Bytes()...) err = b.Insert(word, 0) if err != nil { return err } } if err = scanner.Err(); err != nil { log.Fatal(err) } err = b.Close() if err != nil { return err } return nil }, } func init() { RootCmd.AddCommand(setCmd) setCmd.Flags().BoolVar(&sorted, "sorted", false, "input already sorted") } ================================================ FILE: cmd/vellum/cmd/svg.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package cmd import ( "fmt" "io" "io/ioutil" "os" "os/exec" "github.com/couchbase/vellum" "github.com/spf13/cobra" ) var svgCmd = &cobra.Command{ Use: "svg", Short: "SVG prints the contents of this vellum FST file in the SVG format", Long: `SVG prints the contents of this vellum FST file in the SVG format.`, PreRunE: func(cmd *cobra.Command, args []string) error { if len(args) < 1 { return fmt.Errorf("path is required") } return nil }, RunE: func(cmd *cobra.Command, args []string) error { fst, err := vellum.Open(args[0]) if err != nil { return err } return svgToWriter(fst, os.Stdout) }, } func svgToWriter(fst *vellum.FST, w io.Writer) error { pr, pw := io.Pipe() go func() { defer func() { _ = pw.Close() }() _ = dotToWriter(fst, pw) }() cmd := exec.Command("dot", "-Tsvg") cmd.Stdin = pr cmd.Stdout = w cmd.Stderr = ioutil.Discard err := cmd.Run() if err != nil { return err } return nil } func init() { RootCmd.AddCommand(svgCmd) } ================================================ FILE: cmd/vellum/main.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( _ "expvar" "github.com/couchbase/vellum/cmd/vellum/cmd" ) func main() { cmd.Execute() } ================================================ FILE: common.go ================================================ // Copyright (c) 2017 Couchbase, Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum const maxCommon = 1<<6 - 1 func encodeCommon(in byte) byte { val := byte((int(commonInputs[in]) + 1) % 256) if val > maxCommon { return 0 } return val } func decodeCommon(in byte) byte { return commonInputsInv[in-1] } var commonInputs = []byte{ 84, // '\x00' 85, // '\x01' 86, // '\x02' 87, // '\x03' 88, // '\x04' 89, // '\x05' 90, // '\x06' 91, // '\x07' 92, // '\x08' 93, // '\t' 94, // '\n' 95, // '\x0b' 96, // '\x0c' 97, // '\r' 98, // '\x0e' 99, // '\x0f' 100, // '\x10' 101, // '\x11' 102, // '\x12' 103, // '\x13' 104, // '\x14' 105, // '\x15' 106, // '\x16' 107, // '\x17' 108, // '\x18' 109, // '\x19' 110, // '\x1a' 111, // '\x1b' 112, // '\x1c' 113, // '\x1d' 114, // '\x1e' 115, // '\x1f' 116, // ' ' 80, // '!' 117, // '"' 118, // '#' 79, // '$' 39, // '%' 30, // '&' 81, // "'" 75, // '(' 74, // ')' 82, // '*' 57, // '+' 66, // ',' 16, // '-' 12, // '.' 2, // '/' 19, // '0' 20, // '1' 21, // '2' 27, // '3' 32, // '4' 29, // '5' 35, // '6' 36, // '7' 37, // '8' 34, // '9' 24, // ':' 73, // ';' 119, // '<' 23, // '=' 120, // '>' 40, // '?' 
83, // '@' 44, // 'A' 48, // 'B' 42, // 'C' 43, // 'D' 49, // 'E' 46, // 'F' 62, // 'G' 61, // 'H' 47, // 'I' 69, // 'J' 68, // 'K' 58, // 'L' 56, // 'M' 55, // 'N' 59, // 'O' 51, // 'P' 72, // 'Q' 54, // 'R' 45, // 'S' 52, // 'T' 64, // 'U' 65, // 'V' 63, // 'W' 71, // 'X' 67, // 'Y' 70, // 'Z' 77, // '[' 121, // '\\' 78, // ']' 122, // '^' 31, // '_' 123, // '`' 4, // 'a' 25, // 'b' 9, // 'c' 17, // 'd' 1, // 'e' 26, // 'f' 22, // 'g' 13, // 'h' 7, // 'i' 50, // 'j' 38, // 'k' 14, // 'l' 15, // 'm' 10, // 'n' 3, // 'o' 8, // 'p' 60, // 'q' 6, // 'r' 5, // 's' 0, // 't' 18, // 'u' 33, // 'v' 11, // 'w' 41, // 'x' 28, // 'y' 53, // 'z' 124, // '{' 125, // '|' 126, // '}' 76, // '~' 127, // '\x7f' 128, // '\x80' 129, // '\x81' 130, // '\x82' 131, // '\x83' 132, // '\x84' 133, // '\x85' 134, // '\x86' 135, // '\x87' 136, // '\x88' 137, // '\x89' 138, // '\x8a' 139, // '\x8b' 140, // '\x8c' 141, // '\x8d' 142, // '\x8e' 143, // '\x8f' 144, // '\x90' 145, // '\x91' 146, // '\x92' 147, // '\x93' 148, // '\x94' 149, // '\x95' 150, // '\x96' 151, // '\x97' 152, // '\x98' 153, // '\x99' 154, // '\x9a' 155, // '\x9b' 156, // '\x9c' 157, // '\x9d' 158, // '\x9e' 159, // '\x9f' 160, // '\xa0' 161, // '¡' 162, // '¢' 163, // '£' 164, // '¤' 165, // '¥' 166, // '¦' 167, // '§' 168, // '¨' 169, // '©' 170, // 'ª' 171, // '«' 172, // '¬' 173, // '\xad' 174, // '®' 175, // '¯' 176, // '°' 177, // '±' 178, // '²' 179, // '³' 180, // '´' 181, // 'µ' 182, // '¶' 183, // '·' 184, // '¸' 185, // '¹' 186, // 'º' 187, // '»' 188, // '¼' 189, // '½' 190, // '¾' 191, // '¿' 192, // 'À' 193, // 'Á' 194, // 'Â' 195, // 'Ã' 196, // 'Ä' 197, // 'Å' 198, // 'Æ' 199, // 'Ç' 200, // 'È' 201, // 'É' 202, // 'Ê' 203, // 'Ë' 204, // 'Ì' 205, // 'Í' 206, // 'Î' 207, // 'Ï' 208, // 'Ð' 209, // 'Ñ' 210, // 'Ò' 211, // 'Ó' 212, // 'Ô' 213, // 'Õ' 214, // 'Ö' 215, // '×' 216, // 'Ø' 217, // 'Ù' 218, // 'Ú' 219, // 'Û' 220, // 'Ü' 221, // 'Ý' 222, // 'Þ' 223, // 'ß' 224, // 'à' 225, // 'á' 226, // 'â' 
227, // 'ã' 228, // 'ä' 229, // 'å' 230, // 'æ' 231, // 'ç' 232, // 'è' 233, // 'é' 234, // 'ê' 235, // 'ë' 236, // 'ì' 237, // 'í' 238, // 'î' 239, // 'ï' 240, // 'ð' 241, // 'ñ' 242, // 'ò' 243, // 'ó' 244, // 'ô' 245, // 'õ' 246, // 'ö' 247, // '÷' 248, // 'ø' 249, // 'ù' 250, // 'ú' 251, // 'û' 252, // 'ü' 253, // 'ý' 254, // 'þ' 255, // 'ÿ' } var commonInputsInv = []byte{ 't', 'e', '/', 'o', 'a', 's', 'r', 'i', 'p', 'c', 'n', 'w', '.', 'h', 'l', 'm', '-', 'd', 'u', '0', '1', '2', 'g', '=', ':', 'b', 'f', '3', 'y', '5', '&', '_', '4', 'v', '9', '6', '7', '8', 'k', '%', '?', 'x', 'C', 'D', 'A', 'S', 'F', 'I', 'B', 'E', 'j', 'P', 'T', 'z', 'R', 'N', 'M', '+', 'L', 'O', 'q', 'H', 'G', 'W', 'U', 'V', ',', 'Y', 'K', 'J', 'Z', 'X', 'Q', ';', ')', '(', '~', '[', ']', '$', '!', '\'', '*', '@', '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\t', '\n', '\x0b', '\x0c', '\r', '\x0e', '\x0f', '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', ' ', '"', '#', '<', '>', '\\', '^', '`', '{', '|', '}', '\x7f', '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', 
'\xee', '\xef', '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', } ================================================ FILE: common_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import "testing" func TestCommonInputs(t *testing.T) { // first ensure items that can be encoded round trip properly for i := 0; i < 256; i++ { roundTrip(t, byte(i)) } // G maps to 62, +1 is 63, which is highest 6-bit value we can encode enc := encodeCommon('G') if enc != 63 { t.Errorf("expected G to encode to 63, got %d", enc) } // W encodes to 63, +1 is 64, which is too big to fit enc = encodeCommon('W') if enc != 0 { t.Errorf("expected W to encode to 0, got %d", enc) } } func roundTrip(t *testing.T, b byte) { enc := encodeCommon(b) if enc > 0 { dec := decodeCommon(enc) if dec != b { t.Errorf("error round trip common input: %d", b) } } } ================================================ FILE: data/words-1000.txt ================================================ American Congress Democrat I Mr Mrs PM Republican TV a ability able about above accept according account across act action activity actually add address administration admit adult affect after again against age agency agent ago agree agreement ahead air all allow almost alone along already also although always among amount analysis and animal another 
answer any anyone anything appear apply approach area argue arm around arrive art article artist as ask assume at attack attention attorney audience author authority available avoid away baby back bad bag ball bank bar base be beat beautiful because become bed before begin behavior behind believe benefit best better between beyond big bill billion bit black blood blue board body book born both box boy break bring brother budget build building business but buy by call camera campaign can cancer candidate capital car card care career carry case catch cause cell center central century certain certainly chair challenge chance change character charge check child choice choose church citizen city civil claim class clear clearly close coach cold collection college color come commercial common community company compare computer concern condition conference consider consumer contain continue control cost could country couple course court cover create crime cultural culture cup current customer cut dark data daughter day dead deal death debate decade decide decision deep defense degree democratic describe design despite detail determine develop development die difference different difficult dinner direction director discover discuss discussion disease do doctor dog door down draw dream drive drop drug during each early east easy eat economic economy edge education effect effort eight either election else employee end energy enjoy enough enter entire environment environmental especially establish even evening event ever every everybody everyone everything evidence exactly example executive exist expect experience expert explain eye face fact factor fail fall family far fast father fear federal feel feeling few field fight figure fill film final finally financial find fine finger finish fire firm first fish five floor fly focus follow food foot for force foreign forget form former forward four free friend from front full fund future game garden gas general generation get girl 
give glass go goal good government great green ground group grow growth guess gun guy hair half hand hang happen happy hard have he head health hear heart heat heavy help her here herself high him himself his history hit hold home hope hospital hot hotel hour house how however huge human hundred husband idea identify if image imagine impact important improve in include including increase indeed indicate individual industry information inside instead institution interest interesting international interview into investment involve issue it item its itself job join just keep key kid kill kind kitchen know knowledge land language large last late later laugh law lawyer lay lead leader learn least leave left leg legal less let letter level lie life light like likely line list listen little live local long look lose loss lot love low machine magazine main maintain major majority make man manage management manager many market marriage material matter may maybe me mean measure media medical meet meeting member memory mention message method middle might military million mind minute miss mission model modern moment money month more morning most mother mouth move movement movie much music must my myself n't name nation national natural nature near nearly necessary need network never new news newspaper next nice night no none nor north not note nothing notice now number occur of off offer office officer official often oh oil ok old on once one only onto open operation opportunity option or order organization other others our out outside over own owner page pain painting paper parent part participant particular particularly partner party pass past patient pattern pay peace people per perform performance perhaps period person personal phone physical pick picture piece place plan plant play player point police policy political politics poor popular population position positive possible power practice prepare present president pressure pretty prevent price private probably problem 
process produce product production professional professor program project property protect prove provide public pull purpose push put quality question quickly quite race radio raise range rate rather reach read ready real reality realize really reason receive recent recently recognize record red reduce reflect region relate relationship religious remain remember remove report represent require research resource respond response responsibility rest result return reveal rich right rise risk road rock role room rule run safe same save say scene school science scientist score sea season seat second section security see seek seem sell send senior sense series serious serve service set seven several sex sexual shake share she shoot short shot should shoulder show side sign significant similar simple simply since sing single sister sit site situation six size skill skin small smile so social society soldier some somebody someone something sometimes son song soon sort sound source south southern space speak special specific speech spend sport spring staff stage stand standard star start state statement station stay step still stock stop store story strategy street strong structure student study stuff style subject success successful such suddenly suffer suggest summer support sure surface system table take talk task tax teach teacher team technology television tell ten tend term test than thank that the their them themselves then theory there these they thing think third this those though thought thousand threat three through throughout throw thus time to today together tonight too top total tough toward town trade traditional training travel treat treatment tree trial trip trouble true truth try turn two type under understand unit until up upon us use usually value various very victim view violence visit voice vote wait walk wall want war watch water way we weapon wear week weight well west western what whatever when where whether which while white who whole whom whose 
why wide wife will win wind window wish with within without woman wonder word work worker world worry would write writer wrong yard yeah year yes yet you young your yourself ================================================ FILE: decoder_v1.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "bytes" "encoding/binary" "fmt" "strconv" ) func init() { registerDecoder(versionV1, func(data []byte) decoder { return newDecoderV1(data) }) } type decoderV1 struct { data []byte } func newDecoderV1(data []byte) *decoderV1 { return &decoderV1{ data: data, } } func (d *decoderV1) getRoot() int { if len(d.data) < footerSizeV1 { return noneAddr } footer := d.data[len(d.data)-footerSizeV1:] root := binary.LittleEndian.Uint64(footer[8:]) return int(root) } func (d *decoderV1) getLen() int { if len(d.data) < footerSizeV1 { return 0 } footer := d.data[len(d.data)-footerSizeV1:] dlen := binary.LittleEndian.Uint64(footer) return int(dlen) } func (d *decoderV1) stateAt(addr int, prealloc fstState) (fstState, error) { state, ok := prealloc.(*fstStateV1) if ok && state != nil { *state = fstStateV1{} // clear the struct } else { state = &fstStateV1{} } err := state.at(d.data, addr) if err != nil { return nil, err } return state, nil } type fstStateV1 struct { data []byte top int bottom int numTrans int // single trans only singleTransChar byte singleTransNext bool singleTransAddr uint64 
singleTransOut uint64 // shared transSize int outSize int // multiple trans only final bool transTop int transBottom int destTop int destBottom int outTop int outBottom int outFinal int } func (f *fstStateV1) isEncodedSingle() bool { if f.data[f.top]>>7 > 0 { return true } return false } func (f *fstStateV1) at(data []byte, addr int) error { f.data = data if addr == emptyAddr { return f.atZero() } else if addr == noneAddr { return f.atNone() } if addr > len(data) || addr < 16 { return fmt.Errorf("invalid address %d/%d", addr, len(data)) } f.top = addr f.bottom = addr if f.isEncodedSingle() { return f.atSingle(data, addr) } return f.atMulti(data, addr) } func (f *fstStateV1) atZero() error { f.top = 0 f.bottom = 1 f.numTrans = 0 f.final = true f.outFinal = 0 return nil } func (f *fstStateV1) atNone() error { f.top = 0 f.bottom = 1 f.numTrans = 0 f.final = false f.outFinal = 0 return nil } func (f *fstStateV1) atSingle(data []byte, addr int) error { // handle single transition case f.numTrans = 1 f.singleTransNext = data[f.top]&transitionNext > 0 f.singleTransChar = data[f.top] & maxCommon if f.singleTransChar == 0 { f.bottom-- // extra byte for uncommon f.singleTransChar = data[f.bottom] } else { f.singleTransChar = decodeCommon(f.singleTransChar) } if f.singleTransNext { // now we know the bottom, can compute next addr f.singleTransAddr = uint64(f.bottom - 1) f.singleTransOut = 0 } else { f.bottom-- // extra byte with pack sizes f.transSize, f.outSize = decodePackSize(data[f.bottom]) f.bottom -= f.transSize // exactly one trans f.singleTransAddr = readPackedUint(data[f.bottom : f.bottom+f.transSize]) if f.outSize > 0 { f.bottom -= f.outSize // exactly one out (could be length 0 though) f.singleTransOut = readPackedUint(data[f.bottom : f.bottom+f.outSize]) } else { f.singleTransOut = 0 } // need to wait till we know bottom if f.singleTransAddr != 0 { f.singleTransAddr = uint64(f.bottom) - f.singleTransAddr } } return nil } func (f *fstStateV1) atMulti(data []byte, 
addr int) error { // handle multiple transitions case f.final = data[f.top]&stateFinal > 0 f.numTrans = int(data[f.top] & maxNumTrans) if f.numTrans == 0 { f.bottom-- // extra byte for number of trans f.numTrans = int(data[f.bottom]) if f.numTrans == 1 { // can't really be 1 here, this is special case that means 256 f.numTrans = 256 } } f.bottom-- // extra byte with pack sizes f.transSize, f.outSize = decodePackSize(data[f.bottom]) f.transTop = f.bottom f.bottom -= f.numTrans // one byte for each transition f.transBottom = f.bottom f.destTop = f.bottom f.bottom -= f.numTrans * f.transSize f.destBottom = f.bottom if f.outSize > 0 { f.outTop = f.bottom f.bottom -= f.numTrans * f.outSize f.outBottom = f.bottom if f.final { f.bottom -= f.outSize f.outFinal = f.bottom } } return nil } func (f *fstStateV1) Address() int { return f.top } func (f *fstStateV1) Final() bool { return f.final } func (f *fstStateV1) FinalOutput() uint64 { if f.final && f.outSize > 0 { return readPackedUint(f.data[f.outFinal : f.outFinal+f.outSize]) } return 0 } func (f *fstStateV1) NumTransitions() int { return f.numTrans } func (f *fstStateV1) TransitionAt(i int) byte { if f.isEncodedSingle() { return f.singleTransChar } transitionKeys := f.data[f.transBottom:f.transTop] return transitionKeys[f.numTrans-i-1] } func (f *fstStateV1) TransitionFor(b byte) (int, int, uint64) { if f.isEncodedSingle() { if f.singleTransChar == b { return 0, int(f.singleTransAddr), f.singleTransOut } return -1, noneAddr, 0 } transitionKeys := f.data[f.transBottom:f.transTop] pos := bytes.IndexByte(transitionKeys, b) if pos < 0 { return -1, noneAddr, 0 } transDests := f.data[f.destBottom:f.destTop] dest := int(readPackedUint(transDests[pos*f.transSize : pos*f.transSize+f.transSize])) if dest > 0 { // convert delta dest = f.bottom - dest } transVals := f.data[f.outBottom:f.outTop] var out uint64 if f.outSize > 0 { out = readPackedUint(transVals[pos*f.outSize : pos*f.outSize+f.outSize]) } return f.numTrans - pos - 1, 
// escapeInput renders b as the contents of a Go rune literal, without the
// surrounding single quotes, so it can be embedded safely in a label.
func escapeInput(b byte) string {
	quoted := strconv.QuoteRune(rune(b))
	// strip the leading and trailing quote characters
	return quoted[1 : len(quoted)-1]
}
package vellum import ( "reflect" "testing" ) func TestDecoderVersionError(t *testing.T) { _, err := loadDecoder(629, nil) if err == nil { t.Errorf("expected error loading decoder version 629, got nil") } } func TestShortHeader(t *testing.T) { header := make([]byte, 15) _, _, err := decodeHeader(header) if err == nil { t.Errorf("expected error decoding short header, got nil") } } func TestDecoderRootLen(t *testing.T) { d := newDecoderV1([]byte{1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0}) if d.getLen() != 1 { t.Fatalf("expected parsed footer length 1, got %d", d.getLen()) } if d.getRoot() != 2 { t.Fatalf("expected parsed footer length 2, got %d", d.getLen()) } } func TestDecoderStateAt(t *testing.T) { tests := []struct { desc string data []byte want *fstStateV1 }{ { "one trans, trans next, common char", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data oneTransition | transitionNext | encodeCommon('a'), // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ numTrans: 1, top: 16, bottom: 16, singleTransChar: 'a', singleTransNext: true, singleTransAddr: 15, }, }, { "one trans, trans next, uncommon char", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data 0xff, oneTransition | transitionNext, // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ numTrans: 1, top: 17, bottom: 16, singleTransChar: 0xff, singleTransNext: true, singleTransAddr: 15, }, }, { "one trans, trans not next, common char", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data 4, // delta address packed 1<<4 | 0, // pack sizes oneTransition | encodeCommon('a'), // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ numTrans: 1, top: 18, bottom: 16, singleTransChar: 'a', singleTransNext: false, singleTransAddr: 12, transSize: 1, }, }, { "one trans, trans not next, uncommon char", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
// test node data 4, // delta address packed 1<<4 | 0, // pack sizes 0xff, oneTransition, // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ numTrans: 1, top: 19, bottom: 16, singleTransChar: 0xff, singleTransNext: false, singleTransAddr: 12, transSize: 1, }, }, { "one trans, trans not next, common char, with value", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data 27, // trans value 4, // delta address packed 1<<4 | 1, // pack sizes oneTransition | encodeCommon('a'), // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ numTrans: 1, top: 19, bottom: 16, singleTransChar: 'a', singleTransNext: false, singleTransAddr: 12, singleTransOut: 27, transSize: 1, outSize: 1, }, }, { "one trans, trans not next, uncommon char, with value", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data 39, // trans val 4, // delta address packed 1<<4 | 1, // pack sizes 0xff, oneTransition, // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ numTrans: 1, top: 20, bottom: 16, singleTransChar: 0xff, singleTransNext: false, singleTransAddr: 12, singleTransOut: 39, transSize: 1, outSize: 1, }, }, { "many trans, not final, no values", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data 2, // delta addresses packed 3, 4, 'c', // encoded keys reversed 'b', 'a', 1<<4 | 0, // pack sizes encodeNumTrans(3), // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ numTrans: 3, top: 23, bottom: 16, transSize: 1, destBottom: 16, destTop: 19, transBottom: 19, transTop: 22, }, }, { "many trans, not final, with values", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data 7, // values reversed 0, 3, 2, // delta addresses reversed 3, 4, 'c', // encoded keys reversed 'b', 'a', 1<<4 | 1, // pack sizes encodeNumTrans(3), // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ 
numTrans: 3, top: 26, bottom: 16, transSize: 1, outSize: 1, outBottom: 16, outTop: 19, destBottom: 19, destTop: 22, transBottom: 22, transTop: 25, }, }, { "many trans, final, with values", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data 9, // node final val 7, // values reversed 0, 3, 2, // delta addresses reversed 3, 4, 'c', // encoded keys reversed 'b', 'a', 1<<4 | 1, // pack sizes stateFinal | encodeNumTrans(3), // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ final: true, numTrans: 3, top: 27, bottom: 16, transSize: 1, outSize: 1, outBottom: 17, outTop: 20, destBottom: 20, destTop: 23, transBottom: 23, transTop: 26, outFinal: 16, }, }, { "max trans, ", []byte{ // header 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // test node data // delta addresses packed 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 
0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, // encoded keys reversed 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 1<<4 | 0, // pack sizes 1, // actual trans 1 == 256 0, // zero trans (wont fit) // footer 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, }, &fstStateV1{ numTrans: 256, top: 530, bottom: 16, transSize: 1, destBottom: 16, destTop: 272, transBottom: 272, transTop: 528, }, }, } for _, test := range tests { t.Run(test.desc, func(t 
*testing.T) { d := newDecoderV1(test.data) test.want.data = test.data got, err := d.stateAt(len(test.data)-17, nil) if err != nil { t.Fatal(err) } if !reflect.DeepEqual(got, test.want) { t.Errorf("wanted: %+v, got: %+v", test.want, got) } addr := got.Address() if addr != test.want.top { t.Errorf("expected address to match: %d - %d", addr, test.want.top) } fin := got.Final() if fin != test.want.final { t.Errorf("expected final to match: %t - %t", fin, test.want.final) } ntrans := got.NumTransitions() if ntrans != test.want.numTrans { t.Errorf("expected num trans to match: %d - %d", ntrans, test.want.numTrans) } }) } } func TestFSTStateFinalOutput(t *testing.T) { tests := []struct { desc string in *fstStateV1 want uint64 }{ { "final output for final state", &fstStateV1{ data: []byte{7}, numTrans: 2, final: true, outSize: 1, outFinal: 0, }, 7, }, { "final output for non-final state", &fstStateV1{ data: []byte{7}, numTrans: 2, final: false, outSize: 1, }, 0, }, } for _, test := range tests { t.Run(test.desc, func(t *testing.T) { got := test.in.FinalOutput() if got != test.want { t.Errorf("wanted: %d, got: %d", test.want, got) } }) } } func TestDecodeStateZero(t *testing.T) { var state fstStateV1 err := state.at(nil, 0) if err != nil { t.Fatal(err) } if state.numTrans != 0 { t.Errorf("expected 0 states, got %d", state.numTrans) } if !state.final { t.Errorf("expected state final, got %t", state.final) } } func TestDecodeAtInvalid(t *testing.T) { var state fstStateV1 err := state.at(nil, 15) if err == nil { t.Errorf("expected error invalid address, got nil") } } func TestFSTStateTransitionAt(t *testing.T) { state := fstStateV1{ data: []byte{oneTransition | encodeCommon('a')}, numTrans: 1, singleTransChar: 'a', } got := state.TransitionAt(0) if got != state.singleTransChar { t.Errorf("expected %s got %s", string(state.singleTransChar), string(got)) } state = fstStateV1{ data: []byte{'b', 'a'}, numTrans: 2, transBottom: 0, transTop: 2, } got = state.TransitionAt(0) if got 
!= 'a' { t.Errorf("expected %s got %s", string('a'), string(got)) } } ================================================ FILE: docs/format.md ================================================ # vellum file format v1 The v1 file format for vellum has been designed by trying to understand the file format used by the [BurntSushi/fst](https://github.com/BurntSushi/fst) library. It should be binary compatible, but no attempt has been made to verify this. ## Overview The file has 3 sections: - header - edge/transition data - footer ### Header The header is 16 bytes in total. - 8 bytes version, uint64 little-endian - 8 bytes type, uint64 little-endian (currently always 0, no meaning assigned) A side-effect of this header is that when computing transition target addresses at runtime, any address < 16 is invalid. ### State/Transition Data A state is encoded with the following sections, HOWEVER, many sections are optional and omitted for various combinations of settings. In the order they occur: - node final output value (packed integer of the computed output size for this node) - n transition output values (packed integers, of the computed output size for this node, in REVERSE transition order) - n transition addresses (delta encoded, relative to the lowest byte of this node, packed at the computed transition address size for this node, in REVERSE transition order) - n transition bytes (1 byte for each transition, in REVERSE transition order) - pack sizes, 1 byte, high 4 bits transition address size, low 4 bits output size - number of transitions, 1 byte (ONLY if it didn't fit in the top byte), value of 1 in this byte means 256 (1 would have fit into the top byte) - single transition byte, 1 byte (ONLY if it didn't fit in the top byte) - top byte, encodes various flags, and uses remaining bits depending on those flags, broken out separately below #### State Top Byte - high bit - 1 means, this edge has just 1 transition - 0 means, this edge has multiple transitions ##### 1 transition
States - second bit flags jump to previous - 1 means this transition target is the immediately preceding state - 0 means there will be a transition address in the rest of the data - remaining 6 bits attempt to encode the transition byte - Obviously this requires 8 bits, but we map the most frequently used bytes into the lowest 6 bits (see common.go). If the byte we need to encode doesn't fit, we encode 0, and read it fully in the following byte. This allows the most common bytes in a single transition edge to fit into just a single byte. ##### Multiple Transition States - second bit flags final states - 1 means this is a final state - 0 means this is not a final state - remaining 6 bits attempt to encode the number of transitions - Obviously, this can require 8 bits, but we assume that many states have fewer transitions, and will fit. If the number won't fit, we encode 0 here, and read it fully in the following byte. Because we could have 256 transitions, that full byte still isn't enough, so we reuse the value 1 to mean 256. The value of 1 would never naturally occur in this position, since 1 transition would have fit into the top byte (NOTE: single transition states that are final are encoded as multi-transition states, but the value of 1 would fit in the top 6 bits). ### Single Transition Jump To Previous The flag marking that a single transition state should jump to the previous state works because we encode all of the node data backwards (i.e., we start processing state data with the last byte). Since, at runtime, we can always compute the lowest byte of the state we're in, we can trivially compute the start address of the previous node, just by subtracting one. This allows saving another set of bytes in many cases which would have otherwise been needed to encode that address. ### Delta Addresses All transition target addresses are delta encoded, relative to the lowest byte in the current state.
### Packed Integer Encoding For both the output values and transition target addresses, we choose a fixed size number of bytes that will work for encoding all the appropriate values in this state. Because this length will be recorded (in the pack sizes section), we don't need to use varint encoding, we can instead simply use the minimum number of bytes required. So, 8-bit values take just 1 byte, etc. This has the advantage that small values take less space, but the sizes are still fixed, so we can easily navigate without excessive computation. ### Footer The footer is 16 bytes in total. - 8 bytes number of keys, uint64 little-endian - 8 bytes root address (absolute, not delta encoded like other addresses in file), uint64 little-endian ## Encoding Streaming States are written out to the underlying writer as soon as possible. This allows us to get an early start on I/O while still building the FST, reducing the overall time to build, and it also allows us to reduce the memory consumed during the build process. Because of this, the root node will always be the last node written in the file. ================================================ FILE: encoder_v1.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package vellum import ( "encoding/binary" "fmt" "io" ) const versionV1 = 1 const oneTransition = 1 << 7 const transitionNext = 1 << 6 const stateFinal = 1 << 6 const footerSizeV1 = 16 func init() { registerEncoder(versionV1, func(w io.Writer) encoder { return newEncoderV1(w) }) } type encoderV1 struct { bw *writer } func newEncoderV1(w io.Writer) *encoderV1 { return &encoderV1{ bw: newWriter(w), } } func (e *encoderV1) reset(w io.Writer) { e.bw.Reset(w) } func (e *encoderV1) start() error { header := make([]byte, headerSize) binary.LittleEndian.PutUint64(header, versionV1) binary.LittleEndian.PutUint64(header[8:], uint64(0)) // type n, err := e.bw.Write(header) if err != nil { return err } if n != headerSize { return fmt.Errorf("short write of header %d/%d", n, headerSize) } return nil } func (e *encoderV1) encodeState(s *builderNode, lastAddr int) (int, error) { if len(s.trans) == 0 && s.final && s.finalOutput == 0 { return 0, nil } else if len(s.trans) != 1 || s.final { return e.encodeStateMany(s) } else if !s.final && s.trans[0].out == 0 && s.trans[0].addr == lastAddr { return e.encodeStateOneFinish(s, transitionNext) } return e.encodeStateOne(s) } func (e *encoderV1) encodeStateOne(s *builderNode) (int, error) { start := uint64(e.bw.counter) outPackSize := 0 if s.trans[0].out != 0 { outPackSize = packedSize(s.trans[0].out) err := e.bw.WritePackedUintIn(s.trans[0].out, outPackSize) if err != nil { return 0, err } } delta := deltaAddr(start, uint64(s.trans[0].addr)) transPackSize := packedSize(delta) err := e.bw.WritePackedUintIn(delta, transPackSize) if err != nil { return 0, err } packSize := encodePackSize(transPackSize, outPackSize) err = e.bw.WriteByte(packSize) if err != nil { return 0, err } return e.encodeStateOneFinish(s, 0) } func (e *encoderV1) encodeStateOneFinish(s *builderNode, next byte) (int, error) { enc := encodeCommon(s.trans[0].in) // not a common input if enc == 0 { err := e.bw.WriteByte(s.trans[0].in) if err != nil { return 0, err } } err := 
e.bw.WriteByte(oneTransition | next | enc) if err != nil { return 0, err } return e.bw.counter - 1, nil } func (e *encoderV1) encodeStateMany(s *builderNode) (int, error) { start := uint64(e.bw.counter) transPackSize := 0 outPackSize := packedSize(s.finalOutput) anyOutputs := s.finalOutput != 0 for i := range s.trans { delta := deltaAddr(start, uint64(s.trans[i].addr)) tsize := packedSize(delta) if tsize > transPackSize { transPackSize = tsize } osize := packedSize(s.trans[i].out) if osize > outPackSize { outPackSize = osize } anyOutputs = anyOutputs || s.trans[i].out != 0 } if !anyOutputs { outPackSize = 0 } if anyOutputs { // output final value if s.final { err := e.bw.WritePackedUintIn(s.finalOutput, outPackSize) if err != nil { return 0, err } } // output transition values (in reverse) for j := len(s.trans) - 1; j >= 0; j-- { err := e.bw.WritePackedUintIn(s.trans[j].out, outPackSize) if err != nil { return 0, err } } } // output transition dests (in reverse) for j := len(s.trans) - 1; j >= 0; j-- { delta := deltaAddr(start, uint64(s.trans[j].addr)) err := e.bw.WritePackedUintIn(delta, transPackSize) if err != nil { return 0, err } } // output transition keys (in reverse) for j := len(s.trans) - 1; j >= 0; j-- { err := e.bw.WriteByte(s.trans[j].in) if err != nil { return 0, err } } packSize := encodePackSize(transPackSize, outPackSize) err := e.bw.WriteByte(packSize) if err != nil { return 0, err } numTrans := encodeNumTrans(len(s.trans)) // if number of transitions wont fit in edge header byte // write out separately if numTrans == 0 { if len(s.trans) == 256 { // this wouldn't fit in single byte, but reuse value 1 // which would have always fit in the edge header instead err = e.bw.WriteByte(1) if err != nil { return 0, err } } else { err = e.bw.WriteByte(byte(len(s.trans))) if err != nil { return 0, err } } } // finally write edge header if s.final { numTrans |= stateFinal } err = e.bw.WriteByte(numTrans) if err != nil { return 0, err } return e.bw.counter - 
1, nil } func (e *encoderV1) finish(count, rootAddr int) error { footer := make([]byte, footerSizeV1) binary.LittleEndian.PutUint64(footer, uint64(count)) // root addr binary.LittleEndian.PutUint64(footer[8:], uint64(rootAddr)) // root addr n, err := e.bw.Write(footer) if err != nil { return err } if n != footerSizeV1 { return fmt.Errorf("short write of footer %d/%d", n, footerSizeV1) } err = e.bw.Flush() if err != nil { return err } return nil } ================================================ FILE: encoder_v1_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package vellum

import (
	"bytes"
	"reflect"
	"testing"
)

// FIXME add tests for longjmp (wider delta address)
// FIXME add tests for wider values
// FIXME add tests for mixed value sizes in same edge (fixed size, but padded)
// FIXME add test for final state (must include final val even if 0)

// encodeStateAndFlush encodes node with e.encodeState(node, addr) and then
// flushes e's writer, failing the test immediately on either error.
func encodeStateAndFlush(t *testing.T, e *encoderV1, node *builderNode, addr int) {
	t.Helper()
	if _, err := e.encodeState(node, addr); err != nil {
		t.Fatal(err)
	}
	// manually flush so the bytes reach the underlying buffer
	if err := e.bw.Flush(); err != nil {
		t.Fatal(err)
	}
}

// expectEncodedBytes reports a test error when got differs from want.
func expectEncodedBytes(t *testing.T, want, got []byte) {
	t.Helper()
	if !reflect.DeepEqual(got, want) {
		t.Errorf("expected bytes: %v, got %v", want, got)
	}
}

// TestEncoderVersionError checks that an unregistered version is rejected.
func TestEncoderVersionError(t *testing.T) {
	if _, err := loadEncoder(629, nil); err == nil {
		t.Errorf("expected error loading encoder version 629, got nil")
	}
}

// TestEncoderStart checks the 16 header bytes written by start().
func TestEncoderStart(t *testing.T) {
	want := []byte{
		1, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	if err := e.start(); err != nil {
		t.Fatal(err)
	}
	// manually flush
	if err := e.bw.Flush(); err != nil {
		t.Fatal(err)
	}

	if got := buf.Bytes(); !reflect.DeepEqual(got, want) {
		t.Errorf("expected header: %v, got %v", want, got)
	}
}

func TestEncoderStateOneNextWithCommonInput(t *testing.T) {
	node := &builderNode{
		trans: []transition{{in: 'a', addr: 27}},
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	encodeStateAndFlush(t, e, node, 27)

	expectEncodedBytes(t, []byte{
		oneTransition | transitionNext | encodeCommon('a'),
	}, buf.Bytes())
}

func TestEncoderStateOneNextWithUncommonInput(t *testing.T) {
	node := &builderNode{
		trans: []transition{{in: 0xff, addr: 27}},
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	encodeStateAndFlush(t, e, node, 27)

	expectEncodedBytes(t, []byte{
		0xff,
		oneTransition | transitionNext,
	}, buf.Bytes())
}

func TestEncoderStateOneNotNextWithCommonInputNoValue(t *testing.T) {
	node := &builderNode{
		trans: []transition{{in: 'a', addr: 32}},
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	e.bw.counter = 64 // pretend we're at a position in the file
	encodeStateAndFlush(t, e, node, 64)

	expectEncodedBytes(t, []byte{
		32,       // delta address packed
		1<<4 | 0, // pack sizes
		oneTransition | encodeCommon('a'),
	}, buf.Bytes())
}

func TestEncoderStateOneNotNextWithUncommonInputNoValue(t *testing.T) {
	node := &builderNode{
		trans: []transition{{in: 0xff, addr: 32}},
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	e.bw.counter = 64 // pretend we're at a position in the file
	encodeStateAndFlush(t, e, node, 64)

	expectEncodedBytes(t, []byte{
		32,       // delta address packed
		1<<4 | 0, // pack sizes
		0xff,
		oneTransition,
	}, buf.Bytes())
}

func TestEncoderStateOneNotNextWithCommonInputWithValue(t *testing.T) {
	node := &builderNode{
		trans: []transition{{in: 'a', addr: 32, out: 27}},
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	e.bw.counter = 64 // pretend we're at a position in the file
	encodeStateAndFlush(t, e, node, 64)

	expectEncodedBytes(t, []byte{
		27,       // trans value
		32,       // delta address packed
		1<<4 | 1, // pack sizes
		oneTransition | encodeCommon('a'),
	}, buf.Bytes())
}

func TestEncoderStateOneNotNextWithUncommonInputWithValue(t *testing.T) {
	node := &builderNode{
		trans: []transition{{in: 0xff, addr: 32, out: 39}},
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	e.bw.counter = 64 // pretend we're at a position in the file
	encodeStateAndFlush(t, e, node, 64)

	expectEncodedBytes(t, []byte{
		39,       // trans val
		32,       // delta address packed
		1<<4 | 1, // pack sizes
		0xff,
		oneTransition,
	}, buf.Bytes())
}

func TestEncoderStateManyWithNoValues(t *testing.T) {
	node := &builderNode{
		trans: []transition{
			{in: 'a', addr: 32},
			{in: 'b', addr: 45},
			{in: 'c', addr: 52},
		},
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	e.bw.counter = 64 // pretend we're at a position in the file
	encodeStateAndFlush(t, e, node, 64)

	expectEncodedBytes(t, []byte{
		12, // delta addresses packed
		19,
		32,
		'c', // encoded keys reversed
		'b',
		'a',
		1<<4 | 0, // pack sizes
		encodeNumTrans(3),
	}, buf.Bytes())
}

func TestEncoderStateManyWithValues(t *testing.T) {
	node := &builderNode{
		trans: []transition{
			{in: 'a', addr: 32, out: 3},
			{in: 'b', addr: 45, out: 0},
			{in: 'c', addr: 52, out: 7},
		},
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	e.bw.counter = 64 // pretend we're at a position in the file
	encodeStateAndFlush(t, e, node, 64)

	expectEncodedBytes(t, []byte{
		7, // values reversed
		0,
		3,
		12, // delta addresses reversed
		19,
		32,
		'c', // encoded keys reversed
		'b',
		'a',
		1<<4 | 1, // pack sizes
		encodeNumTrans(3),
	}, buf.Bytes())
}

func TestEncoderStateMaxTransitions(t *testing.T) {
	testEncoderStateNTransitions(t, 256)
}

func TestEncoderStateMoreTransitionsThanFitInHeader(t *testing.T) {
	testEncoderStateNTransitions(t, 1<<6)
}

// testEncoderStateNTransitions encodes a state with n transitions (inputs
// 0..n-1, all pointing at address 32, no outputs) and compares the bytes
// produced against the expected layout.
func testEncoderStateNTransitions(t *testing.T, n int) {
	node := &builderNode{
		trans: make([]transition, n),
	}
	for k := range node.trans {
		node.trans[k] = transition{in: byte(k), addr: 32}
	}

	var buf bytes.Buffer
	e := newEncoderV1(&buf)
	e.bw.counter = 64 // pretend we're at a position in the file
	encodeStateAndFlush(t, e, node, 64)

	var want []byte
	// n delta addresses
	for k := 0; k < n; k++ {
		want = append(want, 32)
	}
	// transition keys (reversed)
	for k := n - 1; k >= 0; k-- {
		want = append(want, byte(k))
	}
	// pack sizes
	want = append(want, 1<<4|0)
	if n > 1<<6-1 {
		// the transition count gets a byte of its own
		if n == 256 {
			// 256 is specially encoded as 1
			want = append(want, 1)
		} else {
			want = append(want, byte(n))
		}
	}
	// header byte, which is all 0 in this case
	want = append(want, 0)

	expectEncodedBytes(t, want, buf.Bytes())
}

================================================ FILE: encoding.go ================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "encoding/binary" "fmt" "io" ) const headerSize = 16 type encoderConstructor func(w io.Writer) encoder type decoderConstructor func([]byte) decoder var encoders = map[int]encoderConstructor{} var decoders = map[int]decoderConstructor{} type encoder interface { start() error encodeState(s *builderNode, addr int) (int, error) finish(count, rootAddr int) error reset(w io.Writer) } func loadEncoder(ver int, w io.Writer) (encoder, error) { if cons, ok := encoders[ver]; ok { return cons(w), nil } return nil, fmt.Errorf("no encoder for version %d registered", ver) } func registerEncoder(ver int, cons encoderConstructor) { encoders[ver] = cons } type decoder interface { getRoot() int getLen() int stateAt(addr int, prealloc fstState) (fstState, error) } func loadDecoder(ver int, data []byte) (decoder, error) { if cons, ok := decoders[ver]; ok { return cons(data), nil } return nil, fmt.Errorf("no decoder for version %d registered", ver) } func registerDecoder(ver int, cons decoderConstructor) { decoders[ver] = cons } func decodeHeader(header []byte) (ver int, typ int, err error) { if len(header) < headerSize { err = fmt.Errorf("invalid header < 16 bytes") return } ver = int(binary.LittleEndian.Uint64(header[0:8])) typ = int(binary.LittleEndian.Uint64(header[8:16])) return } // fstState represents a state inside the FTS runtime // It is the main contract between the FST impl and the decoder // The FST impl should work only with this interface, while only the decoder // impl knows the physical representation. 
type fstState interface { Address() int Final() bool FinalOutput() uint64 NumTransitions() int TransitionFor(b byte) (int, int, uint64) TransitionAt(i int) byte } ================================================ FILE: example_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum_test import ( "bytes" "fmt" "log" "github.com/couchbase/vellum" ) func Example() { var buf bytes.Buffer builder, err := vellum.New(&buf, nil) if err != nil { log.Fatal(err) } err = builder.Insert([]byte("cat"), 1) if err != nil { log.Fatal(err) } err = builder.Insert([]byte("dog"), 2) if err != nil { log.Fatal(err) } err = builder.Insert([]byte("fish"), 3) if err != nil { log.Fatal(err) } err = builder.Close() if err != nil { log.Fatal(err) } fst, err := vellum.Load(buf.Bytes()) if err != nil { log.Fatal(err) } val, exists, err := fst.Get([]byte("cat")) if err != nil { log.Fatal(err) } if exists { fmt.Println(val) } val, exists, err = fst.Get([]byte("dog")) if err != nil { log.Fatal(err) } if exists { fmt.Println(val) } val, exists, err = fst.Get([]byte("fish")) if err != nil { log.Fatal(err) } if exists { fmt.Println(val) } // Output: 1 // 2 // 3 } ================================================ FILE: fst.go ================================================ // Copyright (c) 2017 Couchbase, Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "io" "github.com/willf/bitset" ) // FST is an in-memory representation of a finite state transducer, // capable of returning the uint64 value associated with // each []byte key stored, as well as enumerating all of the keys // in order. type FST struct { f io.Closer ver int len int typ int data []byte decoder decoder } func new(data []byte, f io.Closer) (rv *FST, err error) { rv = &FST{ data: data, f: f, } rv.ver, rv.typ, err = decodeHeader(data) if err != nil { return nil, err } rv.decoder, err = loadDecoder(rv.ver, rv.data) if err != nil { return nil, err } rv.len = rv.decoder.getLen() return rv, nil } // Contains returns true if this FST contains the specified key. func (f *FST) Contains(val []byte) (bool, error) { _, exists, err := f.Get(val) return exists, err } // Get returns the value associated with the key. NOTE: a value of zero // does not imply the key does not exist, you must consult the second // return value as well. 
func (f *FST) Get(input []byte) (uint64, bool, error) { return f.get(input, nil) } func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) { var total uint64 curr := f.decoder.getRoot() state, err := f.decoder.stateAt(curr, prealloc) if err != nil { return 0, false, err } for _, c := range input { _, curr, output := state.TransitionFor(c) if curr == noneAddr { return 0, false, nil } state, err = f.decoder.stateAt(curr, state) if err != nil { return 0, false, err } total += output } if state.Final() { total += state.FinalOutput() return total, true, nil } return 0, false, nil } // Version returns the encoding version used by this FST instance. func (f *FST) Version() int { return f.ver } // Len returns the number of entries in this FST instance. func (f *FST) Len() int { return f.len } // Type returns the type of this FST instance. func (f *FST) Type() int { return f.typ } // Close will unmap any mmap'd data (if managed by vellum) and it will close // the backing file (if managed by vellum). You MUST call Close() for any // FST instance that is created. 
func (f *FST) Close() error { if f.f != nil { err := f.f.Close() if err != nil { return err } } f.data = nil f.decoder = nil return nil } // Start returns the start state of this Automaton func (f *FST) Start() int { return f.decoder.getRoot() } // IsMatch returns if this state is a matching state in this Automaton func (f *FST) IsMatch(addr int) bool { match, _ := f.IsMatchWithVal(addr) return match } // CanMatch returns if this state can ever transition to a matching state // in this Automaton func (f *FST) CanMatch(addr int) bool { if addr == noneAddr { return false } return true } // WillAlwaysMatch returns if from this state the Automaton will always // be in a matching state func (f *FST) WillAlwaysMatch(int) bool { return false } // Accept returns the next state for this Automaton on input of byte b func (f *FST) Accept(addr int, b byte) int { next, _ := f.AcceptWithVal(addr, b) return next } // IsMatchWithVal returns if this state is a matching state in this Automaton // and also returns the final output value for this state func (f *FST) IsMatchWithVal(addr int) (bool, uint64) { s, err := f.decoder.stateAt(addr, nil) if err != nil { return false, 0 } return s.Final(), s.FinalOutput() } // AcceptWithVal returns the next state for this Automaton on input of byte b // and also returns the output value for the transition func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) { s, err := f.decoder.stateAt(addr, nil) if err != nil { return noneAddr, 0 } _, next, output := s.TransitionFor(b) return next, output } // Iterator returns a new Iterator capable of enumerating the key/value pairs // between the provided startKeyInclusive and endKeyExclusive. 
func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { return newIterator(f, startKeyInclusive, endKeyExclusive, nil) } // Search returns a new Iterator capable of enumerating the key/value pairs // between the provided startKeyInclusive and endKeyExclusive that also // satisfy the provided automaton. func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { return newIterator(f, startKeyInclusive, endKeyExclusive, aut) } // Debug is only intended for debug purposes, it simply asks the underlying // decoder visit each state, and pass it to the provided callback. func (f *FST) Debug(callback func(int, interface{}) error) error { addr := f.decoder.getRoot() set := bitset.New(uint(addr)) stack := addrStack{addr} stateNumber := 0 stack, addr = stack[:len(stack)-1], stack[len(stack)-1] for addr != noneAddr { if set.Test(uint(addr)) { stack, addr = stack.Pop() continue } set.Set(uint(addr)) state, err := f.decoder.stateAt(addr, nil) if err != nil { return err } err = callback(stateNumber, state) if err != nil { return err } for i := 0; i < state.NumTransitions(); i++ { tchar := state.TransitionAt(i) _, dest, _ := state.TransitionFor(tchar) stack = append(stack, dest) } stateNumber++ stack, addr = stack.Pop() } return nil } type addrStack []int func (a addrStack) Pop() (addrStack, int) { l := len(a) if l < 1 { return a, noneAddr } return a[:l-1], a[l-1] } // Reader() returns a Reader instance that a single thread may use to // retrieve data from the FST func (f *FST) Reader() (*Reader, error) { return &Reader{f: f}, nil } func (f *FST) GetMinKey() ([]byte, error) { var rv []byte curr := f.decoder.getRoot() state, err := f.decoder.stateAt(curr, nil) if err != nil { return nil, err } for !state.Final() { nextTrans := state.TransitionAt(0) _, curr, _ = state.TransitionFor(nextTrans) state, err = f.decoder.stateAt(curr, state) if err != nil { return nil, err } rv = append(rv, nextTrans) } return 
rv, nil } func (f *FST) GetMaxKey() ([]byte, error) { var rv []byte curr := f.decoder.getRoot() state, err := f.decoder.stateAt(curr, nil) if err != nil { return nil, err } for state.NumTransitions() > 0 { nextTrans := state.TransitionAt(state.NumTransitions() - 1) _, curr, _ = state.TransitionFor(nextTrans) state, err = f.decoder.stateAt(curr, state) if err != nil { return nil, err } rv = append(rv, nextTrans) } return rv, nil } // A Reader is meant for a single threaded use type Reader struct { f *FST prealloc fstStateV1 } func (r *Reader) Get(input []byte) (uint64, bool, error) { return r.f.get(input, &r.prealloc) } ================================================ FILE: fst_iterator.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "bytes" ) // Iterator represents a means of visiting key/value pairs in order. type Iterator interface { // Current() returns the key/value pair currently pointed to. // The []byte of the key is ONLY guaranteed to be valid until // another call to Next/Seek/Close. If you need it beyond that // point you MUST make a copy. Current() ([]byte, uint64) // Next() advances the iterator to the next key/value pair. // If no more key/value pairs exist, ErrIteratorDone is returned. Next() error // Seek() advances the iterator the specified key, or the next key // if it does not exist. 
// If no keys exist after that point, ErrIteratorDone is returned. Seek(key []byte) error // Reset resets the Iterator' internal state to allow for iterator // reuse (e.g. pooling). Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error // Close() frees any resources held by this iterator. Close() error } // FSTIterator is a structure for iterating key/value pairs in this FST in // lexicographic order. Iterators should be constructed with the FSTIterator // method on the parent FST structure. type FSTIterator struct { f *FST aut Automaton startKeyInclusive []byte endKeyExclusive []byte statesStack []fstState keysStack []byte keysPosStack []int valsStack []uint64 autStatesStack []int nextStart []byte } func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) (*FSTIterator, error) { rv := &FSTIterator{} err := rv.Reset(f, startKeyInclusive, endKeyExclusive, aut) if err != nil { return nil, err } return rv, nil } // Reset resets the Iterator' internal state to allow for iterator // reuse (e.g. pooling). 
func (i *FSTIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error { if aut == nil { aut = alwaysMatchAutomaton } i.f = f i.startKeyInclusive = startKeyInclusive i.endKeyExclusive = endKeyExclusive i.aut = aut return i.pointTo(startKeyInclusive) } // pointTo attempts to point us to the specified location func (i *FSTIterator) pointTo(key []byte) error { // tried to seek before start if bytes.Compare(key, i.startKeyInclusive) < 0 { key = i.startKeyInclusive } // tried to see past end if i.endKeyExclusive != nil && bytes.Compare(key, i.endKeyExclusive) > 0 { key = i.endKeyExclusive } // reset any state, pointTo always starts over i.statesStack = i.statesStack[:0] i.keysStack = i.keysStack[:0] i.keysPosStack = i.keysPosStack[:0] i.valsStack = i.valsStack[:0] i.autStatesStack = i.autStatesStack[:0] root, err := i.f.decoder.stateAt(i.f.decoder.getRoot(), nil) if err != nil { return err } autStart := i.aut.Start() maxQ := -1 // root is always part of the path i.statesStack = append(i.statesStack, root) i.autStatesStack = append(i.autStatesStack, autStart) for j := 0; j < len(key); j++ { keyJ := key[j] curr := i.statesStack[len(i.statesStack)-1] autCurr := i.autStatesStack[len(i.autStatesStack)-1] pos, nextAddr, nextVal := curr.TransitionFor(keyJ) if nextAddr == noneAddr { // needed transition doesn't exist // find last trans before the one we needed for q := curr.NumTransitions() - 1; q >= 0; q-- { if curr.TransitionAt(q) < keyJ { maxQ = q break } } break } autNext := i.aut.Accept(autCurr, keyJ) next, err := i.f.decoder.stateAt(nextAddr, nil) if err != nil { return err } i.statesStack = append(i.statesStack, next) i.keysStack = append(i.keysStack, keyJ) i.keysPosStack = append(i.keysPosStack, pos) i.valsStack = append(i.valsStack, nextVal) i.autStatesStack = append(i.autStatesStack, autNext) continue } if !i.statesStack[len(i.statesStack)-1].Final() || !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || bytes.Compare(i.keysStack, 
key) < 0 { return i.next(maxQ) } return nil } // Current returns the key and value currently pointed to by the iterator. // If the iterator is not pointing at a valid value (because Iterator/Next/Seek) // returned an error previously, it may return nil,0. func (i *FSTIterator) Current() ([]byte, uint64) { curr := i.statesStack[len(i.statesStack)-1] if curr.Final() { var total uint64 for _, v := range i.valsStack { total += v } total += curr.FinalOutput() return i.keysStack, total } return nil, 0 } // Next advances this iterator to the next key/value pair. If there is none // or the advancement goes beyond the configured endKeyExclusive, then // ErrIteratorDone is returned. func (i *FSTIterator) Next() error { return i.next(-1) } func (i *FSTIterator) next(lastOffset int) error { // remember where we started with keysStack in this next() call i.nextStart = append(i.nextStart[:0], i.keysStack...) nextOffset := lastOffset + 1 allowCompare := false OUTER: for true { curr := i.statesStack[len(i.statesStack)-1] autCurr := i.autStatesStack[len(i.autStatesStack)-1] if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare { // check to see if new keystack might have gone too far if i.endKeyExclusive != nil && bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { return ErrIteratorDone } cmp := bytes.Compare(i.keysStack, i.nextStart) if cmp > 0 { // in final state greater than start key return nil } } numTrans := curr.NumTransitions() INNER: for nextOffset < numTrans { t := curr.TransitionAt(nextOffset) autNext := i.aut.Accept(autCurr, t) if !i.aut.CanMatch(autNext) { // TODO: potential optimization to skip nextOffset // forwards more directly to something that the // automaton likes rather than a linear scan? 
nextOffset += 1 continue INNER } pos, nextAddr, v := curr.TransitionFor(t) // the next slot in the statesStack might have an // fstState instance that we can reuse var nextPrealloc fstState if len(i.statesStack) < cap(i.statesStack) { nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)] } // push onto stack next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc) if err != nil { return err } i.statesStack = append(i.statesStack, next) i.keysStack = append(i.keysStack, t) i.keysPosStack = append(i.keysPosStack, pos) i.valsStack = append(i.valsStack, v) i.autStatesStack = append(i.autStatesStack, autNext) nextOffset = 0 allowCompare = true continue OUTER } // no more transitions, so need to backtrack and stack pop if len(i.statesStack) <= 1 { // stack len is 1 (root), can't go back further, we're done break } // if the top of the stack represents a linear chain of states // (i.e., a suffix of nodes linked by single transitions), // then optimize by popping the suffix in one shot without // going back all the way to the OUTER loop var popNum int for j := len(i.statesStack) - 1; j > 0; j-- { if j == 1 || i.statesStack[j].NumTransitions() != 1 { popNum = len(i.statesStack) - 1 - j break } } if popNum < 1 { // always pop at least 1 entry from the stacks popNum = 1 } nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1 allowCompare = false i.statesStack = i.statesStack[:len(i.statesStack)-popNum] i.keysStack = i.keysStack[:len(i.keysStack)-popNum] i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum] i.valsStack = i.valsStack[:len(i.valsStack)-popNum] i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum] } return ErrIteratorDone } // Seek advances this iterator to the specified key/value pair. If this key // is not in the FST, Current() will return the next largest key. If this // seek operation would go past the last key, or outside the configured // startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned. 
func (i *FSTIterator) Seek(key []byte) error { return i.pointTo(key) } // Close will free any resources held by this iterator. func (i *FSTIterator) Close() error { // at the moment we don't do anything, // but wanted this for API completeness return nil } ================================================ FILE: fst_iterator_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "bytes" "reflect" "testing" "github.com/couchbase/vellum/levenshtein" "github.com/couchbase/vellum/regexp" ) func TestIterator(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } got := map[string]uint64{} itr, err := fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(smallSample, got) { t.Errorf("expected %v, got: %v", smallSample, got) } } func TestIteratorReset(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err 
!= nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } itr, err := fst.Iterator(nil, nil) if err != nil { t.Fatalf("error creating an iterator: %v", err) } buf.Reset() b, err = New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } smallSample2 := map[string]uint64{ "bold": 25, "last": 1, "next": 500, "tank": 0, } err = insertStringMap(b, smallSample2) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err = Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } got := map[string]uint64{} err = itr.Reset(fst, nil, nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(smallSample2, got) { t.Errorf("expected %v, got: %v", smallSample2, got) } } func TestIteratorStartKey(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } // with start key < "mon", we should still get it got := map[string]uint64{} itr, err := fst.Iterator([]byte("a"), nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(smallSample, got) { t.Errorf("expected %v, got: %v", smallSample, got) } // with start key = "mon", we should still get it got = map[string]uint64{} itr, err = fst.Iterator([]byte("mon"), nil) for err == nil { key, val := itr.Current() got[string(key)] = val 
err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(smallSample, got) { t.Errorf("expected %v, got: %v", smallSample, got) } // with start key > "mon", we don't expect to get it expect := map[string]uint64{ "tues": smallSample["tues"], "thurs": smallSample["thurs"], "tye": smallSample["tye"], } got = map[string]uint64{} itr, err = fst.Iterator([]byte("mona"), nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } // with start key > "mon", we don't expect to get it expect = map[string]uint64{ "tues": smallSample["tues"], "thurs": smallSample["thurs"], "tye": smallSample["tye"], } got = map[string]uint64{} itr, err = fst.Iterator([]byte("my"), nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } } func TestIteratorEndKey(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } // with end key > "tye", we should still get it got := map[string]uint64{} itr, err := fst.Iterator(nil, []byte("zeus")) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(smallSample, got) { t.Errorf("expected %v, got: %v", smallSample, got) } // with end key = "tye", we should NOT get it (end key exclusive) expect := map[string]uint64{ "mon": 
smallSample["mon"], "tues": smallSample["tues"], "thurs": smallSample["thurs"], } got = map[string]uint64{} itr, err = fst.Iterator(nil, []byte("tye")) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } // with start key < "tye", we don't expect to get it got = map[string]uint64{} itr, err = fst.Iterator(nil, []byte("tv")) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } } func TestIteratorSeek(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } // seek past thurs (exactly to tues) expect := map[string]uint64{ "mon": smallSample["mon"], "tues": smallSample["tues"], "tye": smallSample["tye"], } got := map[string]uint64{} itr, err := fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val if string(key) == "mon" { err = itr.Seek([]byte("tue")) } else { err = itr.Next() } } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } // similar but seek to something after thurs before tues got = map[string]uint64{} itr, err = fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val if string(key) == "mon" { err = itr.Seek([]byte("thv")) } else { err = itr.Next() } } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if 
!reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } // similar but seek to thurs+suffix got = map[string]uint64{} itr, err = fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val if string(key) == "mon" { err = itr.Seek([]byte("thursday")) } else { err = itr.Next() } } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } // seek past last key (still inside iterator boundaries) expect = map[string]uint64{ "mon": smallSample["mon"], } got = map[string]uint64{} itr, err = fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val if string(key) == "mon" { err = itr.Seek([]byte("zzz")) } else { err = itr.Next() } } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } } func TestIteratorSeekOutsideBoundaries(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } // first test with boundaries should just see thurs/tues expect := map[string]uint64{ "thurs": smallSample["thurs"], "tues": smallSample["tues"], } got := map[string]uint64{} itr, err := fst.Iterator([]byte("th"), []byte("tuesd")) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } // this time try to seek before the start, // still shouldn't see mon got = map[string]uint64{} itr, err = fst.Iterator([]byte("th"), []byte("tuesd")) if 
err != nil { t.Fatalf("error before seeking: %v", err) } err = itr.Seek([]byte("cat")) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(expect, got) { t.Errorf("expected %v, got: %v", expect, got) } // this time try to seek past the end // should see nothing itr, err = fst.Iterator([]byte("th"), []byte("tuesd")) if err != nil { t.Fatalf("error before seeking: %v", err) } err = itr.Seek([]byte("ty")) if err != ErrIteratorDone { t.Fatalf("expected ErrIteratorDone, got %v", err) } } var key []byte var val uint64 func BenchmarkFSTIteratorAllInMem(b *testing.B) { // first build the FST once dataset := thousandTestWords randomThousandVals := randomValues(dataset) var buf bytes.Buffer builder, err := New(&buf, nil) if err != nil { b.Fatalf("error creating builder: %v", err) } err = insertStrings(builder, dataset, randomThousandVals) if err != nil { b.Fatalf("error inserting thousand words: %v", err) } err = builder.Close() if err != nil { b.Fatalf("error closing builder: %v", err) } b.ResetTimer() for i := 0; i < b.N; i++ { fst, err := Load(buf.Bytes()) if err != nil { b.Fatalf("error loading FST: %v", err) } itr, err := fst.Iterator(nil, nil) for err == nil { key, val = itr.Current() err = itr.Next() } if err != ErrIteratorDone { b.Fatalf("iterator error: %v", err) } err = fst.Close() if err != nil { b.Fatalf("error closing FST: %v", err) } } } func TestFuzzySearch(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } lb, err := levenshtein.NewLevenshteinAutomatonBuilder(uint8(1), false) if err != nil { t.Fatalf("error loading set: %v", 
err) } fuzzy, err := lb.BuildDfa("tue", 1) if err != nil { t.Fatalf("error building levenshtein automaton: %v", err) } want := map[string]uint64{ "tues": 3, "tye": 99, } got := map[string]uint64{} itr, err := fst.Search(fuzzy, nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(want, got) { t.Errorf("expected %v, got: %v", want, got) } } func TestRegexpSearch(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } r, err := regexp.New(`t.*s`) if err != nil { t.Fatalf("error building regexp automaton: %v", err) } want := map[string]uint64{ "thurs": 5, "tues": 3, } got := map[string]uint64{} itr, err := fst.Search(r, nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(want, got) { t.Errorf("expected %v, got: %v", want, got) } got = map[string]uint64{} itr, err = fst.Search(r, []byte("t"), nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(want, got) { t.Errorf("with start key t, expected %v, got: %v", want, got) } got = map[string]uint64{} itr, err = fst.Search(r, nil, []byte("u")) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(want, got) { t.Errorf("with end key u, expected %v, got: %v", want, got) } got = map[string]uint64{} itr, err = fst.Search(r, 
[]byte("t"), []byte("u")) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(want, got) { t.Errorf("with start key t, end key u, expected %v, got: %v", want, got) } } func TestIssue32(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = b.Insert(bytes.Repeat([]byte{'a'}, 1000000), 0) if err != nil { t.Fatalf("error inserting large key: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatalf("error loading set: %v", err) } itr, err := fst.Iterator(nil, nil) for err == nil { err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } } ================================================ FILE: go.mod ================================================ module github.com/couchbase/vellum go 1.12 require ( github.com/blevesearch/mmap-go v1.0.2 github.com/spf13/cobra v0.0.5 github.com/willf/bitset v1.1.10 golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a // indirect ) ================================================ FILE: go.sum ================================================ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/blevesearch/mmap-go v1.0.2 h1:JtMHb+FgQCTTYIhtMvimw15dJwu1Y5lrZDMOFXVWPk0= github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/cpuguy83/go-md2man 
v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/willf/bitset v1.1.10 
h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= ================================================ FILE: levenshtein/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
================================================ FILE: levenshtein/README.md ================================================
# levenshtein

levenshtein automaton

This package makes it fast and simple to build a deterministic finite automaton that computes the Levenshtein distance from a given string.

# Sample usage:

```
// build a re-usable builder
lb, err := NewLevenshteinAutomatonBuilder(2, false)
if err != nil {
	// handle the error
}

origTerm := "couchbasefts"
dfa, err := lb.BuildDfa("couchbases", 2)
if err != nil {
	// handle the error
}
ed := dfa.eval([]byte(origTerm))
if ed.distance() != 2 {
	log.Errorf("expected distance 2, actual: %d", ed.distance())
}
```

This implementation is inspired by this [blog post](https://fulmicoton.com/posts/levenshtein/) and is intended to be a port of the original Rust implementation: https://github.com/tantivy-search/levenshtein-automata

Micro-benchmark results against the current vellum/levenshtein are shown below.

```
BenchmarkNewEditDistance1-8 30000 52684 ns/op 89985 B/op 295 allocs/op
BenchmarkOlderEditDistance1-8 10000 132931 ns/op 588892 B/op 363 allocs/op
BenchmarkNewEditDistance2-8 10000 199127 ns/op 377532 B/op 1019 allocs/op
BenchmarkOlderEditDistance2-8 2000 988109 ns/op 4236609 B/op 1898 allocs/op
```
================================================ FILE: levenshtein/alphabet.go ================================================
// Copyright (c) 2018 Couchbase, Inc.
// FullCharacteristicVector is a bitset packed 32 bits per word. As built by
// queryChars, bit i is set when the i-th character (rune) of the query equals
// the character the vector belongs to, and one extra zero word is appended.
type FullCharacteristicVector []uint32

// shiftAndMask returns the 32-bit window of the vector that starts at bit
// position offset, ANDed with mask.
func (fcv FullCharacteristicVector) shiftAndMask(offset, mask uint32) uint32 {
	bucketID := offset / 32
	align := offset - bucketID*32
	if align == 0 {
		return fcv[bucketID] & mask
	}
	// The window straddles two words, so fcv[bucketID+1] is read as well;
	// queryChars appends a trailing zero word to keep that read in range.
	left := fcv[bucketID] >> align
	right := fcv[bucketID+1] << (32 - align)
	return (left | right) & mask
}

// tuple pairs a query character with its characteristic vector.
type tuple struct {
	char rune
	fcv  FullCharacteristicVector
}

// sortRunes implements sort.Interface, ordering runes by code point.
type sortRunes []rune

func (s sortRunes) Less(i, j int) bool {
	return s[i] < s[j]
}

func (s sortRunes) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

func (s sortRunes) Len() int {
	return len(s)
}

// sortRune sorts r in place in ascending order and returns it.
func sortRune(r []rune) []rune {
	sort.Sort(sortRunes(r))
	return r
}

// Alphabet is the sorted set of distinct characters of a query, each paired
// with its characteristic vector, plus a cursor used by next.
type Alphabet struct {
	charset []tuple
	index   uint32
}

// resetNext rewinds the cursor so that next starts again from the first
// character.
func (a *Alphabet) resetNext() {
	a.index = 0
}

// next returns the character and characteristic vector at the cursor and
// advances the cursor. Once every character has been returned it reports an
// "eof" error.
func (a *Alphabet) next() (rune, FullCharacteristicVector, error) {
	if int(a.index) >= len(a.charset) {
		return 0, nil, fmt.Errorf("eof")
	}
	rv := a.charset[a.index]
	a.index++
	return rv.char, rv.fcv, nil
}

// dedupe returns the distinct runes of in, in order of first appearance.
func dedupe(in string) string {
	lookUp := make(map[rune]struct{}, len(in))
	var rv string
	for len(in) > 0 {
		r, size := utf8.DecodeRuneInString(in)
		in = in[size:]
		if _, ok := lookUp[r]; !ok {
			rv += string(r)
			lookUp[r] = struct{}{}
		}
	}
	return rv
}

// queryChars builds the Alphabet of qChars: one entry per distinct character,
// sorted by code point, whose vector has bit i set when the i-th rune of
// qChars is that character.
//
// Bit positions are rune indexes, so the query is decoded to runes once and
// indexed by rune. Chunking the raw string every 32 bytes would misalign the
// bits (and could split a rune) as soon as a query containing multi-byte
// characters exceeds 32 bytes.
func queryChars(qChars string) Alphabet {
	chars := dedupe(qChars)
	inChars := sortRune([]rune(chars))
	charsets := make([]tuple, 0, len(inChars))

	queryRunes := []rune(qChars)
	numWords := (len(queryRunes) + 31) / 32

	for _, c := range inChars {
		// One word per 32 runes plus the trailing zero word that
		// shiftAndMask relies on.
		bits := make([]uint32, numWords+1)
		for i, chr := range queryRunes {
			if chr == c {
				bits[i/32] |= uint32(1) << (uint(i) % 32)
			}
		}
		charsets = append(charsets, tuple{char: c, fcv: FullCharacteristicVector(bits)})
	}
	return Alphabet{charset: charsets}
}
package levenshtein import "testing" func TestAlphabet(t *testing.T) { chars := "happy" alphabet := queryChars(chars) c, chi, _ := alphabet.next() if c != 'a' { t.Errorf("expecting 'a', got: %v", c) } if chi[0] != 2 { t.Errorf("expecting 2, got: %v", chi[0]) } c, chi, _ = alphabet.next() if c != 'h' { t.Errorf("expecting 'h', got: %v", c) } if chi[0] != 1 { t.Errorf("expecting 1, got: %v", chi[0]) } c, chi, _ = alphabet.next() if c != 'p' { t.Errorf("expecting 'p', got: %v", c) } if chi[0] != 12 { t.Errorf("expecting 12, got: %v", chi[0]) } c, chi, _ = alphabet.next() if c != 'y' { t.Errorf("expecting 'y', got: %v", c) } if chi[0] != 16 { t.Errorf("expecting 16, got: %v", chi[0]) } } func TestFullCharacteristic(t *testing.T) { fcv := FullCharacteristicVector([]uint32{2, 0}) if fcv.shiftAndMask(1, 1) != 1 { t.Errorf("expected 1, got: %v", fcv.shiftAndMask(1, 1)) } fcv = FullCharacteristicVector([]uint32{1<<5 + 1<<10, 0}) if fcv.shiftAndMask(3, 63) != 4 { t.Errorf("expected 4, got: %v", fcv.shiftAndMask(3, 63)) } } func TestLongCharacteristic(t *testing.T) { qChars := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcabewa" alphabet := queryChars(qChars) c, chi, _ := alphabet.next() if c != 'a' { t.Errorf("expecting 'a', got: %v", c) } if chi.shiftAndMask(0, 7) != 7 { t.Errorf("expecting 7 , got: %v", chi.shiftAndMask(0, 7)) } if chi.shiftAndMask(28, 7) != 3 { t.Errorf("expecting 3 , got: %v", chi.shiftAndMask(28, 7)) } if chi.shiftAndMask(28, 127) != 1+2+16 { t.Errorf("expecting 19 , got: %v", chi.shiftAndMask(28, 127)) } if chi.shiftAndMask(28, 4095) != 1+2+16+256 { t.Errorf("expecting 275 , got: %v", chi.shiftAndMask(28, 4095)) } c, chi, _ = alphabet.next() if c != 'b' { t.Errorf("expecting 'b', got: %v", c) } if chi.shiftAndMask(0, 7) != 0 { t.Errorf("expecting 0 , got: %v", chi.shiftAndMask(0, 7)) } if chi.shiftAndMask(28, 15) != 4 { t.Errorf("expecting 4 , got: %v", chi.shiftAndMask(28, 15)) } if chi.shiftAndMask(28, 63) != 4+32 { t.Errorf("expecting 36 , got: %v", 
chi.shiftAndMask(28, 63)) } } ================================================ FILE: levenshtein/benchmark_test.go ================================================ // Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package levenshtein import ( "testing" ) func BenchmarkNewEvalEditDistance1(b *testing.B) { lb, _ := NewLevenshteinAutomatonBuilder(1, true) query := "coucibase" for i := 0; i < b.N; i++ { dfa, _ := lb.BuildDfa("couchbase", 1) ed := dfa.eval([]byte(query)) if ed.distance() != 1 { b.Errorf("expected distance 1, actual: %d", ed.distance()) } } } func BenchmarkNewEvalEditDistance2(b *testing.B) { lb, _ := NewLevenshteinAutomatonBuilder(2, false) query := "couchbasefts" for i := 0; i < b.N; i++ { dfa, _ := lb.BuildDfa("couchbases", 2) ed := dfa.eval([]byte(query)) if ed.distance() != 2 { b.Errorf("expected distance 2, actual: %d", ed.distance()) } } } func BenchmarkNewEditDistance1(b *testing.B) { lb, _ := NewLevenshteinAutomatonBuilder(1, true) query := "coucibase" for i := 0; i < b.N; i++ { dfa, _ := lb.BuildDfa("couchbase", 1) state := dfa.initialState() for _, b := range []byte(query) { state = dfa.transition(state, b) } if !dfa.IsMatch(state) { b.Errorf("expected isMatch %t, got %t", true, !dfa.IsMatch(state)) } } } func BenchmarkNewEditDistance2(b *testing.B) { lb, _ := NewLevenshteinAutomatonBuilder(2, false) query := "couchbasefts" for i := 0; i < b.N; i++ { dfa, _ := lb.BuildDfa("couchbases", 2) state := 
dfa.initialState() for _, b := range []byte(query) { state = dfa.transition(state, b) } if !dfa.IsMatch(state) { b.Errorf("expected isMatch %t, got %t", true, !dfa.IsMatch(state)) } } } ================================================ FILE: levenshtein/dfa.go ================================================ // Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package levenshtein import ( "fmt" "math" ) const SinkState = uint32(0) type DFA struct { transitions [][256]uint32 distances []Distance initState int ed uint8 } /// Returns the initial state func (d *DFA) initialState() int { return d.initState } /// Returns the Levenshtein distance associated to the /// current state. func (d *DFA) distance(stateId int) Distance { return d.distances[stateId] } /// Returns the number of states in the `DFA`. func (d *DFA) numStates() int { return len(d.transitions) } /// Returns the destination state reached after consuming a given byte. 
func (d *DFA) transition(fromState int, b uint8) int { return int(d.transitions[fromState][b]) } func (d *DFA) eval(bytes []uint8) Distance { state := d.initialState() for _, b := range bytes { state = d.transition(state, b) } return d.distance(state) } func (d *DFA) Start() int { return int(d.initialState()) } func (d *DFA) IsMatch(state int) bool { if _, ok := d.distance(state).(Exact); ok { return true } return false } func (d *DFA) CanMatch(state int) bool { return state > 0 && state < d.numStates() } func (d *DFA) Accept(state int, b byte) int { return int(d.transition(state, b)) } // WillAlwaysMatch returns if the specified state will always end in a // matching state. func (d *DFA) WillAlwaysMatch(state int) bool { return false } func fill(dest []uint32, val uint32) { for i := range dest { dest[i] = val } } func fillTransitions(dest *[256]uint32, val uint32) { for i := range dest { dest[i] = val } } type Utf8DFAStateBuilder struct { dfaBuilder *Utf8DFABuilder stateID uint32 defaultSuccessor []uint32 } func (sb *Utf8DFAStateBuilder) addTransitionID(fromStateID uint32, b uint8, toStateID uint32) { sb.dfaBuilder.transitions[fromStateID][b] = toStateID } func (sb *Utf8DFAStateBuilder) addTransition(in rune, toStateID uint32) { fromStateID := sb.stateID chars := []byte(string(in)) lastByte := chars[len(chars)-1] for i, ch := range chars[:len(chars)-1] { remNumBytes := len(chars) - i - 1 defaultSuccessor := sb.defaultSuccessor[remNumBytes] intermediateStateID := sb.dfaBuilder.transitions[fromStateID][ch] if intermediateStateID == defaultSuccessor { intermediateStateID = sb.dfaBuilder.allocate() fillTransitions(&sb.dfaBuilder.transitions[intermediateStateID], sb.defaultSuccessor[remNumBytes-1]) } sb.addTransitionID(fromStateID, ch, intermediateStateID) fromStateID = intermediateStateID } toStateIDDecoded := sb.dfaBuilder.getOrAllocate(original(toStateID)) sb.addTransitionID(fromStateID, lastByte, toStateIDDecoded) } type Utf8StateId uint32 func original(stateId 
uint32) Utf8StateId { return predecessor(stateId, 0) } func predecessor(stateId uint32, numSteps uint8) Utf8StateId { return Utf8StateId(stateId*4 + uint32(numSteps)) } // Utf8DFABuilder makes it possible to define a DFA // that takes unicode character, and build a `DFA` // that operates on utf-8 encoded type Utf8DFABuilder struct { index []uint32 distances []Distance transitions [][256]uint32 initialState uint32 numStates uint32 maxNumStates uint32 } func withMaxStates(maxStates uint32) *Utf8DFABuilder { rv := &Utf8DFABuilder{ index: make([]uint32, maxStates*2+100), distances: make([]Distance, 0, maxStates), transitions: make([][256]uint32, 0, maxStates), maxNumStates: maxStates, } for i := range rv.index { rv.index[i] = math.MaxUint32 } return rv } func (dfab *Utf8DFABuilder) allocate() uint32 { newState := dfab.numStates dfab.numStates++ dfab.distances = append(dfab.distances, Atleast{d: 255}) dfab.transitions = append(dfab.transitions, [256]uint32{}) return newState } func (dfab *Utf8DFABuilder) getOrAllocate(state Utf8StateId) uint32 { if int(state) >= cap(dfab.index) { cloneIndex := make([]uint32, int(state)*2) copy(cloneIndex, dfab.index) dfab.index = cloneIndex } if dfab.index[state] != math.MaxUint32 { return dfab.index[state] } nstate := dfab.allocate() dfab.index[state] = nstate return nstate } func (dfab *Utf8DFABuilder) setInitialState(iState uint32) { decodedID := dfab.getOrAllocate(original(iState)) dfab.initialState = decodedID } func (dfab *Utf8DFABuilder) build(ed uint8) *DFA { return &DFA{ transitions: dfab.transitions, distances: dfab.distances, initState: int(dfab.initialState), ed: ed, } } func (dfab *Utf8DFABuilder) addState(state, default_suc_orig uint32, distance Distance) (*Utf8DFAStateBuilder, error) { if state > dfab.maxNumStates { return nil, fmt.Errorf("State id is larger than maxNumStates") } stateID := dfab.getOrAllocate(original(state)) dfab.distances[stateID] = distance defaultSuccID := 
dfab.getOrAllocate(original(default_suc_orig)) // creates a chain of states of predecessors of `default_suc_orig`. // Accepting k-bytes (whatever the bytes are) from `predecessor_states[k-1]` // leads to the `default_suc_orig` state. predecessorStates := []uint32{defaultSuccID, defaultSuccID, defaultSuccID, defaultSuccID} for numBytes := uint8(1); numBytes < 4; numBytes++ { predecessorState := predecessor(default_suc_orig, numBytes) predecessorStateID := dfab.getOrAllocate(predecessorState) predecessorStates[numBytes] = predecessorStateID succ := predecessorStates[numBytes-1] fillTransitions(&dfab.transitions[predecessorStateID], succ) } // 1-byte encoded chars. fill(dfab.transitions[stateID][0:192], predecessorStates[0]) // 2-bytes encoded chars. fill(dfab.transitions[stateID][192:224], predecessorStates[1]) // 3-bytes encoded chars. fill(dfab.transitions[stateID][224:240], predecessorStates[2]) // 4-bytes encoded chars. fill(dfab.transitions[stateID][240:256], predecessorStates[3]) return &Utf8DFAStateBuilder{ dfaBuilder: dfab, stateID: stateID, defaultSuccessor: predecessorStates}, nil } ================================================ FILE: levenshtein/dfa_test.go ================================================ // Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package levenshtein import ( "testing" ) func TestLevenshtein2(t *testing.T) { dfaBuilder := withMaxStates(2) dfaBuilder.addState(0, 1, Exact{d: 1}) dfaBuilder.addState(1, 0, Exact{d: 0}) dfaBuilder.setInitialState(1) _ = dfaBuilder.build(1) } ================================================ FILE: levenshtein/levenshtein.go ================================================ // Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package levenshtein import "fmt" // StateLimit is the maximum number of states allowed const StateLimit = 10000 // ErrTooManyStates is returned if you attempt to build a Levenshtein // automaton which requires too many states. var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states", StateLimit) // LevenshteinAutomatonBuilder wraps a precomputed // datastructure that allows to produce small (but not minimal) DFA. type LevenshteinAutomatonBuilder struct { pDfa *ParametricDFA } // NewLevenshteinAutomatonBuilder creates a // reusable, threadsafe Levenshtein automaton builder. // `maxDistance` - maximum distance considered by the automaton. // `transposition` - assign a distance of 1 for transposition // // Building this automaton builder is computationally intensive. // While it takes only a few milliseconds for `d=2`, it grows // exponentially with `d`. It is only reasonable to `d <= 5`. 
func NewLevenshteinAutomatonBuilder(maxDistance uint8, transposition bool) (*LevenshteinAutomatonBuilder, error) { lnfa := newLevenshtein(maxDistance, transposition) pdfa, err := fromNfa(lnfa) if err != nil { return nil, err } return &LevenshteinAutomatonBuilder{pDfa: pdfa}, nil } // BuildDfa builds the levenshtein automaton for serving // queries with a given edit distance. func (lab *LevenshteinAutomatonBuilder) BuildDfa(query string, fuzziness uint8) (*DFA, error) { return lab.pDfa.buildDfa(query, fuzziness, false) } // MaxDistance returns the MaxEdit distance supported by the // LevenshteinAutomatonBuilder builder. func (lab *LevenshteinAutomatonBuilder) MaxDistance() uint8 { return lab.pDfa.maxDistance } ================================================ FILE: levenshtein/levenshtein_nfa.go ================================================ // Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package levenshtein import ( "math" "sort" ) /// Levenshtein Distance computed by a Levenshtein Automaton. /// /// Levenshtein automata can only compute the exact Levenshtein distance /// up to a given `max_distance`. /// /// Over this distance, the automaton will invariably /// return `Distance::AtLeast(max_distance + 1)`. 
// Distance is the Levenshtein distance computed by a Levenshtein automaton.
//
// Levenshtein automata can only compute the exact Levenshtein distance up to
// a given maximum distance. Over this distance, the automaton invariably
// reports Atleast{d: maxDistance + 1}.
type Distance interface {
	distance() uint8
}

// Exact is a Distance whose value is the exact edit distance.
type Exact struct {
	d uint8
}

func (e Exact) distance() uint8 {
	return e.d
}

// Atleast is a Distance whose value is only a lower bound.
type Atleast struct {
	d uint8
}

func (a Atleast) distance() uint8 {
	return a.d
}

// characteristicVector returns a bitmask in which bit i is set when query[i]
// equals c. Positions at or beyond 64 do not fit in the result and are
// dropped.
func characteristicVector(query []rune, c rune) uint64 {
	chi := uint64(0)
	for i := 0; i < len(query); i++ {
		if query[i] == c {
			chi |= 1 << uint64(i)
		}
	}
	return chi
}

// NFAState is one state of the Levenshtein NFA: a position in the query, the
// number of edits spent so far, and whether a transposition is in progress.
type NFAState struct {
	Offset      uint32
	Distance    uint8
	InTranspose bool
}

// NFAStates implements sort.Interface, ordering by Offset, then Distance,
// then non-transposing states before transposing ones.
type NFAStates []NFAState

func (ns NFAStates) Len() int {
	return len(ns)
}

func (ns NFAStates) Less(i, j int) bool {
	if ns[i].Offset != ns[j].Offset {
		return ns[i].Offset < ns[j].Offset
	}

	if ns[i].Distance != ns[j].Distance {
		return ns[i].Distance < ns[j].Distance
	}

	return !ns[i].InTranspose && ns[j].InTranspose
}

func (ns NFAStates) Swap(i, j int) {
	ns[i], ns[j] = ns[j], ns[i]
}

// imply reports whether ns makes other redundant: other's distance is no
// better than ns's distance plus the offset gap between them. The comparison
// is strict when ns cannot stand in for other's transposition flag.
func (ns *NFAState) imply(other NFAState) bool {
	transposeImply := ns.InTranspose || !other.InTranspose
	required := uint32(ns.Distance) + dist(ns.Offset, other.Offset)

	if transposeImply {
		return uint32(other.Distance) >= required
	}

	return uint32(other.Distance) > required
}

// MultiState is a set of NFA states in which no state implies another.
type MultiState struct {
	states []NFAState
}

// States returns the states of the set.
func (ms *MultiState) States() []NFAState {
	return ms.states
}

// Clear empties the set while keeping its storage.
func (ms *MultiState) Clear() {
	ms.states = ms.states[:0]
}

func newMultiState() *MultiState {
	return &MultiState{states: make([]NFAState, 0)}
}

// normalize subtracts the smallest offset from every state, sorts the states,
// and returns the offset that was subtracted (0 for an empty set).
func (ms *MultiState) normalize() uint32 {
	minOffset := uint32(math.MaxUint32)

	for _, s := range ms.states {
		if s.Offset < minOffset {
			minOffset = s.Offset
		}
	}
	if minOffset == uint32(math.MaxUint32) {
		minOffset = 0
	}

	for i := 0; i < len(ms.states); i++ {
		ms.states[i].Offset -= minOffset
	}

	sort.Sort(NFAStates(ms.states))

	return minOffset
}

// addStates inserts nState unless an existing state already implies it, and
// removes the existing states that nState implies.
func (ms *MultiState) addStates(nState NFAState) {
	for _, s := range ms.states {
		if s.imply(nState) {
			return
		}
	}

	i := 0
	for i < len(ms.states) {
		if nState.imply(ms.states[i]) {
			ms.states = append(ms.states[:i], ms.states[i+1:]...)
		} else {
			i++
		}
	}
	ms.states = append(ms.states, nState)
}

// extractBit reports whether bit pos of bitset is set.
func extractBit(bitset uint64, pos uint8) bool {
	shift := bitset >> pos
	bit := shift & 1
	return bit == uint64(1)
}

// dist returns the absolute difference between left and right.
func dist(left, right uint32) uint32 {
	if left > right {
		return left - right
	}
	return right - left
}

// LevenshteinNFA is a Levenshtein automaton bounded by mDistance edits;
// damerau enables transpositions as a single edit.
type LevenshteinNFA struct {
	mDistance uint8
	damerau   bool
}

func newLevenshtein(maxD uint8, transposition bool) *LevenshteinNFA {
	return &LevenshteinNFA{mDistance: maxD,
		damerau: transposition,
	}
}

func (la *LevenshteinNFA) maxDistance() uint8 {
	return la.mDistance
}

// msDiameter returns the width, in query positions, of the characteristic
// vector window used by transition: 2*mDistance + 1.
func (la *LevenshteinNFA) msDiameter() uint8 {
	return 2*la.mDistance + 1
}

// initialStates returns the multistate holding only the zero NFA state.
func (la *LevenshteinNFA) initialStates() *MultiState {
	ms := MultiState{}
	nfaState := NFAState{}
	ms.addStates(nfaState)
	return &ms
}

// multistateDistance returns the smallest distance at which some state of ms
// reaches the end of a query of length queryLen: Exact when it is within
// mDistance, Atleast{mDistance + 1} otherwise.
func (la *LevenshteinNFA) multistateDistance(ms *MultiState,
	queryLen uint32) Distance {
	limit := uint64(la.mDistance)
	best := limit + 1
	for _, s := range ms.states {
		// Summed in 64 bits: narrowing the offset gap to uint8 would wrap
		// for gaps of 256 or more and report a bogus small distance.
		t := uint64(s.Distance) + uint64(dist(queryLen, s.Offset))
		if t < best {
			best = t
		}
	}

	if best > limit {
		return Atleast{d: la.mDistance + 1}
	}

	return Exact{d: uint8(best)}
}

// simpleTransition adds to ms every NFA state reachable from state when the
// next input character has characteristic vector symbol (relative to
// state.Offset).
func (la *LevenshteinNFA) simpleTransition(state NFAState,
	symbol uint64, ms *MultiState) {

	if state.Distance < la.mDistance {
		// insertion
		ms.addStates(NFAState{Offset: state.Offset,
			Distance:    state.Distance + 1,
			InTranspose: false})

		// substitution
		ms.addStates(NFAState{Offset: state.Offset + 1,
			Distance:    state.Distance + 1,
			InTranspose: false})

		n := la.mDistance + 1 - state.Distance
		for d := uint8(1); d < n; d++ {
			if extractBit(symbol, d) {
				// for d > 0, as many deletion and character match
				ms.addStates(NFAState{Offset: state.Offset + 1 + uint32(d),
					Distance:    state.Distance + d,
					InTranspose: false})
			}
		}

		// transposition
		if la.damerau && extractBit(symbol, 1) {
			ms.addStates(NFAState{
				Offset:      state.Offset,
				Distance:    state.Distance + 1,
				InTranspose: true})
		}
	}

	if extractBit(symbol, 0) {
		ms.addStates(NFAState{Offset: state.Offset + 1,
			Distance:    state.Distance,
			InTranspose: false})
	}

	if state.InTranspose && extractBit(symbol, 0) {
		ms.addStates(NFAState{Offset: state.Offset + 2,
			Distance:    state.Distance,
			InTranspose: false})
	}
}

// transition replaces the contents of dState with the sorted set of states
// reachable from cState on a character whose characteristic vector is scv.
func (la *LevenshteinNFA) transition(cState *MultiState,
	dState *MultiState, scv uint64) {
	dState.Clear()
	mask := (uint64(1) << la.msDiameter()) - uint64(1)

	for _, state := range cState.states {
		cv := (scv >> state.Offset) & mask
		la.simpleTransition(state, cv, dState)
	}

	sort.Sort(NFAStates(dState.states))
}

// computeDistance runs the NFA for query over the characters of other and
// returns the resulting distance.
func (la *LevenshteinNFA) computeDistance(query, other []rune) Distance {
	cState := la.initialStates()
	nState := newMultiState()

	for _, i := range other {
		nState.Clear()
		chi := characteristicVector(query, i)
		la.transition(cState, nState, chi)
		cState, nState = nState, cState
	}

	return la.multistateDistance(cState, uint32(len(query)))
}
package levenshtein import ( "testing" ) func TestLevenshtein(t *testing.T) { hash := make(map[uint8]LevenshteinAutomatonBuilder, 4) for i := 0; i < 3; i++ { lb, err := NewLevenshteinAutomatonBuilder(uint8(i), false) if err != nil { t.Errorf("NewLevenshteinAutomatonBuilder(%d, false) failed, err: %v", i, err) } hash[uint8(i)] = *lb } tests := []struct { desc string query string distance uint8 seq []byte isMatch bool canMatch bool }{ { desc: "cat/0 - c a t", query: "cat", distance: 0, seq: []byte{'c', 'a', 't'}, isMatch: true, canMatch: true, }, { desc: "cat/1 - c a", query: "cat", distance: 1, seq: []byte{'c', 'a'}, isMatch: true, canMatch: true, }, { desc: "cat/1 - c a t s", query: "cat", distance: 1, seq: []byte{'c', 'a', 't', 's'}, isMatch: true, canMatch: true, }, { desc: "cat/0 - c a", query: "cat", distance: 0, seq: []byte{'c', 'a'}, isMatch: false, canMatch: true, }, { desc: "cat/0 - c a t s", query: "cat", distance: 0, seq: []byte{'c', 'a', 't', 's'}, isMatch: false, canMatch: false, }, { desc: "cate/1 - cate", query: "cate", distance: 1, seq: []byte{'c', 'a', 't', 'e'}, isMatch: true, canMatch: true, }, { desc: "cater/1 - cate", query: "cater", distance: 1, seq: []byte{'c', 'a', 't', 'e'}, isMatch: true, canMatch: true, }, { desc: "cater/1 - ctr", query: "cater", distance: 1, seq: []byte{'c', 't', 'r'}, isMatch: false, canMatch: false, }, { desc: "catered/2 - cater", query: "catered", distance: 2, seq: []byte{'c', 'a', 't', 'e', 'r'}, isMatch: true, canMatch: true, }, // this section contains cases where the sequence // of bytes encountered contains utf-8 encoded // multi-byte characters, which should count as 1 // for the purposes of the levenshtein edit distance { desc: "cat/0 - c 0xc3 0xa1 t (cát)", query: "cat", distance: 0, seq: []byte{'c', 0xc3, 0xa1, 't'}, isMatch: false, canMatch: false, }, { desc: "cat/1 - c 0xc3 0xa1 t (cát)", query: "cat", distance: 1, seq: []byte{'c', 0xc3, 0xa1, 't'}, isMatch: true, canMatch: true, }, { desc: "cat/1 - c 0xc3 
0xa1 t (cáts)", query: "cat", distance: 1, seq: []byte{'c', 0xc3, 0xa1, 't', 's'}, isMatch: false, canMatch: false, }, { desc: "cat/1 - 0xc3 0xa1 (á)", query: "cat", distance: 1, seq: []byte{0xc3, 0xa1}, isMatch: false, canMatch: true, }, { desc: "cat/1 - c 0xc3 0xa1 t (ácat)", query: "cat", distance: 1, seq: []byte{0xc3, 0xa1, 'c', 'a', 't'}, isMatch: true, canMatch: true, }, // this section has utf-8 encoded multi-byte characters // in the query, which should still just count as 1 // for the purposes of the levenshtein edit distance { desc: "cát/0 - c a t (cat)", query: "cát", distance: 0, seq: []byte{'c', 'a', 't'}, isMatch: false, canMatch: false, }, { desc: "cát/1 - c 0xc3 0xa1 (cá)", query: "cát", distance: 1, seq: []byte{'c', 0xc3, 0xa1}, isMatch: true, canMatch: true, }, { desc: "cát/1 - c 0xc3 0xa1 s (cás)", query: "cát", distance: 1, seq: []byte{'c', 0xc3, 0xa1, 's'}, isMatch: true, canMatch: true, }, { desc: "cát/1 - c 0xc3 0xa1 t a (cáta)", query: "cát", distance: 1, seq: []byte{'c', 0xc3, 0xa1, 't', 'a'}, isMatch: true, canMatch: true, }, { desc: "cát/1 - d 0xc3 0xa1 (dát)", query: "cát", distance: 1, seq: []byte{'d', 0xc3, 0xa1, 't'}, isMatch: true, canMatch: true, }, { desc: "cát/1 - c a t (cat)", query: "cát", distance: 1, seq: []byte{'c', 'a', 't'}, isMatch: true, canMatch: true, }, { desc: "cát/1 - c a t (cats)", query: "cát", distance: 1, seq: []byte{'c', 'a', 't', 's'}, isMatch: false, canMatch: false, }, { desc: "cát/1 - 0xc3, 0xa (á)", query: "cát", distance: 1, seq: []byte{0xc3, 0xa1}, isMatch: false, canMatch: true, }, { desc: "cát/1 - a c 0xc3 0xa1 t (acát)", query: "cát", distance: 1, seq: []byte{'a', 'c', 0xc3, 0xa1, 't'}, isMatch: true, canMatch: true, }, } for _, test := range tests { t.Run(test.desc, func(t *testing.T) { l, err := hash[uint8(test.distance)].pDfa.buildDfa(test.query, test.distance, false) if err != nil { t.Errorf("buildDfa(%s, %d, false) failed, err: %v", test.query, test.distance, err) } s := l.Start() for _, b := 
range test.seq { s = l.Accept(s, b) if uint32(s) == SinkState { break } } isMatch := l.IsMatch(s) if isMatch != test.isMatch { t.Errorf("expected isMatch %t, got %t", test.isMatch, isMatch) } canMatch := l.CanMatch(s) if canMatch != test.canMatch { t.Errorf("expectec canMatch %t, got %t", test.canMatch, canMatch) } }) } } func makeDistance(d uint8, md uint8) Distance { if d > md { return Atleast{d: md + 1} } return Exact{d: d} } func testLevenshteinNfaUtil(left, right string, ed uint8, t *testing.T) { for _, d := range []uint8{0, 1, 2, 3} { expectedDistance := makeDistance(ed, uint8(d)) lev := newLevenshtein(d, false) testSymmetric(lev, left, right, expectedDistance, t) } } func testSymmetric(lev *LevenshteinNFA, left, right string, expected Distance, t *testing.T) { levd := lev.computeDistance([]rune(left), []rune(right)) if levd.distance() != expected.distance() { t.Errorf("expected distance: %d, actual: %d", expected.distance(), levd.distance()) } levd = lev.computeDistance([]rune(right), []rune(left)) if levd.distance() != expected.distance() { t.Errorf("expected distance: %d, actual: %d", expected.distance(), levd.distance()) } } func TestLevenshteinNfa(t *testing.T) { testLevenshteinNfaUtil("abc", "abc", 0, t) testLevenshteinNfaUtil("abc", "abcd", 1, t) testLevenshteinNfaUtil("aab", "ab", 1, t) } /*func TestDeadState(t *testing.T) { nfa := newLevenshtein(2, false) pdfa := fromNfa(nfa) dfa := pdfa.buildDfa("abcdefghijklmnop", 0, false) state := dfa.initialState() r := []rune("X") state = dfa.transition(state, uint8(r[0])) if state != 0 { t.Errorf("expected state: 0, actual: %d", state) } state = dfa.transition(state, uint8(r[0])) if state != 0 { t.Errorf("expected state: 0, actual: %d", state) } state = dfa.transition(state, uint8(r[0])) if state != 0 { t.Errorf("expected state: 0, actual: %d", state) } }*/ func TestLevenshteinParametricDfa(t *testing.T) { lev := newLevenshtein(1, true) pDfa, err := fromNfa(lev) if err != nil { t.Errorf("fromNfa err: %v", err) 
} testStr := "abc" dfa, err := pDfa.buildDfa(testStr, 1, false) if err != nil { t.Errorf("buildDfa(%s, 1, false) failed, err: %v", testStr, err) } rd := dfa.eval([]byte("abc")) if rd.distance() != 0 { t.Errorf("expected distance 0, actual: %d", rd.distance()) } rd = dfa.eval([]byte("ab")) if rd.distance() != 1 { t.Errorf("expected distance 1, actual: %d", rd.distance()) } rd = dfa.eval([]byte("ac")) if rd.distance() != 1 { t.Errorf("expected distance 1, actual: %d", rd.distance()) } rd = dfa.eval([]byte("a")) if rd.distance() != 2 { t.Errorf("expected distance 2, actual: %d", rd.distance()) } rd = dfa.eval([]byte("abcd")) if rd.distance() != 1 { t.Errorf("expected distance 1, actual: %d", rd.distance()) } rd = dfa.eval([]byte("abdd")) if rd.distance() != 2 { t.Errorf("expected distance 2, actual: %d", rd.distance()) } testStr = "abcdefghijlmnopqrstuvwxyz" + "abcdefghijlmnopqrstuvwxyz" + "abcdefghijlmnopqrstuvwxyz" + "abcdefghijlmnopqrstuvwxyz" dfa, err = pDfa.buildDfa(testStr, 1, false) if err != nil { t.Errorf("buildDfa(%s, 1, false) failed, err: %v", testStr, err) } sample1 := "abcdefghijlmnopqrstuvwxyz" + "abcdefghijlnopqrstuvwxyz" + "abcdefghijlmnopqrstuvwxyz" + "abcdefghijlmnopqrstuvwxyz" rd = dfa.eval([]byte(sample1)) if rd.distance() != 1 { t.Errorf("expected distance 1, actual: %d", rd.distance()) } sample2 := "abcdefghijlmnopqrstuvwxyz" + "abcdefghijlnopqrstuvwxyz" + "abcdefghijlmnopqrstuvwxyz" + "abcdefghijlmnoprqstuvwxyz" rd = dfa.eval([]byte(sample2)) if rd.distance() != 2 { t.Errorf("expected distance 2, actual: %d", rd.distance()) } } func TestDamerau(t *testing.T) { nfa := newLevenshtein(2, true) testSymmetric(nfa, "abc", "abc", Exact{d: 0}, t) testSymmetric(nfa, "abc", "abcd", Exact{d: 1}, t) testSymmetric(nfa, "abcdef", "abddef", Exact{d: 1}, t) testSymmetric(nfa, "abcdef", "abdcef", Exact{d: 1}, t) } func TestLevenshteinDfa(t *testing.T) { nfa := newLevenshtein(2, false) pDfa, err := fromNfa(nfa) if err != nil { t.Errorf("fromNfa failed, err: %v", 
err) } dfa, err := pDfa.buildDfa("abcabcaaabc", 2, false) if err != nil { t.Errorf("buildDfa(abcabcaaabc, 1, false) failed, err: %v", err) } if dfa.numStates() != 273 { t.Errorf("expected number of states: 273, actual: %d", dfa.numStates()) } } func TestUtf8Simple(t *testing.T) { nfa := newLevenshtein(1, false) pDfa, err := fromNfa(nfa) if err != nil { t.Errorf("fromNfa failed, err: %v", err) } dfa, err := pDfa.buildDfa("あ", 1, false) if err != nil { t.Errorf("buildDfa(あ, 1, false) failed, err: %v", err) } ed := dfa.eval([]byte("あ")) if ed.distance() != 0 { t.Errorf("expected distance 0, actual: %d", ed.distance()) } } func TestSimple(t *testing.T) { query := "abcdef" nfa := newLevenshtein(2, false) pDfa, err := fromNfa(nfa) if err != nil { t.Errorf("fromNfa failed, err: %v", err) } dfa, err := pDfa.buildDfa(query, 1, false) if err != nil { t.Errorf("buildDfa(%s, 1, false) failed, err: %v", query, err) } ed := dfa.eval([]byte(query)) if ed.distance() != 0 { t.Errorf("expected distance 0, actual: %d", ed.distance()) } ed = dfa.eval([]byte("abcdf")) if ed.distance() != 1 { t.Errorf("expected distance 1, actual: %d", ed.distance()) } ed = dfa.eval([]byte("abcdgf")) if ed.distance() != 1 { t.Errorf("expected distance 1, actual: %d", ed.distance()) } ed = dfa.eval([]byte("abccdef")) if ed.distance() != 1 { t.Errorf("expected distance 1, actual: %d", ed.distance()) } } func TestJapanese(t *testing.T) { query := "寿司は焦げられない" nfa := newLevenshtein(2, false) pDfa, err := fromNfa(nfa) if err != nil { t.Errorf("fromNfa failed, err: %v", err) } dfa, err := pDfa.buildDfa(query, 2, false) if err != nil { t.Errorf("buildDfa(%s, 2, false) failed, err: %v", query, err) } ed := dfa.eval([]byte(query)) if ed.distance() != 0 { t.Errorf("expected distance 0, actual: %d", ed.distance()) } ed = dfa.eval([]byte("寿司は焦げられな")) if ed.distance() != 1 { t.Errorf("expected distance 1, actual: %d", ed.distance()) } ed = dfa.eval([]byte("寿司は焦げられなI")) if ed.distance() != 1 { t.Errorf("expected 
distance 1, actual: %d", ed.distance()) } ed = dfa.eval([]byte("寿司は焦られなI")) if ed.distance() != 2 { t.Errorf("expected distance 2, actual: %d", ed.distance()) } } func TestJapaneseEnglish(t *testing.T) { query := "寿a" nfa := newLevenshtein(1, false) pDfa, err := fromNfa(nfa) if err != nil { t.Errorf("fromNfa failed, err: %v", err) } dfa, err := pDfa.buildDfa(query, 1, false) if err != nil { t.Errorf("buildDfa(%s, 1, false) failed, err: %v", query, err) } ed := dfa.eval([]byte(query)) if ed.distance() != 0 { t.Errorf("expected distance 0, actual: %d", ed.distance()) } ed = dfa.eval([]byte("a")) if ed.distance() != 1 { t.Errorf("expected distance 0, actual: %d", ed.distance()) } } func TestTooManyStatesError(t *testing.T) { nfa := newLevenshtein(3, true) pDfa, err := fromNfa(nfa) if err != nil { t.Errorf("fromNfa failed, err: %v", err) } // query of length 139 characters won't generate 10K states as against // the current levenshtein limit of query length 50. lengthQuery := "1234567890123456789012345678901234567890123456789" + // 50 chars "1234567890123456789012345678901234567890123456789" + // 50 chars "1234567890123456789012345678901234567890" // 40 chars (total 140) _, err = pDfa.buildDfa(lengthQuery, 1, false) if err != ErrTooManyStates { t.Errorf("buildDfa(%s, 1, false) expected to fail with err: %v", lengthQuery, ErrTooManyStates) } } ================================================ FILE: levenshtein/parametric_dfa.go ================================================ // Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package levenshtein import ( "crypto/md5" "encoding/json" "fmt" "math" ) type ParametricState struct { shapeID uint32 offset uint32 } func newParametricState() ParametricState { return ParametricState{} } func (ps *ParametricState) isDeadEnd() bool { return ps.shapeID == 0 } type Transition struct { destShapeID uint32 deltaOffset uint32 } func (t *Transition) apply(state ParametricState) ParametricState { ps := ParametricState{ shapeID: t.destShapeID} // don't need any offset if we are in the dead state, // this ensures we have only one dead state. if t.destShapeID != 0 { ps.offset = state.offset + t.deltaOffset } return ps } type ParametricStateIndex struct { stateIndex []uint32 stateQueue []ParametricState numOffsets uint32 } func newParametricStateIndex(queryLen, numParamState uint32) ParametricStateIndex { numOffsets := queryLen + 1 if numParamState == 0 { numParamState = numOffsets } maxNumStates := numParamState * numOffsets psi := ParametricStateIndex{ stateIndex: make([]uint32, maxNumStates), stateQueue: make([]ParametricState, 0, 150), numOffsets: numOffsets, } for i := uint32(0); i < maxNumStates; i++ { psi.stateIndex[i] = math.MaxUint32 } return psi } func (psi *ParametricStateIndex) numStates() int { return len(psi.stateQueue) } func (psi *ParametricStateIndex) maxNumStates() int { return len(psi.stateIndex) } func (psi *ParametricStateIndex) get(stateID uint32) ParametricState { return psi.stateQueue[stateID] } func (psi *ParametricStateIndex) getOrAllocate(ps ParametricState) uint32 { bucket := ps.shapeID*psi.numOffsets + ps.offset if bucket < uint32(len(psi.stateIndex)) && psi.stateIndex[bucket] != math.MaxUint32 { return psi.stateIndex[bucket] } nState := uint32(len(psi.stateQueue)) psi.stateQueue = append(psi.stateQueue, ps) psi.stateIndex[bucket] = nState return nState } type ParametricDFA struct { distance []uint8 transitions []Transition 
maxDistance uint8 transitionStride uint32 diameter uint32 } func (pdfa *ParametricDFA) initialState() ParametricState { return ParametricState{shapeID: 1} } // Returns true iff whatever characters come afterward, // we will never reach a shorter distance func (pdfa *ParametricDFA) isPrefixSink(state ParametricState, queryLen uint32) bool { if state.isDeadEnd() { return true } remOffset := queryLen - state.offset if remOffset < pdfa.diameter { stateDistances := pdfa.distance[pdfa.diameter*state.shapeID:] prefixDistance := stateDistances[remOffset] if prefixDistance > pdfa.maxDistance { return false } for _, d := range stateDistances { if d < prefixDistance { return false } } return true } return false } func (pdfa *ParametricDFA) numStates() int { return len(pdfa.transitions) / int(pdfa.transitionStride) } func min(x, y uint32) uint32 { if x < y { return x } return y } func (pdfa *ParametricDFA) transition(state ParametricState, chi uint32) Transition { return pdfa.transitions[pdfa.transitionStride*state.shapeID+chi] } func (pdfa *ParametricDFA) getDistance(state ParametricState, qLen uint32) Distance { remainingOffset := qLen - state.offset if state.isDeadEnd() || remainingOffset >= pdfa.diameter { return Atleast{d: pdfa.maxDistance + 1} } dist := pdfa.distance[int(pdfa.diameter*state.shapeID)+int(remainingOffset)] if dist > pdfa.maxDistance { return Atleast{d: dist} } return Exact{d: dist} } func (pdfa *ParametricDFA) computeDistance(left, right string) Distance { state := pdfa.initialState() leftChars := []rune(left) for _, chr := range []rune(right) { start := state.offset stop := min(start+pdfa.diameter, uint32(len(leftChars))) chi := characteristicVector(leftChars[start:stop], chr) transition := pdfa.transition(state, uint32(chi)) state = transition.apply(state) if state.isDeadEnd() { return Atleast{d: pdfa.maxDistance + 1} } } return pdfa.getDistance(state, uint32(len(left))) } func (pdfa *ParametricDFA) buildDfa(query string, distance uint8, prefix bool) 
(*DFA, error) { qLen := uint32(len([]rune(query))) alphabet := queryChars(query) psi := newParametricStateIndex(qLen, uint32(pdfa.numStates())) maxNumStates := psi.maxNumStates() deadEndStateID := psi.getOrAllocate(newParametricState()) if deadEndStateID != 0 { return nil, fmt.Errorf("Invalid dead end state") } initialStateID := psi.getOrAllocate(pdfa.initialState()) dfaBuilder := withMaxStates(uint32(maxNumStates)) mask := uint32((1 << pdfa.diameter) - 1) var stateID int for stateID = 0; stateID < StateLimit; stateID++ { if stateID == psi.numStates() { break } state := psi.get(uint32(stateID)) if prefix && pdfa.isPrefixSink(state, qLen) { distance := pdfa.getDistance(state, qLen) dfaBuilder.addState(uint32(stateID), uint32(stateID), distance) } else { transition := pdfa.transition(state, 0) defSuccessor := transition.apply(state) defSuccessorID := psi.getOrAllocate(defSuccessor) distance := pdfa.getDistance(state, qLen) stateBuilder, err := dfaBuilder.addState(uint32(stateID), defSuccessorID, distance) if err != nil { return nil, fmt.Errorf("parametric_dfa: buildDfa, err: %v", err) } alphabet.resetNext() chr, cv, err := alphabet.next() for err == nil { chi := cv.shiftAndMask(state.offset, mask) transition := pdfa.transition(state, chi) destState := transition.apply(state) destStateID := psi.getOrAllocate(destState) stateBuilder.addTransition(chr, destStateID) chr, cv, err = alphabet.next() } } } if stateID == StateLimit { return nil, ErrTooManyStates } dfaBuilder.setInitialState(initialStateID) return dfaBuilder.build(distance), nil } func fromNfa(nfa *LevenshteinNFA) (*ParametricDFA, error) { lookUp := newHash() lookUp.getOrAllocate(*newMultiState()) initialState := nfa.initialStates() lookUp.getOrAllocate(*initialState) maxDistance := nfa.maxDistance() msDiameter := nfa.msDiameter() numChi := 1 << msDiameter chiValues := make([]uint64, numChi) for i := 0; i < numChi; i++ { chiValues[i] = uint64(i) } transitions := make([]Transition, 0, numChi*int(msDiameter)) 
var stateID int for stateID = 0; stateID < StateLimit; stateID++ { if stateID == len(lookUp.items) { break } for _, chi := range chiValues { destMs := newMultiState() ms := lookUp.getFromID(stateID) nfa.transition(ms, destMs, chi) translation := destMs.normalize() destID := lookUp.getOrAllocate(*destMs) transitions = append(transitions, Transition{ destShapeID: uint32(destID), deltaOffset: translation, }) } } if stateID == StateLimit { return nil, ErrTooManyStates } ns := len(lookUp.items) diameter := int(msDiameter) distances := make([]uint8, 0, diameter*ns) for stateID := 0; stateID < ns; stateID++ { ms := lookUp.getFromID(stateID) for offset := 0; offset < diameter; offset++ { dist := nfa.multistateDistance(ms, uint32(offset)) distances = append(distances, dist.distance()) } } return &ParametricDFA{ diameter: uint32(msDiameter), transitions: transitions, maxDistance: maxDistance, transitionStride: uint32(numChi), distance: distances, }, nil } type hash struct { index map[[16]byte]int items []MultiState } func newHash() *hash { return &hash{ index: make(map[[16]byte]int, 100), items: make([]MultiState, 0, 100), } } func (h *hash) getOrAllocate(m MultiState) int { size := len(h.items) var exists bool var pos int md5 := getHash(&m) if pos, exists = h.index[md5]; !exists { h.index[md5] = size pos = size h.items = append(h.items, m) } return pos } func (h *hash) getFromID(id int) *MultiState { return &h.items[id] } func getHash(ms *MultiState) [16]byte { msBytes := []byte{} for _, state := range ms.states { jsonBytes, _ := json.Marshal(&state) msBytes = append(msBytes, jsonBytes...) } return md5.Sum(msBytes) } ================================================ FILE: merge_iterator.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package vellum

import (
	"bytes"
)

// MergeFunc is used to choose the new value for a key when merging a slice
// of iterators, and the same key is observed with multiple values.
// Values presented to the MergeFunc will be in the same order as the
// original slice creating the MergeIterator.  This allows some MergeFunc
// implementations to prioritize one iterator over another.
type MergeFunc func([]uint64) uint64

// MergeIterator implements the Iterator interface by traversing a slice
// of iterators and merging the contents of them.  If the same key exists
// in multiple underlying iterators, a user-provided MergeFunc will be
// invoked to choose the new value.
type MergeIterator struct {
	// itrs are the underlying iterators; f resolves duplicate keys.
	itrs []Iterator
	f    MergeFunc

	// currKs and currVs cache the current key and value of each iterator,
	// indexed like itrs.
	currKs [][]byte
	currVs []uint64

	// lowK is the smallest current key; lowV is its (possibly merged) value.
	lowK []byte
	lowV uint64
	// lowIdxs lists the iterators currently positioned at lowK.
	lowIdxs []int

	// mergeV is a reusable buffer for the values handed to f.
	mergeV []uint64
}

// NewMergeIterator creates a new MergeIterator over the provided slice of
// Iterators and with the specified MergeFunc to resolve duplicate keys.
func NewMergeIterator(itrs []Iterator, f MergeFunc) (*MergeIterator, error) { rv := &MergeIterator{ itrs: itrs, f: f, currKs: make([][]byte, len(itrs)), currVs: make([]uint64, len(itrs)), lowIdxs: make([]int, 0, len(itrs)), mergeV: make([]uint64, 0, len(itrs)), } rv.init() if rv.lowK == nil { return rv, ErrIteratorDone } return rv, nil } func (m *MergeIterator) init() { for i, itr := range m.itrs { m.currKs[i], m.currVs[i] = itr.Current() } m.updateMatches() } func (m *MergeIterator) updateMatches() { if len(m.itrs) < 1 { return } m.lowK = m.currKs[0] m.lowIdxs = m.lowIdxs[:0] m.lowIdxs = append(m.lowIdxs, 0) for i := 1; i < len(m.itrs); i++ { if m.currKs[i] == nil { continue } cmp := bytes.Compare(m.currKs[i], m.lowK) if m.lowK == nil || cmp < 0 { // reached a new low m.lowK = m.currKs[i] m.lowIdxs = m.lowIdxs[:0] m.lowIdxs = append(m.lowIdxs, i) } else if cmp == 0 { m.lowIdxs = append(m.lowIdxs, i) } } if len(m.lowIdxs) > 1 { // merge multiple values m.mergeV = m.mergeV[:0] for _, vi := range m.lowIdxs { m.mergeV = append(m.mergeV, m.currVs[vi]) } m.lowV = m.f(m.mergeV) } else if len(m.lowIdxs) == 1 { m.lowV = m.currVs[m.lowIdxs[0]] } } // Current returns the key and value currently pointed to by this iterator. // If the iterator is not pointing at a valid value (because Iterator/Next/Seek) // returned an error previously, it may return nil,0. func (m *MergeIterator) Current() ([]byte, uint64) { return m.lowK, m.lowV } // Next advances this iterator to the next key/value pair. If there is none, // then ErrIteratorDone is returned. func (m *MergeIterator) Next() error { // move all the current low iterators to next for _, vi := range m.lowIdxs { err := m.itrs[vi].Next() if err != nil && err != ErrIteratorDone { return err } m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current() } m.updateMatches() if m.lowK == nil { return ErrIteratorDone } return nil } // Seek advances this iterator to the specified key/value pair. 
If this key // is not in the FST, Current() will return the next largest key. If this // seek operation would go past the last key, then ErrIteratorDone is returned. func (m *MergeIterator) Seek(key []byte) error { for i := range m.itrs { err := m.itrs[i].Seek(key) if err != nil && err != ErrIteratorDone { return err } } m.updateMatches() if m.lowK == nil { return ErrIteratorDone } return nil } // Close will attempt to close all the underlying Iterators. If any errors // are encountered, the first will be returned. func (m *MergeIterator) Close() error { var rv error for i := range m.itrs { // close all iterators, return first error if any err := m.itrs[i].Close() if rv == nil { rv = err } } return rv } // MergeMin chooses the minimum value func MergeMin(vals []uint64) uint64 { rv := vals[0] for _, v := range vals[1:] { if v < rv { rv = v } } return rv } // MergeMax chooses the maximum value func MergeMax(vals []uint64) uint64 { rv := vals[0] for _, v := range vals[1:] { if v > rv { rv = v } } return rv } // MergeSum sums the values func MergeSum(vals []uint64) uint64 { rv := vals[0] for _, v := range vals[1:] { rv += v } return rv } ================================================ FILE: merge_iterator_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package vellum

import (
	"reflect"
	"sort"
	"testing"
)

// TestMergeIterator merges in-memory test iterators and compares the
// collected key/value pairs with the expected map.
func TestMergeIterator(t *testing.T) {

	tests := []struct {
		desc  string
		in    []map[string]uint64
		merge MergeFunc
		want  map[string]uint64
	}{
		{
			desc: "two non-empty iterators with no duplicate keys",
			in: []map[string]uint64{
				{
					"a": 1,
					"c": 3,
					"e": 5,
				},
				{
					"b": 2,
					"d": 4,
					"f": 6,
				},
			},
			merge: func(mvs []uint64) uint64 {
				return mvs[0]
			},
			want: map[string]uint64{
				"a": 1,
				"c": 3,
				"e": 5,
				"b": 2,
				"d": 4,
				"f": 6,
			},
		},
		{
			desc: "two non-empty iterators with duplicate keys summed",
			in: []map[string]uint64{
				{
					"a": 1,
					"c": 3,
					"e": 5,
				},
				{
					"a": 2,
					"c": 4,
					"e": 6,
				},
			},
			merge: func(mvs []uint64) uint64 {
				var rv uint64
				for _, mv := range mvs {
					rv += mv
				}
				return rv
			},
			want: map[string]uint64{
				"a": 3,
				"c": 7,
				"e": 11,
			},
		},

		{
			desc: "non-working example",
			in: []map[string]uint64{
				{
					"mon":   2,
					"tues":  3,
					"thurs": 5,
					"tye":   99,
				},
				{
					"bold": 25,
					"last": 1,
					"next": 500,
					"tank": 0,
				},
			},
			merge: func(mvs []uint64) uint64 {
				return mvs[0]
			},
			want: map[string]uint64{
				"mon":   2,
				"tues":  3,
				"thurs": 5,
				"tye":   99,
				"bold":  25,
				"last":  1,
				"next":  500,
				"tank":  0,
			},
		},
	}

	for _, test := range tests {
		t.Run(test.desc, func(t *testing.T) {
			var itrs []Iterator
			for i := range test.in {
				itr, err := newTestIterator(test.in[i])
				if err != nil && err != ErrIteratorDone {
					t.Fatalf("error creating iterator: %v", err)
				}
				if err == nil {
					itrs = append(itrs, itr)
				}
			}
			mi, err := NewMergeIterator(itrs, test.merge)
			if err != nil && err != ErrIteratorDone {
				t.Fatalf("error creating iterator: %v", err)
			}
			got := make(map[string]uint64)
			// Record the current pair before advancing; the loop ends once
			// Next reports an error (normally ErrIteratorDone).
			for err == nil {
				currk, currv := mi.Current()
				err = mi.Next()
				got[string(currk)] = currv
			}
			if err != nil && err != ErrIteratorDone {
				t.Fatalf("error iterating: %v", err)
			}
			if !reflect.DeepEqual(got, test.want) {
				t.Errorf("expected %v, got %v", test.want, got)
			}
		})
	}
}

// testIterator is an in-memory iterator over a sorted list of keys.
type testIterator struct {
	// vals maps a position in keys to that key's value.
	vals map[int]uint64
	keys []string
	// curr is the current position in keys.
	curr int
}

// newTestIterator builds a testIterator from in, ordering keys
// lexicographically.
func newTestIterator(in map[string]uint64) (*testIterator, error) {
	rv := &testIterator{
		vals: make(map[int]uint64, len(in)),
	}
	for k := range in {
		rv.keys = append(rv.keys, k)
	}
	sort.Strings(rv.keys)
	for i, k := range rv.keys {
		rv.vals[i] = in[k]
	}
	return rv, nil
}

// Current returns the pair at the current position, or nil, 0 past the end.
func (m *testIterator) Current() ([]byte, uint64) {
	if m.curr >= len(m.keys) {
		return nil, 0
	}
	return []byte(m.keys[m.curr]), m.vals[m.curr]
}

// Next advances one position and returns ErrIteratorDone past the last key.
func (m *testIterator) Next() error {
	m.curr++
	if m.curr >= len(m.keys) {
		return ErrIteratorDone
	}
	return nil
}

// Seek moves to the first key that is >= key, returning ErrIteratorDone
// when there is none.
func (m *testIterator) Seek(key []byte) error {
	m.curr = sort.SearchStrings(m.keys, string(key))
	if m.curr >= len(m.keys) {
		return ErrIteratorDone
	}
	return nil
}

// Reset is a no-op in this test iterator.
func (m *testIterator) Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error {
	return nil
}

// Close is a no-op in this test iterator.
func (m *testIterator) Close() error {
	return nil
}

// Exists always reports false in this test iterator.
func (m *testIterator) Exists(key []byte) (bool, error) {
	return false, nil
}

// TestMergeFunc exercises the provided MergeFunc implementations directly.
func TestMergeFunc(t *testing.T) {
	tests := []struct {
		desc  string
		in    []uint64
		merge MergeFunc
		want  uint64
	}{
		{
			desc:  "min",
			in:    []uint64{5, 99, 1},
			merge: MergeMin,
			want:  1,
		},
		{
			desc:  "max",
			in:    []uint64{5, 99, 1},
			merge: MergeMax,
			want:  99,
		},
		{
			desc:  "sum",
			in:    []uint64{5, 99, 1},
			merge: MergeSum,
			want:  105,
		},
	}

	for _, test := range tests {
		t.Run(test.desc, func(t *testing.T) {
			got := test.merge(test.in)
			if test.want != got {
				t.Errorf("expected %d, got %d", test.want, got)
			}
		})
	}
}

// TestEmptyMergeIterator checks the behaviour of a MergeIterator built over
// no iterators at all.
func TestEmptyMergeIterator(t *testing.T) {
	mi, err := NewMergeIterator([]Iterator{}, MergeMin)
	if err != ErrIteratorDone {
		t.Fatalf("expected iterator done, got %v", err)
	}

	// should get valid merge iterator anyway
	if mi == nil {
		t.Fatalf("expected non-nil merge iterator")
	}

	// current returns nil, 0 per interface spec
	ck, cv := mi.Current()
	if ck != nil {
		t.Errorf("expected current to return nil key, got %v", ck)
	}
	if cv != 0 {
		t.Errorf("expected current to return 0 val, got %d", cv)
	}

	// calling Next/Seek continues to return ErrIteratorDone
	err = mi.Next()
	if err != ErrIteratorDone {
		t.Errorf("expected iterator done, got %v", err)
	}
	err = mi.Seek([]byte("anywhere"))
	if err != ErrIteratorDone {
		t.Errorf("expected iterator done, got %v", err)
	}

	err = mi.Close()
	if err != nil {
		t.Errorf("error closing %v", err)
	}
}

================================================
FILE: pack.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package vellum

// deltaAddr returns base - trans, except that a trans of 0 is returned as 0.
func deltaAddr(base, trans uint64) uint64 {
	// transition dest of 0 is special case
	if trans == 0 {
		return 0
	}
	return base - trans
}

// packOutMask selects the low 4 bits of a pack-size byte.
const packOutMask = 1<<4 - 1

// encodePackSize packs two sizes into one byte: transSize in the high
// 4 bits and outSize in the low 4 bits.
func encodePackSize(transSize, outSize int) byte {
	var rv byte
	rv = byte(transSize << 4)
	rv |= byte(outSize)
	return rv
}

// decodePackSize splits a byte produced by encodePackSize back into its
// high-nibble and low-nibble sizes.
func decodePackSize(pack byte) (transSize int, packSize int) {
	transSize = int(pack >> 4)
	packSize = int(pack & packOutMask)
	return
}

// maxNumTrans is the largest transition count encodeNumTrans stores directly.
const maxNumTrans = 1<<6 - 1

// encodeNumTrans returns n as a byte when n <= maxNumTrans, and 0 otherwise.
func encodeNumTrans(n int) byte {
	if n <= maxNumTrans {
		return byte(n)
	}
	return 0
}

// readPackedUint decodes data as a little-endian unsigned integer:
// data[i] contributes bits i*8 and up.
func readPackedUint(data []byte) (rv uint64) {
	for i := range data {
		shifted := uint64(data[i]) << uint(i*8)
		rv |= shifted
	}
	return
}

================================================
FILE: pack_test.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package vellum

import (
	"fmt"
	"testing"
)

// TestEncodeDecodePackSize round-trips every pair of sizes in 0..8 through
// encodePackSize and decodePackSize.
func TestEncodeDecodePackSize(t *testing.T) {

	for i := 0; i <= 8; i++ {
		for j := 0; j <= 8; j++ {
			got := encodePackSize(i, j)
			goti, gotj := decodePackSize(got)
			if goti != i || gotj != j {
				t.Errorf("failed to round trip %d,%d packed as %b to %d,%d", i, j, got, goti, gotj)
			}
		}
	}
}

// TestEncodeNumTrans checks encodeNumTrans at and just above its limit:
// counts up to 1<<6 - 1 are stored as-is, 1<<6 is stored as 0.
func TestEncodeNumTrans(t *testing.T) {
	tests := []struct {
		input int
		want  byte
	}{
		{0, 0},
		{5, 5},
		{1<<6 - 1, 1<<6 - 1},
		{1 << 6, 0},
	}

	for _, test := range tests {
		t.Run(fmt.Sprintf("input %d", test.input), func(t *testing.T) {
			got := encodeNumTrans(test.input)
			if got != test.want {
				t.Errorf("wanted: %d, got: %d", test.want, got)
			}
		})
	}
}

================================================
FILE: regexp/compile.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package regexp

import (
	"regexp/syntax"
	"unicode"

	unicode_utf8 "unicode/utf8"

	"github.com/couchbase/vellum/utf8"
)

// compiler turns a parsed regular expression into a program of byte-level
// instructions.
type compiler struct {
	// sizeLimit bounds len(insts)*instSize; see checkSize.
	sizeLimit uint
	// insts is the program emitted so far.
	insts prog
	// instsPool is a batch of preallocated instructions handed out by allocInst.
	instsPool []inst

	// Scratch state reused across calls to utf8.NewSequencesPrealloc.
	sequences  utf8.Sequences
	rangeStack utf8.RangeStack
	startBytes []byte
	endBytes   []byte
}

// newCompiler returns a compiler limited to programs of sizeLimit size.
func newCompiler(sizeLimit uint) *compiler {
	return &compiler{
		sizeLimit:  sizeLimit,
		startBytes: make([]byte, unicode_utf8.UTFMax),
		endBytes:   make([]byte, unicode_utf8.UTFMax),
	}
}

// compile emits the instructions for ast followed by a final OpMatch and
// returns the resulting program.
func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
	err := c.c(ast)
	if err != nil {
		return nil, err
	}
	inst := c.allocInst()
	inst.op = OpMatch
	c.insts = append(c.insts, inst)
	return c.insts, nil
}

// c recursively emits instructions for ast. It returns ErrNoLazy,
// ErrNoEmpty or ErrNoWordBoundary for unsupported constructs. Cases that
// fall out of the switch end with a size check; cases that return directly
// do not.
func (c *compiler) c(ast *syntax.Regexp) (err error) {
	if ast.Flags&syntax.NonGreedy > 1 {
		return ErrNoLazy
	}

	switch ast.Op {
	case syntax.OpEndLine, syntax.OpBeginLine,
		syntax.OpBeginText, syntax.OpEndText:
		return ErrNoEmpty
	case syntax.OpWordBoundary, syntax.OpNoWordBoundary:
		return ErrNoWordBoundary
	case syntax.OpEmptyMatch:
		return nil
	case syntax.OpLiteral:
		for _, r := range ast.Rune {
			if ast.Flags&syntax.FoldCase > 0 {
				// Case-insensitive literal: compile each rune as a character
				// class holding the rune and all of its case folds.
				next := syntax.Regexp{
					Op:    syntax.OpCharClass,
					Flags: ast.Flags & syntax.FoldCase,
					Rune0: [2]rune{r, r},
				}
				next.Rune = next.Rune0[0:2]
				// try to find more folded runes
				for r1 := unicode.SimpleFold(r); r1 != r; r1 = unicode.SimpleFold(r1) {
					next.Rune = append(next.Rune, r1, r1)
				}
				err = c.c(&next)
				if err != nil {
					return err
				}
			} else {
				c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
					r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
				if err != nil {
					return err
				}
				for _, seq := range c.sequences {
					c.compileUtf8Ranges(seq)
				}
			}
		}
	case syntax.OpAnyChar:
		// Any character is the class [0, unicode.MaxRune].
		next := syntax.Regexp{
			Op:    syntax.OpCharClass,
			Flags: ast.Flags & syntax.FoldCase,
			Rune0: [2]rune{0, unicode.MaxRune},
		}
		next.Rune = next.Rune0[:2]
		return c.c(&next)
	case syntax.OpAnyCharNotNL:
		// Two ranges that skip 0x0A (newline).
		next := syntax.Regexp{
			Op:    syntax.OpCharClass,
			Flags: ast.Flags & syntax.FoldCase,
			Rune:  []rune{0, 0x09, 0x0B, unicode.MaxRune},
		}
		return c.c(&next)
	case syntax.OpCharClass:
		return c.compileClass(ast)
	case syntax.OpCapture:
		// Capture groups are compiled as their contents.
		return c.c(ast.Sub[0])
	case syntax.OpConcat:
		for _, sub := range ast.Sub {
			err := c.c(sub)
			if err != nil {
				return err
			}
		}
		return nil
	case syntax.OpAlternate:
		if len(ast.Sub) == 0 {
			return nil
		}
		jmpsToEnd := make([]uint, 0, len(ast.Sub)-1)
		// does not handle last entry
		for i := 0; i < len(ast.Sub)-1; i++ {
			sub := ast.Sub[i]
			split := c.emptySplit()
			j1 := c.top()
			err := c.c(sub)
			if err != nil {
				return err
			}
			jmpsToEnd = append(jmpsToEnd, c.emptyJump())
			j2 := c.top()
			c.setSplit(split, j1, j2)
		}
		// handle last entry
		err := c.c(ast.Sub[len(ast.Sub)-1])
		if err != nil {
			return err
		}
		// Every earlier alternative jumps past the last one.
		end := uint(len(c.insts))
		for _, jmpToEnd := range jmpsToEnd {
			c.setJump(jmpToEnd, end)
		}
	case syntax.OpQuest:
		// split -> (sub | skip)
		split := c.emptySplit()
		j1 := c.top()
		err := c.c(ast.Sub[0])
		if err != nil {
			return err
		}
		j2 := c.top()
		c.setSplit(split, j1, j2)
	case syntax.OpStar:
		// j1: split -> (j2: sub; jmp j1 | j3)
		j1 := c.top()
		split := c.emptySplit()
		j2 := c.top()
		err := c.c(ast.Sub[0])
		if err != nil {
			return err
		}
		jmp := c.emptyJump()
		j3 := uint(len(c.insts))

		c.setJump(jmp, j1)
		c.setSplit(split, j2, j3)
	case syntax.OpPlus:
		// j1: sub; split -> (j1 | j2)
		j1 := c.top()
		err := c.c(ast.Sub[0])
		if err != nil {
			return err
		}
		split := c.emptySplit()
		j2 := c.top()
		c.setSplit(split, j1, j2)
	case syntax.OpRepeat:
		if ast.Max == -1 {
			// {min,}: min mandatory copies followed by a star.
			for i := 0; i < ast.Min; i++ {
				err := c.c(ast.Sub[0])
				if err != nil {
					return err
				}
			}
			next := syntax.Regexp{
				Op:    syntax.OpStar,
				Flags: ast.Flags,
				Sub:   ast.Sub,
				Sub0:  ast.Sub0,
				Rune:  ast.Rune,
				Rune0: ast.Rune0,
			}
			return c.c(&next)
		}
		// {min,max}: min mandatory copies, then max-min optional copies,
		// each guarded by a split that can skip to the end.
		for i := 0; i < ast.Min; i++ {
			err := c.c(ast.Sub[0])
			if err != nil {
				return err
			}
		}
		splits := make([]uint, 0, ast.Max-ast.Min)
		starts := make([]uint, 0, ast.Max-ast.Min)
		for i := ast.Min; i < ast.Max; i++ {
			splits = append(splits, c.emptySplit())
			starts = append(starts, uint(len(c.insts)))
			err := c.c(ast.Sub[0])
			if err != nil {
				return err
			}
		}
		end := uint(len(c.insts))
		for i := 0; i < len(splits); i++ {
			c.setSplit(splits[i], starts[i], end)
		}
	}

	return c.checkSize()
}

// checkSize returns ErrCompiledTooBig once the program exceeds sizeLimit.
func (c *compiler) checkSize() error {
	if uint(len(c.insts)*instSize) > c.sizeLimit {
		return ErrCompiledTooBig
	}
	return nil
}

// compileClass emits an alternation over the rune ranges in ast.Rune, which
// holds (start, end) pairs. An empty class emits nothing.
func (c *compiler) compileClass(ast *syntax.Regexp) error {
	if len(ast.Rune) == 0 {
		return nil
	}
	jmps := make([]uint, 0, len(ast.Rune)-2)
	// does not do last pair
	for i := 0; i < len(ast.Rune)-2; i += 2 {
		rstart := ast.Rune[i]
		rend := ast.Rune[i+1]

		split := c.emptySplit()
		j1 := c.top()
		err := c.compileClassRange(rstart, rend)
		if err != nil {
			return err
		}
		jmps = append(jmps, c.emptyJump())
		j2 := c.top()
		c.setSplit(split, j1, j2)
	}
	// handle last pair
	rstart := ast.Rune[len(ast.Rune)-2]
	rend := ast.Rune[len(ast.Rune)-1]
	err := c.compileClassRange(rstart, rend)
	if err != nil {
		return err
	}
	end := c.top()
	for _, jmp := range jmps {
		c.setJump(jmp, end)
	}
	return nil
}

// compileClassRange emits an alternation over the UTF-8 byte sequences
// covering the rune range [startR, endR].
// NOTE(review): assumes NewSequencesPrealloc yields at least one sequence;
// an empty result would panic below — confirm.
func (c *compiler) compileClassRange(startR, endR rune) (err error) {
	c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
		startR, endR, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
	if err != nil {
		return err
	}
	jmps := make([]uint, 0, len(c.sequences)-1)
	// does not do last entry
	for i := 0; i < len(c.sequences)-1; i++ {
		seq := c.sequences[i]
		split := c.emptySplit()
		j1 := c.top()
		c.compileUtf8Ranges(seq)
		jmps = append(jmps, c.emptyJump())
		j2 := c.top()
		c.setSplit(split, j1, j2)
	}
	// handle last entry
	c.compileUtf8Ranges(c.sequences[len(c.sequences)-1])
	end := c.top()
	for _, jmp := range jmps {
		c.setJump(jmp, end)
	}

	return nil
}

// compileUtf8Ranges emits one OpRange instruction per byte range in seq.
func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
	for _, r := range seq {
		inst := c.allocInst()
		inst.op = OpRange
		inst.rangeStart = r.Start
		inst.rangeEnd = r.End
		c.insts = append(c.insts, inst)
	}
}

// emptySplit appends an OpSplit with unset targets and returns its index.
func (c *compiler) emptySplit() uint {
	inst := c.allocInst()
	inst.op = OpSplit
	c.insts = append(c.insts, inst)
	return c.top() - 1
}

// emptyJump appends an OpJmp with an unset target and returns its index.
func (c *compiler) emptyJump() uint {
	inst := c.allocInst()
	inst.op = OpJmp
	c.insts = append(c.insts, inst)
	return c.top() - 1
}

// setSplit fills in both targets of the split at index i.
func (c *compiler) setSplit(i, pc1, pc2 uint) {
	split := c.insts[i]
	split.splitA = pc1
	split.splitB = pc2
}

// setJump fills in the target of the jump at index i.
func (c *compiler) setJump(i, pc uint) {
	jmp := c.insts[i]
	jmp.to = pc
}

// top returns the index the next emitted instruction will have.
func (c *compiler) top() uint {
	return uint(len(c.insts))
}

// allocInst hands out the next instruction from the pool, refilling the
// pool 16 at a time.
func (c *compiler) allocInst() *inst {
	if len(c.instsPool) <= 0 {
		c.instsPool = make([]inst, 16)
	}
	inst := &c.instsPool[0]
	c.instsPool = c.instsPool[1:]
	return inst
}

================================================
FILE: regexp/compile_test.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package regexp

import (
	"reflect"
	"regexp/syntax"
	"testing"
)

// TestCompiler parses each query with Perl syntax, compiles it, and compares
// the emitted instructions (or the error) with the expectation.
func TestCompiler(t *testing.T) {

	tests := []struct {
		query     string
		wantInsts prog
		wantErr   error
	}{
		{
			query: "",
			wantInsts: []*inst{
				{op: OpMatch},
			},
			wantErr: nil,
		},
		{
			query:   "^",
			wantErr: ErrNoEmpty,
		},
		{
			query:   `\b`,
			wantErr: ErrNoWordBoundary,
		},
		{
			query:   `.*?`,
			wantErr: ErrNoLazy,
		},
		{
			query: `a`,
			wantInsts: []*inst{
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpMatch},
			},
		},
		{
			query: `[a-c]`,
			wantInsts: []*inst{
				{op: OpRange, rangeStart: 'a', rangeEnd: 'c'},
				{op: OpMatch},
			},
		},
		{
			query: `(a)`,
			wantInsts: []*inst{
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpMatch},
			},
		},
		{
			query: `a?`,
			wantInsts: []*inst{
				{op: OpSplit, splitA: 1, splitB: 2},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpMatch},
			},
		},
		{
			query: `a*`,
			wantInsts: []*inst{
				{op: OpSplit, splitA: 1, splitB: 3},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpJmp, to: 0},
				{op: OpMatch},
			},
		},
		{
			query: `a+`,
			wantInsts: []*inst{
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpSplit, splitA: 0, splitB: 2},
				{op: OpMatch},
			},
		},
		{
			query: `a{2,4}`,
			wantInsts: []*inst{
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpSplit, splitA: 3, splitB: 6},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpSplit, splitA: 5, splitB: 6},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpMatch},
			},
		},
		{
			query: `a{3,}`,
			wantInsts: []*inst{
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpSplit, splitA: 4, splitB: 6},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpJmp, to: 3},
				{op: OpMatch},
			},
		},
		{
			query: `a+|b+`,
			wantInsts: []*inst{
				{op: OpSplit, splitA: 1, splitB: 4},
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpSplit, splitA: 1, splitB: 3},
				{op: OpJmp, to: 6},
				{op: OpRange, rangeStart: 'b', rangeEnd: 'b'},
				{op: OpSplit, splitA: 4, splitB: 6},
				{op: OpMatch},
			},
		},
		{
			query: `a+b+`,
			wantInsts: []*inst{
				{op: OpRange, rangeStart: 'a', rangeEnd: 'a'},
				{op: OpSplit, splitA: 0, splitB: 2},
				{op: OpRange, rangeStart: 'b', rangeEnd: 'b'},
				{op: OpSplit, splitA: 2, splitB: 4},
				{op: OpMatch},
			},
		},
		{
			// "." expands to every UTF-8 byte sequence except the newline
			// byte 0x0a; each alternative jumps to the final match at 46.
			query: `.`,
			wantInsts: []*inst{
				{op: OpSplit, splitA: 1, splitB: 3},
				{op: OpRange, rangeStart: 0, rangeEnd: 0x09},
				{op: OpJmp, to: 46}, // match ascii, less than 0x0a
				{op: OpSplit, splitA: 4, splitB: 6},
				{op: OpRange, rangeStart: 0x0b, rangeEnd: 0x7f},
				{op: OpJmp, to: 46}, // match rest ascii
				{op: OpSplit, splitA: 7, splitB: 10},
				{op: OpRange, rangeStart: 0xc2, rangeEnd: 0xdf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpJmp, to: 46}, // match
				{op: OpSplit, splitA: 11, splitB: 15},
				{op: OpRange, rangeStart: 0xe0, rangeEnd: 0xe0},
				{op: OpRange, rangeStart: 0xa0, rangeEnd: 0xbf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpJmp, to: 46}, // match
				{op: OpSplit, splitA: 16, splitB: 20},
				{op: OpRange, rangeStart: 0xe1, rangeEnd: 0xec},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpJmp, to: 46}, // match
				{op: OpSplit, splitA: 21, splitB: 25},
				{op: OpRange, rangeStart: 0xed, rangeEnd: 0xed},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0x9f},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpJmp, to: 46}, // match
				{op: OpSplit, splitA: 26, splitB: 30},
				{op: OpRange, rangeStart: 0xee, rangeEnd: 0xef},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpJmp, to: 46}, // match
				{op: OpSplit, splitA: 31, splitB: 36},
				{op: OpRange, rangeStart: 0xf0, rangeEnd: 0xf0},
				{op: OpRange, rangeStart: 0x90, rangeEnd: 0xbf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpJmp, to: 46}, // match
				{op: OpSplit, splitA: 37, splitB: 42},
				{op: OpRange, rangeStart: 0xf1, rangeEnd: 0xf3},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpJmp, to: 46}, // match
				{op: OpRange, rangeStart: 0xf4, rangeEnd: 0xf4},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0x8f},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpRange, rangeStart: 0x80, rangeEnd: 0xbf},
				{op: OpMatch},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.query, func(t *testing.T) {
			p, err := syntax.Parse(test.query, syntax.Perl)
			if err != nil {
				t.Fatalf("error parsing regexp: %v", err)
			}
			c := newCompiler(10000)
			gotInsts, gotErr := c.compile(p)
			if !reflect.DeepEqual(test.wantErr, gotErr) {
				t.Errorf("expected error: %v, got error: %v", test.wantErr, gotErr)
			}
			if !reflect.DeepEqual(test.wantInsts, gotInsts) {
				t.Errorf("expected insts: %v, got insts:%v", test.wantInsts, gotInsts)
			}
		})
	}
}

================================================
FILE: regexp/dfa.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package regexp

import (
	"encoding/binary"
	"fmt"
)

// StateLimit is the maximum number of states allowed
const StateLimit = 10000

// ErrTooManyStates is returned if you attempt to build a regular expression
// DFA which requires too many states.
var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
	StateLimit)

// dfaBuilder incrementally constructs a dfa from a compiled program.
//
// cache maps the byte-encoded list of instruction pointers that make up a
// state (see instsKey) to the index of that state in dfa.states, so that
// equivalent states are only created once. keyBuf is scratch space reused
// when building those cache keys.
type dfaBuilder struct {
	dfa    *dfa
	cache  map[string]int
	keyBuf []byte
}

// newDfaBuilder returns a builder for the provided program. The resulting
// dfa already contains state 0, which has no instructions and never matches.
func newDfaBuilder(insts prog) *dfaBuilder {
	d := &dfaBuilder{
		dfa: &dfa{
			insts:  insts,
			states: make([]state, 0, 16),
		},
		cache: make(map[string]int, 1024),
	}
	// add 0 state that is invalid
	d.dfa.states = append(d.dfa.states, state{
		next:  make([]int, 256),
		match: false,
	})
	return d
}

// build explores every state reachable from the start of the program and
// fills in the transition table of each one for all 256 byte values.
// It returns ErrTooManyStates as soon as the number of states exceeds
// StateLimit.
func (d *dfaBuilder) build() (*dfa, error) {
	cur := newSparseSet(uint(len(d.dfa.insts)))
	next := newSparseSet(uint(len(d.dfa.insts)))

	// the start state is whatever is reachable from instruction 0
	d.dfa.add(cur, 0)
	ns, instsReuse := d.cachedState(cur, nil)
	states := intStack{ns}
	seen := make(map[int]struct{})
	var s int
	states, s = states.Pop()
	// Pop returns 0 once the stack is empty, which ends the loop; state 0
	// itself is never pushed (see the ns != 0 check below)
	for s != 0 {
		for b := 0; b < 256; b++ {
			var ns int
			ns, instsReuse = d.runState(cur, next, s, byte(b), instsReuse)
			if ns != 0 {
				// only queue states that have not been queued before
				if _, ok := seen[ns]; !ok {
					seen[ns] = struct{}{}
					states = states.Push(ns)
				}
			}
			if len(d.dfa.states) > StateLimit {
				return nil, ErrTooManyStates
			}
		}
		states, s = states.Pop()
	}
	return d.dfa, nil
}

// runState computes the state reached from the given state on input byte b,
// records it in the transition table of that state, and returns it.
// cur and next are scratch sets that are overwritten. instsReuse is passed
// through to cachedState and the (possibly nil) slice it hands back is
// returned for use in the next call.
func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte, instsReuse []uint) (
	int, []uint) {
	cur.Clear()
	for _, ip := range d.dfa.states[state].insts {
		cur.Add(ip)
	}
	d.dfa.run(cur, next, b)
	var nextState int
	nextState, instsReuse = d.cachedState(next, instsReuse)
	d.dfa.states[state].next[b] = nextState
	return nextState, instsReuse
}

// instsKey encodes insts as consecutive 8 byte little-endian values.
// buf is reused when its capacity is sufficient, otherwise a new slice is
// allocated; the returned slice has length 8*len(insts).
func instsKey(insts []uint, buf []byte) []byte {
	if cap(buf) < 8*len(insts) {
		buf = make([]byte, 8*len(insts))
	} else {
		buf = buf[0 : 8*len(insts)]
	}
	for i, inst := range insts {
		binary.LittleEndian.PutUint64(buf[i*8:], uint64(inst))
	}
	return buf
}

// cachedState returns the index of the state made up of the OpRange and
// OpMatch instructions in set, creating the state if it does not exist yet.
// If the set contains no such instruction, state 0 is returned.
//
// instsReuse is used as backing storage for collecting the instructions.
// When no new state was created the collected slice is returned so the
// caller can offer it again; when a new state was created the slice is now
// owned by that state and nil is returned instead.
func (d *dfaBuilder) cachedState(set *sparseSet,
	instsReuse []uint) (int, []uint) {
	insts := instsReuse[:0]
	if cap(insts) == 0 {
		insts = make([]uint, 0, set.Len())
	}
	var isMatch bool
	for i := uint(0); i < uint(set.Len()); i++ {
		ip := set.Get(i)
		switch d.dfa.insts[ip].op {
		case OpRange:
			insts = append(insts, ip)
		case OpMatch:
			isMatch = true
			insts = append(insts, ip)
		}
	}
	if len(insts) == 0 {
		return 0, insts
	}
	d.keyBuf = instsKey(insts, d.keyBuf)
	v, ok := d.cache[string(d.keyBuf)]
	if ok {
		return v, insts
	}
	d.dfa.states = append(d.dfa.states, state{
		insts: insts,
		next:  make([]int, 256),
		match: isMatch,
	})
	newV := len(d.dfa.states) - 1
	d.cache[string(d.keyBuf)] = newV
	return newV, nil
}

// dfa is the program together with the states built from it.
// states[0] is the invalid state added by newDfaBuilder.
type dfa struct {
	insts  prog
	states []state
}

// add inserts ip into set and then recursively inserts the targets of
// OpJmp and OpSplit instructions. It does nothing if ip is already in set.
func (d *dfa) add(set *sparseSet, ip uint) {
	if set.Contains(ip) {
		return
	}
	set.Add(ip)
	switch d.insts[ip].op {
	case OpJmp:
		d.add(set, d.insts[ip].to)
	case OpSplit:
		d.add(set, d.insts[ip].splitA)
		d.add(set, d.insts[ip].splitB)
	}
}

// run clears to and then, for every OpRange instruction in from whose range
// contains b, adds the instruction following it to to.
// It returns true if from contains an OpMatch instruction.
func (d *dfa) run(from, to *sparseSet, b byte) bool {
	to.Clear()
	var isMatch bool
	for i := uint(0); i < uint(from.Len()); i++ {
		ip := from.Get(i)
		switch d.insts[ip].op {
		case OpMatch:
			isMatch = true
		case OpRange:
			if d.insts[ip].rangeStart <= b &&
				b <= d.insts[ip].rangeEnd {
				d.add(to, ip+1)
			}
		}
	}
	return isMatch
}

// state is a single dfa state: the instruction pointers it is made of, the
// state to move to for each of the 256 possible input bytes, and whether
// the state is a match.
type state struct {
	insts []uint
	next  []int
	match bool
}

// intStack is a simple stack of ints.
type intStack []int

// Push returns the stack with v added on top.
func (s intStack) Push(v int) intStack {
	return append(s, v)
}

// Pop returns the stack without its top element, and that element.
// Popping an empty stack returns the stack unchanged and 0.
func (s intStack) Pop() (intStack, int) {
	l := len(s)
	if l < 1 {
		return s, 0
	}
	return s[:l-1], s[l-1]
}

================================================
FILE: regexp/inst.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package regexp

import "fmt"

// instOp identifies the operation performed by an instruction
type instOp int

// the operations an instruction can perform
const (
	OpMatch instOp = iota
	OpJmp
	OpSplit
	OpRange
)

// instSize is the approximate size of an inst struct in bytes
const instSize = 40

// inst is a single instruction of a compiled program. Which fields are
// meaningful depends on op: to for OpJmp, splitA/splitB for OpSplit and
// rangeStart/rangeEnd for OpRange.
type inst struct {
	op         instOp
	to         uint
	splitA     uint
	splitB     uint
	rangeStart byte
	rangeEnd   byte
}

// String renders the instruction in a human readable form. Anything that
// is not a jump, split or range is rendered as MATCH.
func (i *inst) String() string {
	if i.op == OpJmp {
		return fmt.Sprintf("JMP: %d", i.to)
	}
	if i.op == OpSplit {
		return fmt.Sprintf("SPLIT: %d - %d", i.splitA, i.splitB)
	}
	if i.op == OpRange {
		return fmt.Sprintf("RANGE: %x - %x", i.rangeStart, i.rangeEnd)
	}
	return "MATCH"
}

// prog is a compiled program, a sequence of instructions
type prog []*inst

// String renders the program with one instruction per line, each prefixed
// with its index. The output starts with an empty line.
func (p prog) String() string {
	out := "\n"
	for idx := range p {
		line := fmt.Sprintf("%d %v\n", idx, p[idx])
		out = out + line
	}
	return out
}

================================================
FILE: regexp/regexp.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package regexp

import (
	"fmt"
	"regexp/syntax"
)

// ErrNoEmpty returned when "zero width assertions" are used
var ErrNoEmpty = fmt.Errorf("zero width assertions not allowed")

// ErrNoWordBoundary returned when word boundaries are used
var ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed")

// ErrNoBytes returned when byte literals are used
var ErrNoBytes = fmt.Errorf("byte literals are not allowed")

// ErrNoLazy returned when lazy quantifiers are used
var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")

// ErrCompiledTooBig returned when regular expression parses into
// too many instructions
var ErrCompiledTooBig = fmt.Errorf("too many instructions")

// DefaultLimit is the size limit (approximately 10MB) that New applies to
// the compiled finite state automaton.
var DefaultLimit = uint(10 * (1 << 20))

// Regexp implements the vellum.Automaton interface for matching a user
// specified regular expression.
type Regexp struct {
	orig string
	dfa  *dfa
}

// New creates a new Regular Expression automaton with the specified
// expression.  By default it is limited to approximately 10MB for the
// compiled finite state automaton.  If this size is exceeded,
// ErrCompiledTooBig will be returned.
func New(expr string) (*Regexp, error) {
	return NewWithLimit(expr, DefaultLimit)
}

// NewWithLimit creates a new Regular Expression automaton with
// the specified expression.  If the size of the compiled finite state
// automaton exceeds the user specified size, ErrCompiledTooBig will be
// returned.
func NewWithLimit(expr string, size uint) (*Regexp, error) {
	parsed, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return nil, err
	}
	return NewParsedWithLimit(expr, parsed, size)
}

// NewParsedWithLimit creates a new Regular Expression automaton from an
// expression that has already been parsed.  expr is only recorded as the
// original text of the expression; the automaton is built from parsed.
// size is the limit handed to the compiler, and any error from compiling
// the expression or building the DFA is returned.
func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) {
	compiler := newCompiler(size)
	insts, err := compiler.compile(parsed)
	if err != nil {
		return nil, err
	}
	// local names deliberately avoid shadowing the dfa/dfaBuilder types
	builder := newDfaBuilder(insts)
	built, err := builder.build()
	if err != nil {
		return nil, err
	}
	return &Regexp{
		orig: expr,
		dfa:  built,
	}, nil
}

// Start returns the start state of this automaton.
func (r *Regexp) Start() int {
	return 1
}

// IsMatch returns if the specified state is a matching state.
// States outside the range of known states never match.
func (r *Regexp) IsMatch(s int) bool {
	// reject negative states as well, indexing with them would panic
	if s >= 0 && s < len(r.dfa.states) {
		return r.dfa.states[s].match
	}
	return false
}

// CanMatch returns if the specified state can ever transition to a matching
// state.  State 0 and states outside the range of known states cannot.
func (r *Regexp) CanMatch(s int) bool {
	return s > 0 && s < len(r.dfa.states)
}

// WillAlwaysMatch returns if the specified state will always end in a
// matching state.  This implementation always reports false.
func (r *Regexp) WillAlwaysMatch(int) bool {
	return false
}

// Accept returns the new state, resulting from the transition byte b
// when currently in the state s.  States outside the range of known states
// transition to state 0.
func (r *Regexp) Accept(s int, b byte) int {
	// reject negative states as well, indexing with them would panic
	if s >= 0 && s < len(r.dfa.states) {
		return r.dfa.states[s].next[b]
	}
	return 0
}

================================================
FILE: regexp/regexp_test.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // limitations under the License. package regexp import ( "fmt" "testing" ) func TestRegexp(t *testing.T) { tests := []struct { query string seq []byte isMatch bool canMatch bool }{ { query: ``, seq: []byte{}, isMatch: true, canMatch: true, }, // test simple literal { query: `a`, seq: []byte{'a'}, isMatch: true, canMatch: true, }, { query: `a`, seq: []byte{}, isMatch: false, canMatch: true, }, { query: `a`, seq: []byte{'a', 'b'}, isMatch: false, canMatch: false, }, // test actual pattern { query: `wat.r`, seq: []byte{'x'}, isMatch: false, canMatch: false, }, { query: `wat.r`, seq: []byte{'w', 'a', 't'}, isMatch: false, canMatch: true, }, { query: `wat.r`, seq: []byte{'w', 'a', 't', 'e'}, isMatch: false, canMatch: true, }, { query: `wat.r`, seq: []byte{'w', 'a', 't', 'e', 'r'}, isMatch: true, canMatch: true, }, { query: `wat.r`, seq: []byte{'w', 'a', 't', 'e', 'r', 's'}, isMatch: false, canMatch: false, }, // test alternation { query: `a+|b+`, seq: []byte{}, isMatch: false, canMatch: true, }, { query: `a+|b+`, seq: []byte{'a'}, isMatch: true, canMatch: true, }, { query: `a+|b+`, seq: []byte{'b'}, isMatch: true, canMatch: true, }, { query: `a+|b+`, seq: []byte{'a', 'a'}, isMatch: true, canMatch: true, }, { query: `a+|b+`, seq: []byte{'b', 'b'}, isMatch: true, canMatch: true, }, { query: `a+|b+`, seq: []byte{'a', 'b'}, isMatch: false, canMatch: false, }, { query: `a+|b+`, seq: []byte{'b', 'a'}, isMatch: false, canMatch: false, }, // test others { query: `[a-z]?[1-9]*`, seq: []byte{}, isMatch: true, canMatch: true, }, { query: `[a-z]?[1-9]*`, seq: []byte{'a'}, isMatch: true, canMatch: true, }, { query: `[a-z]?[1-9]*`, seq: []byte{'a', '1'}, isMatch: true, canMatch: true, }, { query: `[a-z]?[1-9]*`, seq: []byte{'a', '1', '2'}, isMatch: true, canMatch: true, }, { query: `[a-z]?[1-9]*`, seq: []byte{'a', '1', '2', 'z'}, isMatch: false, canMatch: false, }, { query: `[a-z]?[1-9]*`, seq: []byte{'a', 'b'}, 
isMatch: false, canMatch: false, }, // basic case insensitive match literals { query: `(?i)mArTy`, seq: []byte{'m', 'a', 'r', 't', 'y'}, isMatch: true, canMatch: true, }, { query: `(?i)marty`, seq: []byte{'m', 'A', 'r', 'T', 'y'}, isMatch: true, canMatch: true, }, // case insensitive character class { query: `(?i)[d-f]*`, seq: []byte{'D', 'e', 'e', 'F'}, isMatch: true, canMatch: true, }, // case insensitive, with case sensitive pattern in the middle { query: `(?i)[d-f]*(?-i:m)wow`, seq: []byte{'D', 'e', 'e', 'F', 'm', 'W', 'o', 'W'}, isMatch: true, canMatch: true, }, // (?i)caseless(?-i)cased { query: `(?i)[d-f]*(?-i)wOw`, seq: []byte{'D', 'e', 'e', 'F', 'w', 'O', 'w'}, isMatch: true, canMatch: true, }, // from: https://docs.rs/crate/regex-syntax/0.2.4/source/src/lib.rs // `(?i)[^x]` really should match any character sans `x` and `X`, but if `[^x]` is negated // before being case folded, you'll end up matching any character. { query: `(?i)[^x]`, seq: []byte{'a'}, isMatch: true, canMatch: true, }, { query: `(?i)[^x]`, seq: []byte{'x'}, isMatch: false, canMatch: false, }, { query: `(?i)[^x]`, seq: []byte{'X'}, isMatch: false, canMatch: false, }, } for _, test := range tests { t.Run(fmt.Sprintf("%s - %v", test.query, test.seq), func(t *testing.T) { r, err := New(test.query) if err != nil { t.Fatal(err) } s := r.Start() for _, b := range test.seq { s = r.Accept(s, b) } isMatch := r.IsMatch(s) if isMatch != test.isMatch { t.Errorf("expected isMatch %t, got %t", test.isMatch, isMatch) } canMatch := r.CanMatch(s) if canMatch != test.canMatch { t.Errorf("expectec canMatch %t, got %t", test.canMatch, canMatch) } }) } } func BenchmarkNewWildcard(b *testing.B) { for i := 0; i < b.N; i++ { New("my.*h") } } ================================================ FILE: regexp/sparse.go ================================================ // Copyright (c) 2017 Couchbase, Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package regexp type sparseSet struct { dense []uint sparse []uint size uint } func newSparseSet(size uint) *sparseSet { return &sparseSet{ dense: make([]uint, size), sparse: make([]uint, size), size: 0, } } func (s *sparseSet) Len() int { return int(s.size) } func (s *sparseSet) Add(ip uint) uint { i := s.size s.dense[i] = ip s.sparse[ip] = i s.size++ return i } func (s *sparseSet) Get(i uint) uint { return s.dense[i] } func (s *sparseSet) Contains(ip uint) bool { i := s.sparse[ip] return i < s.size && s.dense[i] == ip } func (s *sparseSet) Clear() { s.size = 0 } ================================================ FILE: regexp/sparse_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package regexp

import "testing"

// TestSparse exercises the basic sparseSet operations: membership on an
// empty set, adding a value, reading it back, and clearing.
func TestSparse(t *testing.T) {

	s := newSparseSet(10)
	if s.Contains(0) {
		t.Errorf("expected not to contain 0")
	}

	s.Add(3)
	if !s.Contains(3) {
		t.Errorf("expected to contains 3, did not")
	}

	if s.Len() != 1 {
		t.Errorf("expected len 1, got %d", s.Len())
	}

	// the only member was added as 3, so that is what position 0 holds
	if s.Get(0) != 3 {
		t.Errorf("expected 3, got %d", s.Get(0))
	}

	s.Clear()

	if s.Len() != 0 {
		t.Errorf("expected len 0, got %d", s.Len())
	}
}

================================================
FILE: registry.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vellum type registryCell struct { addr int node *builderNode } type registry struct { builderNodePool *builderNodePool table []registryCell tableSize uint mruSize uint } func newRegistry(p *builderNodePool, tableSize, mruSize int) *registry { nsize := tableSize * mruSize rv := ®istry{ builderNodePool: p, table: make([]registryCell, nsize), tableSize: uint(tableSize), mruSize: uint(mruSize), } return rv } func (r *registry) Reset() { var empty registryCell for i := range r.table { r.builderNodePool.Put(r.table[i].node) r.table[i] = empty } } func (r *registry) entry(node *builderNode) (bool, int, *registryCell) { if len(r.table) == 0 { return false, 0, nil } bucket := r.hash(node) start := r.mruSize * uint(bucket) end := start + r.mruSize rc := registryCache(r.table[start:end]) return rc.entry(node, r.builderNodePool) } const fnvPrime = 1099511628211 func (r *registry) hash(b *builderNode) int { var final uint64 if b.final { final = 1 } var h uint64 = 14695981039346656037 h = (h ^ final) * fnvPrime h = (h ^ b.finalOutput) * fnvPrime for _, t := range b.trans { h = (h ^ uint64(t.in)) * fnvPrime h = (h ^ t.out) * fnvPrime h = (h ^ uint64(t.addr)) * fnvPrime } return int(h % uint64(r.tableSize)) } type registryCache []registryCell func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, int, *registryCell) { if len(r) == 1 { if r[0].node != nil && r[0].node.equiv(node) { return true, r[0].addr, nil } pool.Put(r[0].node) r[0].node = node return false, 0, &r[0] } for i := range r { if r[i].node != nil && r[i].node.equiv(node) { addr := r[i].addr r.promote(i) return true, addr, nil } } // no match last := len(r) - 1 pool.Put(r[last].node) r[last].node = node // discard LRU r.promote(last) return false, 0, &r[0] } func (r registryCache) promote(i int) { for i > 0 { r.swap(i-1, i) i-- } } func (r registryCache) swap(i, j int) { r[i], r[j] = r[j], r[i] } ================================================ FILE: registry_test.go 
================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import "testing" // FIXME add tests for MRU func TestRegistry(t *testing.T) { p := &builderNodePool{} r := newRegistry(p, 10, 1) n1 := &builderNode{ trans: []transition{ { in: 'a', addr: 1, }, { in: 'b', addr: 2, }, { in: 'c', addr: 3, }, }, } // first look, doesn't exist found, _, cell := r.entry(n1) if found { t.Errorf("expected empty registry to not have equivalent") } cell.addr = 276 // second look, does var nowAddr int found, nowAddr, _ = r.entry(n1) if !found { t.Errorf("expected to find equivalent after registering it") } if nowAddr != 276 { t.Errorf("expected to get addr 276, got %d", nowAddr) } } ================================================ FILE: transducer.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package vellum // Transducer represents the general contract of a byte-based finite transducer type Transducer interface { // all transducers are also automatons Automaton // IsMatchWithValue returns true if and only if the state is a match // additionally it returns a states final value (if any) IsMatchWithVal(int) (bool, uint64) // Accept returns the next state given the input to the specified state // additionally it returns the value associated with the transition AcceptWithVal(int, byte) (int, uint64) } // TransducerGet implements an generic Get() method which works // on any implementation of Transducer // The caller MUST check the boolean return value for a match. // Zero is a valid value regardless of match status, // and if it is NOT a match, the value collected so far is returned. func TransducerGet(t Transducer, k []byte) (bool, uint64) { var total uint64 i := 0 curr := t.Start() for t.CanMatch(curr) && i < len(k) { var transVal uint64 curr, transVal = t.AcceptWithVal(curr, k[i]) if curr == noneAddr { break } total += transVal i++ } if i != len(k) { return false, total } match, finalVal := t.IsMatchWithVal(curr) return match, total + finalVal } ================================================ FILE: utf8/utf8.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package utf8

import (
	"fmt"
	"unicode/utf8"
)

// Sequences is a collection of Sequence
type Sequences []Sequence

// NewSequences constructs a collection of Sequence which describe the
// byte ranges covered between the start and end runes.
func NewSequences(start, end rune) (Sequences, error) {
	rv, _, err := NewSequencesPrealloc(start, end, nil, nil, nil, nil)
	return rv, err
}

// NewSequencesPrealloc behaves like NewSequences but lets the caller supply
// storage to be reused: preallocSequences and preallocRangeStack are
// truncated to length zero and appended to, and preallocStartBytes and
// preallocEndBytes are used as encoding buffers when their capacity is at
// least utf8.UTFMax (otherwise new buffers are allocated). Any of them may
// be nil.
//
// It returns the sequences and the range stack so that both can be offered
// again on a later call. On error both are returned as nil.
func NewSequencesPrealloc(start, end rune,
	preallocSequences Sequences,
	preallocRangeStack RangeStack,
	preallocStartBytes, preallocEndBytes []byte) (Sequences, RangeStack, error) {
	rv := preallocSequences[:0]

	startBytes := preallocStartBytes
	if cap(startBytes) < utf8.UTFMax {
		startBytes = make([]byte, utf8.UTFMax)
	}
	startBytes = startBytes[:utf8.UTFMax]

	endBytes := preallocEndBytes
	if cap(endBytes) < utf8.UTFMax {
		endBytes = make([]byte, utf8.UTFMax)
	}
	endBytes = endBytes[:utf8.UTFMax]

	rangeStack := preallocRangeStack[:0]
	rangeStack = rangeStack.Push(scalarRange{start, end})

	rangeStack, r := rangeStack.Pop()
	// each pass of TOP handles one range popped off the stack, and ends
	// once Pop reports an empty stack by returning nilScalarRange
TOP:
	for r != nilScalarRange {
		// INNER keeps narrowing r, pushing the part that was cut off back
		// on the stack, until r can be emitted as a single Sequence
	INNER:
		for {
			// cut out the surrogate codepoints
			r1, r2 := r.split()
			if r1 != nilScalarRange {
				rangeStack = rangeStack.Push(scalarRange{r2.start, r2.end})
				r.start = r1.start
				r.end = r1.end
				continue INNER
			}
			if !r.valid() {
				rangeStack, r = rangeStack.Pop()
				continue TOP
			}
			// split r where it crosses the largest value of an encoded
			// length, as reported by maxScalarValue
			for i := 1; i < utf8.UTFMax; i++ {
				max := maxScalarValue(i)
				if r.start <= max && max < r.end {
					rangeStack = rangeStack.Push(scalarRange{max + 1, r.end})
					r.end = max
					continue INNER
				}
			}
			// a range that is entirely ascii is a single byte range
			asciiRange := r.ascii()
			if asciiRange != nilRange {
				rv = append(rv, Sequence{
					asciiRange,
				})
				rangeStack, r = rangeStack.Pop()
				continue TOP
			}
			// m masks the low 6*i bits; when start and end differ above
			// the mask, split r so that the low bits of start are all zero
			// and the low bits of end are all one
			for i := uint(1); i < utf8.UTFMax; i++ {
				m := rune((1 << (6 * i)) - 1)
				if (r.start & ^m) != (r.end & ^m) {
					if (r.start & m) != 0 {
						rangeStack = rangeStack.Push(scalarRange{(r.start | m) + 1, r.end})
						r.end = r.start | m
						continue INNER
					}
					if (r.end & m) != m {
						rangeStack = rangeStack.Push(scalarRange{r.end & ^m, r.end})
						r.end = (r.end & ^m) - 1
						continue INNER
					}
				}
			}
			// r needs no further splitting, pair up the encoded bytes of
			// its start and end
			n, m := r.encode(startBytes, endBytes)
			seq, err := SequenceFromEncodedRange(startBytes[0:n], endBytes[0:m])
			if err != nil {
				return nil, nil, err
			}
			rv = append(rv, seq)
			rangeStack, r = rangeStack.Pop()
			continue TOP
		}
	}

	return rv, rangeStack, nil
}

// Sequence is a collection of Range
type Sequence []Range

// SequenceFromEncodedRange creates sequence from the encoded bytes.
// The i-th Range of the result spans from start[i] to end[i].
// start and end must have the same length, which must be 2, 3 or 4;
// anything else is an error.
func SequenceFromEncodedRange(start, end []byte) (Sequence, error) {
	if len(start) != len(end) {
		return nil, fmt.Errorf("byte slices must be the same length")
	}
	switch len(start) {
	case 2:
		return Sequence{
			Range{start[0], end[0]},
			Range{start[1], end[1]},
		}, nil
	case 3:
		return Sequence{
			Range{start[0], end[0]},
			Range{start[1], end[1]},
			Range{start[2], end[2]},
		}, nil
	case 4:
		return Sequence{
			Range{start[0], end[0]},
			Range{start[1], end[1]},
			Range{start[2], end[2]},
			Range{start[3], end[3]},
		}, nil
	}

	return nil, fmt.Errorf("invalid encoded byte length")
}

// Matches checks to see if the provided byte slice matches the Sequence.
// Only the first len(u) bytes are examined; a slice shorter than the
// Sequence never matches.
func (u Sequence) Matches(bytes []byte) bool {
	if len(bytes) < len(u) {
		return false
	}
	for i := 0; i < len(u); i++ {
		if !u[i].matches(bytes[i]) {
			return false
		}
	}
	return true
}

// String renders the ranges of the Sequence one after another. Sequences
// that do not have between 1 and 4 ranges are reported as invalid.
func (u Sequence) String() string {
	switch len(u) {
	case 1:
		return fmt.Sprintf("%v", u[0])
	case 2:
		return fmt.Sprintf("%v%v", u[0], u[1])
	case 3:
		return fmt.Sprintf("%v%v%v", u[0], u[1], u[2])
	case 4:
		return fmt.Sprintf("%v%v%v%v", u[0], u[1], u[2], u[3])
	default:
		return fmt.Sprintf("invalid utf8 sequence")
	}
}

// Range describes a single range of byte values
type Range struct {
	Start byte
	End   byte
}

// nilRange is the value used to signal "no range", see scalarRange.ascii
var nilRange = Range{0xff, 0}

// matches reports whether b lies within the range, bounds included.
func (u Range) matches(b byte) bool {
	if u.Start <= b && b <= u.End {
		return true
	}
	return false
}

// String renders the range in upper case hex, as [X] when it covers a
// single value and as [X-Y] otherwise.
func (u Range) String() string {
	if u.Start == u.End {
		return fmt.Sprintf("[%X]", u.Start)
	}
	return fmt.Sprintf("[%X-%X]", u.Start, u.End)
}

// scalarRange is a range of runes, both ends included
type scalarRange struct {
	start rune
	end   rune
}

// nilScalarRange is the value used to signal "no range", it is what
// RangeStack.Pop returns for an empty stack
var nilScalarRange = scalarRange{0xffff, 0}

// String renders the range with its bounds in decimal.
func (s *scalarRange) String() string {
	return fmt.Sprintf("ScalarRange(%d,%d)", s.start, s.end)
}

// split this scalar range if it overlaps with a surrogate codepoint
//
// When it does, the part up to 0xd7ff and the part from 0xe000 on are
// returned (either of which may be an invalid, empty range). When it does
// not, nilScalarRange is returned for both.
func (s *scalarRange) split() (scalarRange, scalarRange) {
	if s.start < 0xe000 && s.end > 0xd7ff {
		return scalarRange{
				start: s.start,
				end:   0xd7ff,
			},
			scalarRange{
				start: 0xe000,
				end:   s.end,
			}
	}
	return nilScalarRange, nilScalarRange
}

// valid reports whether the range is non-empty, that is start <= end.
func (s *scalarRange) valid() bool {
	return s.start <= s.end
}

// ascii returns the range as a byte Range if it is valid and lies entirely
// within ascii (end <= 0x7f), and nilRange otherwise.
func (s *scalarRange) ascii() Range {
	if s.valid() && s.end <= 0x7f {
		return Range{
			Start: byte(s.start),
			End:   byte(s.end),
		}
	}
	return nilRange
}

// encode writes the UTF-8 encoding of the range's start and end runes into
// the provided buffers and returns the number of bytes written to each.
//
// start and end MUST have capacity for utf8.UTFMax bytes
func (s *scalarRange) encode(start, end []byte) (int, int) {
	n := utf8.EncodeRune(start, s.start)
	m := utf8.EncodeRune(end, s.end)
	return n, m
}

// RangeStack is a stack of scalar ranges, exported so that callers of
// NewSequencesPrealloc can hold on to one for reuse.
type RangeStack []scalarRange

// Push returns the stack with v added on top.
func (s RangeStack) Push(v scalarRange) RangeStack {
	return append(s, v)
}

// Pop returns the stack without its top element, and that element.
// Popping an empty stack returns the stack unchanged and nilScalarRange.
func (s RangeStack) Pop() (RangeStack, scalarRange) {
	l := len(s)
	if l < 1 {
		return s, nilScalarRange
	}
	return s[:l-1], s[l-1]
}

// maxScalarValue returns the largest rune for the given number of encoded
// bytes: 0x7f for 1, 0x7ff for 2, 0xffff for 3, and 0x10ffff for any other
// value.
func maxScalarValue(nbytes int) rune {
	switch nbytes {
	case 1:
		return 0x007f
	case 2:
		return 0x07FF
	case 3:
		return 0xFFFF
	default:
		return 0x10FFFF
	}
}

================================================
FILE: utf8/utf8_test.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utf8

import (
	"fmt"
	"reflect"
	"testing"
	"unicode/utf8"
)

// TestUtf8Sequences checks the sequences produced for the range of all
// codepoints that encode to at most three bytes.
func TestUtf8Sequences(t *testing.T) {

	want := Sequences{
		Sequence{
			Range{0x0, 0x7f},
		},
		Sequence{
			Range{0xc2, 0xdf},
			Range{0x80, 0xbf},
		},
		Sequence{
			Range{0xe0, 0xe0},
			Range{0xa0, 0xbf},
			Range{0x80, 0xbf},
		},
		Sequence{
			Range{0xe1, 0xec},
			Range{0x80, 0xbf},
			Range{0x80, 0xbf},
		},
		Sequence{
			Range{0xed, 0xed},
			Range{0x80, 0x9f},
			Range{0x80, 0xbf},
		},
		Sequence{
			Range{0xee, 0xef},
			Range{0x80, 0xbf},
			Range{0x80, 0xbf},
		},
	}

	got, err := NewSequences(0, 0xffff)
	if err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(want, got) {
		t.Errorf("wanted: %v, got %v", want, got)
	}
}

// TestCodepointsNoSurrogates checks, for a number of rune ranges that span
// or touch the surrogate block, that none of the generated sequences
// accepts an encoded surrogate codepoint.
func TestCodepointsNoSurrogates(t *testing.T) {
	tests := []struct {
		start rune
		end   rune
	}{
		{0x0, 0xFFFF},
		{0x0, 0x10FFFF},
		{0x0, 0x10FFFE},
		{0x80, 0x10FFFF},
		{0xD7FF, 0xE000},
	}
	for _, test := range tests {
		// the helper reports failure through its error, which must be checked
		err := neverAcceptsSurrogateCodepoints(test.start, test.end)
		if err != nil {
			t.Errorf("range %#x-%#x: %v", test.start, test.end, err)
		}
	}
}

// neverAcceptsSurrogateCodepoints builds the sequences for the range
// start-end and returns an error if any of them matches the three byte
// encoding of a surrogate codepoint (0xD800 through 0xDFFF), or if the
// sequences could not be built.
func neverAcceptsSurrogateCodepoints(start, end rune) error {
	sequences, err := NewSequences(start, end)
	if err != nil {
		return err
	}
	buf := make([]byte, utf8.UTFMax)
	for cp := rune(0xD800); cp <= 0xDFFF; cp++ {
		// utf8.EncodeRune replaces surrogates with U+FFFD, so the three
		// byte form of the surrogate has to be assembled by hand
		buf[0] = 0xE0 | byte(cp>>12)
		buf[1] = 0x80 | (byte(cp>>6) & 0x3F)
		buf[2] = 0x80 | (byte(cp) & 0x3F)
		for _, seq := range sequences {
			if seq.Matches(buf[:3]) {
				return fmt.Errorf("utf8 seq: %v matches surrogate %#x", seq, cp)
			}
		}
	}
	return nil
}

================================================
FILE: vellum.go
================================================
//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
Package vellum is a library for building, serializing and executing an FST (finite
state transducer).

There are two distinct phases, building an FST and using it.

When building an FST, you insert keys ([]byte) and their associated value
(uint64). Insert operations MUST be done in lexicographic order.  While
building the FST, data is streamed to an underlying Writer. At the conclusion
of building, you MUST call Close() on the builder.

After completion of the build phase, you can Open() the FST if you
serialized it to disk.  Alternatively, if you already have the bytes in
memory, you can use Load().  By default, Open() will use mmap to avoid loading
the entire file into memory.

Once the FST is ready, you can use the Contains() method to see if a key is
in the FST.  You can use the Get() method to see if a key is in the FST and
retrieve its associated value.  And, you can use the Iterator method to
enumerate key/value pairs within a specified range.
*/
package vellum

import (
	"errors"
	"io"
)

// ErrOutOfOrder is returned when values are not inserted in
// lexicographic order.
var ErrOutOfOrder = errors.New("values not inserted in lexicographic order")

// ErrIteratorDone is returned by Iterator/Next/Seek methods when the
// Current() value pointed to by the iterator is greater than the last
// key in this FST, or outside the configured startKeyInclusive/endKeyExclusive
// range of the Iterator.
var ErrIteratorDone = errors.New("iterator-done")

// BuilderOpts is a structure to let advanced users customize the behavior
// of the builder and some aspects of the generated FST.
type BuilderOpts struct {
	// Encoder presumably selects the encoder (format version) used to
	// serialize the FST - TODO confirm against the builder
	Encoder int
	// RegistryTableSize and RegistryMRUSize presumably size the builder's
	// node registry (number of buckets and cells per bucket) - TODO confirm
	// against the builder
	RegistryTableSize int
	RegistryMRUSize   int
}

// New returns a new Builder which will stream out the
// underlying representation to the provided Writer as the set is built.
func New(w io.Writer, opts *BuilderOpts) (*Builder, error) { return newBuilder(w, opts) } // Open loads the FST stored in the provided path func Open(path string) (*FST, error) { return open(path) } // Load will return the FST represented by the provided byte slice. func Load(data []byte) (*FST, error) { return new(data, nil) } // Merge will iterate through the provided Iterators, merge duplicate keys // with the provided MergeFunc, and build a new FST to the provided Writer. func Merge(w io.Writer, opts *BuilderOpts, itrs []Iterator, f MergeFunc) error { builder, err := New(w, opts) if err != nil { return err } itr, err := NewMergeIterator(itrs, f) for err == nil { k, v := itr.Current() err = builder.Insert(k, v) if err != nil { return err } err = itr.Next() } if err != nil && err != ErrIteratorDone { return err } err = itr.Close() if err != nil { return err } err = builder.Close() if err != nil { return err } return nil } ================================================ FILE: vellum_mmap.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
// +build !nommap package vellum import ( "os" mmap "github.com/blevesearch/mmap-go" ) type mmapWrapper struct { f *os.File mm mmap.MMap } func (m *mmapWrapper) Close() (err error) { if m.mm != nil { err = m.mm.Unmap() } // try to close file even if unmap failed if m.f != nil { err2 := m.f.Close() if err == nil { // try to return first error err = err2 } } return } func open(path string) (*FST, error) { f, err := os.Open(path) if err != nil { return nil, err } mm, err := mmap.Map(f, mmap.RDONLY, 0) if err != nil { // mmap failed, try to close the file _ = f.Close() return nil, err } return new(mm, &mmapWrapper{ f: f, mm: mm, }) } ================================================ FILE: vellum_nommap.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // +build nommap package vellum import "io/ioutil" func open(path string) (*FST, error) { data, err := ioutil.ReadFile(string) if err != nil { return nil, err } return new(data, nil) } ================================================ FILE: vellum_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "bytes" "io/ioutil" "os" "reflect" "testing" ) func TestRoundTripSimple(t *testing.T) { f, err := ioutil.TempFile("", "vellum") if err != nil { t.Fatal(err) } defer func() { err = f.Close() if err != nil { t.Fatal(err) } err = os.Remove(f.Name()) if err != nil { t.Fatal(err) } }() b, err := New(f, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Open(f.Name()) if err != nil { t.Fatalf("error loading set: %v", err) } defer func() { err = fst.Close() if err != nil { t.Fatal(err) } }() // first check all the expected values got := map[string]uint64{} itr, err := fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(smallSample, got) { t.Errorf("expected %v, got: %v", smallSample, got) } // some additional tests for items that should not exist if ok, _ := fst.Contains([]byte("mo")); ok { t.Errorf("expected to not contain mo, but did") } if ok, _ := fst.Contains([]byte("monr")); ok { t.Errorf("expected to not contain monr, but did") } if ok, _ := fst.Contains([]byte("thur")); ok { t.Errorf("expected to not contain thur, but did") } if ok, _ := fst.Contains([]byte("thurp")); ok { t.Errorf("expected to not contain thurp, but did") } if ok, _ := fst.Contains([]byte("tue")); ok { t.Errorf("expected to not contain 
tue, but did") } if ok, _ := fst.Contains([]byte("tuesd")); ok { t.Errorf("expected to not contain tuesd, but did") } // a few more misc non-existent values to increase coverage if ok, _ := fst.Contains([]byte("x")); ok { t.Errorf("expected to not contain x, but did") } // now try accessing it through the Automaton interface exists := AutomatonContains(fst, []byte("mon")) if !exists { t.Errorf("expected key 'mon' to exist, doesn't") } exists = AutomatonContains(fst, []byte("mons")) if exists { t.Errorf("expected key 'mo' to not exist, does") } // now try accessing it through the Transducer interface var val uint64 exists, val = TransducerGet(fst, []byte("mon")) if !exists { t.Errorf("expected key 'mon' to exist, doesn't") } if val != 2 { t.Errorf("expected val 2, got %d", val) } // now try accessing it through the Transducer interface // for key that doesn't exist exists, _ = TransducerGet(fst, []byte("mons")) if exists { t.Errorf("expected key 'mo' to not exist, does") } minKey, _ := fst.GetMinKey() if string(minKey) != "mon" { t.Errorf("expected minKey 'mon', got %v", string(minKey)) } maxKey, _ := fst.GetMaxKey() if string(maxKey) != "tye" { t.Errorf("expected maxKey 'tye', got %v", string(maxKey)) } } func TestRoundTripThousand(t *testing.T) { dataset := thousandTestWords randomThousandVals := randomValues(dataset) f, err := ioutil.TempFile("", "vellum") if err != nil { t.Fatal(err) } defer func() { err = f.Close() if err != nil { t.Fatal(err) } err = os.Remove(f.Name()) if err != nil { t.Fatal(err) } }() b, err := New(f, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStrings(b, dataset, randomThousandVals) if err != nil { t.Fatalf("error inserting thousand words: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing builder: %v", err) } fst, err := Open(f.Name()) if err != nil { t.Fatalf("error loading set: %v", err) } defer func() { err = fst.Close() if err != nil { t.Fatal(err) } }() // first check all the 
expected values got := map[string]uint64{} itr, err := fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } for i := 0; i < len(dataset); i++ { foundVal, ok := got[dataset[i]] if !ok { t.Fatalf("expected to find key, but didn't: %s", dataset[i]) } if foundVal != randomThousandVals[i] { t.Fatalf("expected value %d for key %s, but got %d", randomThousandVals[i], dataset[i], foundVal) } // now remove it delete(got, dataset[i]) } if len(got) != 0 { t.Fatalf("expected got map to be empty after checking, still has %v", got) } } func TestRoundTripEmpty(t *testing.T) { f, err := ioutil.TempFile("", "vellum") if err != nil { t.Fatal(err) } defer func() { err = f.Close() if err != nil { t.Fatal(err) } err = os.Remove(f.Name()) if err != nil { t.Fatal(err) } }() b, err := New(f, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Open(f.Name()) if err != nil { t.Fatalf("error loading set: %v", err) } defer func() { err = fst.Close() if err != nil { t.Fatal(err) } }() if fst.Len() != 0 { t.Fatalf("expected length 0, got %d", fst.Len()) } // first check all the expected values got := map[string]uint64{} itr, err := fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if len(got) > 0 { t.Errorf("expected not to see anything, got %v", got) } } func TestRoundTripEmptyString(t *testing.T) { f, err := ioutil.TempFile("", "vellum") if err != nil { t.Fatal(err) } defer func() { err = f.Close() if err != nil { t.Fatal(err) } err = os.Remove(f.Name()) if err != nil { t.Fatal(err) } }() b, err := New(f, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = b.Insert([]byte(""), 1) if err != nil { t.Fatalf("error inserting empty string") } 
err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Open(f.Name()) if err != nil { t.Fatalf("error loading set: %v", err) } defer func() { err = fst.Close() if err != nil { t.Fatal(err) } }() if fst.Len() != 1 { t.Fatalf("expected length 1, got %d", fst.Len()) } // first check all the expected values want := map[string]uint64{ "": 1, } got := map[string]uint64{} itr, err := fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(want, got) { t.Errorf("expected %v, got: %v", want, got) } } func TestRoundTripEmptyStringAndOthers(t *testing.T) { f, err := ioutil.TempFile("", "vellum") if err != nil { t.Fatal(err) } defer func() { err = f.Close() if err != nil { t.Fatal(err) } err = os.Remove(f.Name()) if err != nil { t.Fatal(err) } }() b, err := New(f, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = b.Insert([]byte(""), 0) if err != nil { t.Fatalf("error inserting empty string") } err = b.Insert([]byte("a"), 0) if err != nil { t.Fatalf("error inserting empty string") } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } fst, err := Open(f.Name()) if err != nil { t.Fatalf("error loading set: %v", err) } defer func() { err = fst.Close() if err != nil { t.Fatal(err) } }() if fst.Len() != 2 { t.Fatalf("expected length 2, got %d", fst.Len()) } // first check all the expected values want := map[string]uint64{ "": 0, "a": 0, } got := map[string]uint64{} itr, err := fst.Iterator(nil, nil) for err == nil { key, val := itr.Current() got[string(key)] = val err = itr.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(want, got) { t.Errorf("expected %v, got: %v", want, got) } } func TestMerge(t *testing.T) { // first create a file with the smallSample data f, err := ioutil.TempFile("", "vellum1") if err != nil { t.Fatal(err) } 
defer func() { err = f.Close() if err != nil { t.Fatal(err) } err = os.Remove(f.Name()) if err != nil { t.Fatal(err) } }() b, err := New(f, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } smallSample2 := map[string]uint64{ "bold": 25, "last": 1, "next": 500, "tank": 0, } // next create a file with the smallSample2 data f2, err := ioutil.TempFile("", "vellum1") if err != nil { t.Fatal(err) } defer func() { err = f2.Close() if err != nil { t.Fatal(err) } err = os.Remove(f2.Name()) if err != nil { t.Fatal(err) } }() b, err = New(f2, nil) if err != nil { t.Fatalf("error creating builder: %v", err) } err = insertStringMap(b, smallSample2) if err != nil { t.Fatalf("error building: %v", err) } err = b.Close() if err != nil { t.Fatalf("error closing: %v", err) } // now open them both up fst, err := Open(f.Name()) if err != nil { t.Fatalf("error loading set: %v", err) } defer func() { err = fst.Close() if err != nil { t.Fatal(err) } }() fst2, err := Open(f2.Name()) if err != nil { t.Fatalf("error loading set: %v", err) } defer func() { err = fst2.Close() if err != nil { t.Fatal(err) } }() // create full range iterators on both itr, err := fst.Iterator(nil, nil) if err != nil { t.Fatalf("error opening iterator: %v", err) } itr2, err := fst2.Iterator(nil, nil) if err != nil { t.Fatalf("error opening iterator: %v", err) } f3, err := ioutil.TempFile("", "vellum1") if err != nil { t.Fatal(err) } defer func() { err = f3.Close() if err != nil { t.Fatal(err) } err = os.Remove(f3.Name()) if err != nil { t.Fatal(err) } }() err = Merge(f3, nil, []Iterator{itr, itr2}, MergeSum) if err != nil { t.Fatalf("error merging iterators: %v", err) } // now check it fstc, err := Open(f3.Name()) if err != nil { t.Fatalf("error loading set: %v", err) } defer func() { err = fstc.Close() if err != nil { t.Fatal(err) } }() 
if fstc.Len() != 8 { t.Fatalf("expected length 8, got %d", fst.Len()) } // now check all the expected values want := map[string]uint64{ "mon": 2, "tues": 3, "thurs": 5, "tye": 99, "bold": 25, "last": 1, "next": 500, "tank": 0, } got := map[string]uint64{} itrc, err := fstc.Iterator(nil, nil) for err == nil { key, val := itrc.Current() got[string(key)] = val err = itrc.Next() } if err != ErrIteratorDone { t.Errorf("iterator error: %v", err) } if !reflect.DeepEqual(want, got) { t.Errorf("expected %v, got: %v", want, got) } } func BenchmarkKey4000K(b *testing.B) { benchmarkBigKey(b, 4000000) } func BenchmarkKey1000K(b *testing.B) { benchmarkBigKey(b, 1000000) } func BenchmarkKey100K(b *testing.B) { benchmarkBigKey(b, 100000) } func BenchmarkKey10K(b *testing.B) { benchmarkBigKey(b, 10000) } func BenchmarkKey1K(b *testing.B) { benchmarkBigKey(b, 1000) } func benchmarkBigKey(b *testing.B, n int) { big := bytes.Repeat([]byte("a"), n) b.ResetTimer() for i := 0; i < b.N; i++ { b, err := New(ioutil.Discard, nil) if err != nil { break } err = b.Insert(big, 0) if err != nil { break } err = b.Close() if err != nil { break } } } func TestMaxWithSubstring(t *testing.T) { var buf bytes.Buffer builder, err := New(&buf, nil) if err != nil { t.Fatal(err) } err = builder.Insert([]byte("1"), 1) if err != nil { t.Fatal(err) } err = builder.Insert([]byte("11"), 1) if err != nil { t.Fatal(err) } err = builder.Insert([]byte("9"), 9) if err != nil { t.Fatal(err) } err = builder.Insert([]byte("99"), 99) if err != nil { t.Fatal(err) } err = builder.Close() if err != nil { t.Fatal(err) } fst, err := Load(buf.Bytes()) if err != nil { t.Fatal(err) } mink, err := fst.GetMinKey() if err != nil { t.Fatal(err) } if string(mink) != "1" { t.Fatalf("expected max key 1, got %s", string(mink)) } maxk, err := fst.GetMaxKey() if err != nil { t.Fatal(err) } if string(maxk) != "99" { t.Fatalf("expected max key 99, got %s", string(maxk)) } } ================================================ FILE: 
vendor/github.com/edsrzf/mmap-go/LICENSE ================================================ Copyright (c) 2011, Evan Shaw All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: vendor/github.com/edsrzf/mmap-go/mmap.go ================================================ // Copyright 2011 Evan Shaw. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This file defines the common package interface and contains a little bit of // factored out logic. // Package mmap allows mapping files into memory. 
// It tries to provide a simple, reasonably portable interface,
// but doesn't go out of its way to abstract away every little platform detail.
// This specifically means:
//	* forked processes may or may not inherit mappings
//	* a file's timestamp may or may not be updated by writes through mappings
//	* specifying a size larger than the file's actual size can increase the file's size
//	* If the mapped file is being modified by another process while your program's running, don't expect consistent results between platforms
package mmap

import (
	"errors"
	"os"
	"reflect"
	"unsafe"
)

// Protection flags accepted as the prot argument of Map and MapRegion.
// RDONLY is the zero value; the remaining flags are distinct bits derived
// from iota (RDWR = 1<<1, COPY = 1<<2, EXEC = 1<<3).
const (
	// RDONLY maps the memory read-only.
	// Attempts to write to the MMap object will result in undefined behavior.
	RDONLY = 0
	// RDWR maps the memory as read-write. Writes to the MMap object will update the
	// underlying file.
	RDWR = 1 << iota
	// COPY maps the memory as copy-on-write. Writes to the MMap object will affect
	// memory, but the underlying file will remain unchanged.
	COPY
	// If EXEC is set, the mapped memory is marked as executable.
	EXEC
)

// Mapping flags accepted as the flags argument of Map and MapRegion.
const (
	// If the ANON flag is set, the mapped memory will not be backed by a file.
	ANON = 1 << iota
)

// MMap represents a file mapped into memory.
type MMap []byte

// Map maps an entire file into memory.
// If ANON is set in flags, f is ignored.
// It delegates to MapRegion with length -1 and offset 0.
func Map(f *os.File, prot, flags int) (MMap, error) {
	return MapRegion(f, -1, prot, flags, 0)
}

// MapRegion maps part of a file into memory.
// The offset parameter must be a multiple of the system's page size.
// If length < 0, the entire file will be mapped.
// If ANON is set in flags, f is ignored.
func MapRegion(f *os.File, length int, prot, flags int, offset int64) (MMap, error) { var fd uintptr if flags&ANON == 0 { fd = uintptr(f.Fd()) if length < 0 { fi, err := f.Stat() if err != nil { return nil, err } length = int(fi.Size()) } } else { if length <= 0 { return nil, errors.New("anonymous mapping requires non-zero length") } fd = ^uintptr(0) } return mmap(length, uintptr(prot), uintptr(flags), fd, offset) } func (m *MMap) header() *reflect.SliceHeader { return (*reflect.SliceHeader)(unsafe.Pointer(m)) } // Lock keeps the mapped region in physical memory, ensuring that it will not be // swapped out. func (m MMap) Lock() error { dh := m.header() return lock(dh.Data, uintptr(dh.Len)) } // Unlock reverses the effect of Lock, allowing the mapped region to potentially // be swapped out. // If m is already unlocked, aan error will result. func (m MMap) Unlock() error { dh := m.header() return unlock(dh.Data, uintptr(dh.Len)) } // Flush synchronizes the mapping's contents to the file's contents on disk. func (m MMap) Flush() error { dh := m.header() return flush(dh.Data, uintptr(dh.Len)) } // Unmap deletes the memory mapped region, flushes any remaining changes, and sets // m to nil. // Trying to read or write any remaining references to m after Unmap is called will // result in undefined behavior. // Unmap should only be called on the slice value that was originally returned from // a call to Map. Calling Unmap on a derived slice may cause errors. func (m *MMap) Unmap() error { dh := m.header() err := unmap(dh.Data, uintptr(dh.Len)) *m = nil return err } ================================================ FILE: vendor/github.com/edsrzf/mmap-go/mmap_unix.go ================================================ // Copyright 2011 Evan Shaw. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
// +build darwin dragonfly freebsd linux openbsd solaris netbsd package mmap import ( "syscall" ) func mmap(len int, inprot, inflags, fd uintptr, off int64) ([]byte, error) { flags := syscall.MAP_SHARED prot := syscall.PROT_READ switch { case inprot© != 0: prot |= syscall.PROT_WRITE flags = syscall.MAP_PRIVATE case inprot&RDWR != 0: prot |= syscall.PROT_WRITE } if inprot&EXEC != 0 { prot |= syscall.PROT_EXEC } if inflags&ANON != 0 { flags |= syscall.MAP_ANON } b, err := syscall.Mmap(int(fd), off, len, prot, flags) if err != nil { return nil, err } return b, nil } func flush(addr, len uintptr) error { _, _, errno := syscall.Syscall(_SYS_MSYNC, addr, len, _MS_SYNC) if errno != 0 { return syscall.Errno(errno) } return nil } func lock(addr, len uintptr) error { _, _, errno := syscall.Syscall(syscall.SYS_MLOCK, addr, len, 0) if errno != 0 { return syscall.Errno(errno) } return nil } func unlock(addr, len uintptr) error { _, _, errno := syscall.Syscall(syscall.SYS_MUNLOCK, addr, len, 0) if errno != 0 { return syscall.Errno(errno) } return nil } func unmap(addr, len uintptr) error { _, _, errno := syscall.Syscall(syscall.SYS_MUNMAP, addr, len, 0) if errno != 0 { return syscall.Errno(errno) } return nil } ================================================ FILE: vendor/github.com/edsrzf/mmap-go/mmap_windows.go ================================================ // Copyright 2011 Evan Shaw. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package mmap import ( "errors" "os" "sync" "syscall" ) // mmap on Windows is a two-step process. // First, we call CreateFileMapping to get a handle. // Then, we call MapviewToFile to get an actual pointer into memory. // Because we want to emulate a POSIX-style mmap, we don't want to expose // the handle -- only the pointer. We also want to return only a byte slice, // not a struct, so it's convenient to manipulate. 
// We keep this map so that we can get back the original handle from the memory address. var handleLock sync.Mutex var handleMap = map[uintptr]syscall.Handle{} func mmap(len int, prot, flags, hfile uintptr, off int64) ([]byte, error) { flProtect := uint32(syscall.PAGE_READONLY) dwDesiredAccess := uint32(syscall.FILE_MAP_READ) switch { case prot© != 0: flProtect = syscall.PAGE_WRITECOPY dwDesiredAccess = syscall.FILE_MAP_COPY case prot&RDWR != 0: flProtect = syscall.PAGE_READWRITE dwDesiredAccess = syscall.FILE_MAP_WRITE } if prot&EXEC != 0 { flProtect <<= 4 dwDesiredAccess |= syscall.FILE_MAP_EXECUTE } // The maximum size is the area of the file, starting from 0, // that we wish to allow to be mappable. It is the sum of // the length the user requested, plus the offset where that length // is starting from. This does not map the data into memory. maxSizeHigh := uint32((off + int64(len)) >> 32) maxSizeLow := uint32((off + int64(len)) & 0xFFFFFFFF) // TODO: Do we need to set some security attributes? It might help portability. h, errno := syscall.CreateFileMapping(syscall.Handle(hfile), nil, flProtect, maxSizeHigh, maxSizeLow, nil) if h == 0 { return nil, os.NewSyscallError("CreateFileMapping", errno) } // Actually map a view of the data into memory. The view's size // is the length the user requested. 
fileOffsetHigh := uint32(off >> 32) fileOffsetLow := uint32(off & 0xFFFFFFFF) addr, errno := syscall.MapViewOfFile(h, dwDesiredAccess, fileOffsetHigh, fileOffsetLow, uintptr(len)) if addr == 0 { return nil, os.NewSyscallError("MapViewOfFile", errno) } handleLock.Lock() handleMap[addr] = h handleLock.Unlock() m := MMap{} dh := m.header() dh.Data = addr dh.Len = len dh.Cap = dh.Len return m, nil } func flush(addr, len uintptr) error { errno := syscall.FlushViewOfFile(addr, len) if errno != nil { return os.NewSyscallError("FlushViewOfFile", errno) } handleLock.Lock() defer handleLock.Unlock() handle, ok := handleMap[addr] if !ok { // should be impossible; we would've errored above return errors.New("unknown base address") } errno = syscall.FlushFileBuffers(handle) return os.NewSyscallError("FlushFileBuffers", errno) } func lock(addr, len uintptr) error { errno := syscall.VirtualLock(addr, len) return os.NewSyscallError("VirtualLock", errno) } func unlock(addr, len uintptr) error { errno := syscall.VirtualUnlock(addr, len) return os.NewSyscallError("VirtualUnlock", errno) } func unmap(addr, len uintptr) error { flush(addr, len) // Lock the UnmapViewOfFile along with the handleMap deletion. // As soon as we unmap the view, the OS is free to give the // same addr to another new map. We don't want another goroutine // to insert and remove the same addr into handleMap while // we're trying to remove our old addr/handle pair. handleLock.Lock() defer handleLock.Unlock() err := syscall.UnmapViewOfFile(addr) if err != nil { return err } handle, ok := handleMap[addr] if !ok { // should be impossible; we would've errored above return errors.New("unknown base address") } delete(handleMap, addr) e := syscall.CloseHandle(syscall.Handle(handle)) return os.NewSyscallError("CloseHandle", e) } ================================================ FILE: vendor/github.com/edsrzf/mmap-go/msync_netbsd.go ================================================ // Copyright 2011 Evan Shaw. 
// All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package mmap

// NOTE(review): hard-coded msync syscall number and MS_SYNC flag value for
// NetBSD, presumably because package syscall does not export them on that
// platform; confirm against the NetBSD headers.
const _SYS_MSYNC = 277
const _MS_SYNC = 0x04

================================================
FILE: vendor/github.com/edsrzf/mmap-go/msync_unix.go
================================================
// Copyright 2011 Evan Shaw. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build darwin dragonfly freebsd linux openbsd solaris

package mmap

import (
	"syscall"
)

// On these platforms the msync syscall number and MS_SYNC flag are taken
// directly from package syscall.
const _SYS_MSYNC = syscall.SYS_MSYNC
const _MS_SYNC = syscall.MS_SYNC

================================================
FILE: vendor/github.com/inconshreveable/mousetrap/LICENSE
================================================
Copyright 2014 Alan Shreve

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

================================================
FILE: vendor/github.com/inconshreveable/mousetrap/trap_others.go
================================================
// +build !windows

package mousetrap

// StartedByExplorer returns true if the program was invoked by the user
// double-clicking on the executable from explorer.exe
//
// It is conservative and returns false if any of the internal calls fail.
// It does not guarantee that the program was run from a terminal. It only can tell you
// whether it was launched from explorer.exe
//
// On non-Windows platforms, it always returns false.
func StartedByExplorer() bool { return false } ================================================ FILE: vendor/github.com/inconshreveable/mousetrap/trap_windows.go ================================================ // +build windows // +build !go1.4 package mousetrap import ( "fmt" "os" "syscall" "unsafe" ) const ( // defined by the Win32 API th32cs_snapprocess uintptr = 0x2 ) var ( kernel = syscall.MustLoadDLL("kernel32.dll") CreateToolhelp32Snapshot = kernel.MustFindProc("CreateToolhelp32Snapshot") Process32First = kernel.MustFindProc("Process32FirstW") Process32Next = kernel.MustFindProc("Process32NextW") ) // ProcessEntry32 structure defined by the Win32 API type processEntry32 struct { dwSize uint32 cntUsage uint32 th32ProcessID uint32 th32DefaultHeapID int th32ModuleID uint32 cntThreads uint32 th32ParentProcessID uint32 pcPriClassBase int32 dwFlags uint32 szExeFile [syscall.MAX_PATH]uint16 } func getProcessEntry(pid int) (pe *processEntry32, err error) { snapshot, _, e1 := CreateToolhelp32Snapshot.Call(th32cs_snapprocess, uintptr(0)) if snapshot == uintptr(syscall.InvalidHandle) { err = fmt.Errorf("CreateToolhelp32Snapshot: %v", e1) return } defer syscall.CloseHandle(syscall.Handle(snapshot)) var processEntry processEntry32 processEntry.dwSize = uint32(unsafe.Sizeof(processEntry)) ok, _, e1 := Process32First.Call(snapshot, uintptr(unsafe.Pointer(&processEntry))) if ok == 0 { err = fmt.Errorf("Process32First: %v", e1) return } for { if processEntry.th32ProcessID == uint32(pid) { pe = &processEntry return } ok, _, e1 = Process32Next.Call(snapshot, uintptr(unsafe.Pointer(&processEntry))) if ok == 0 { err = fmt.Errorf("Process32Next: %v", e1) return } } } func getppid() (pid int, err error) { pe, err := getProcessEntry(os.Getpid()) if err != nil { return } pid = int(pe.th32ParentProcessID) return } // StartedByExplorer returns true if the program was invoked by the user double-clicking // on the executable from explorer.exe // // It is conservative and returns false 
if any of the internal calls fail. // It does not guarantee that the program was run from a terminal. It only can tell you // whether it was launched from explorer.exe func StartedByExplorer() bool { ppid, err := getppid() if err != nil { return false } pe, err := getProcessEntry(ppid) if err != nil { return false } name := syscall.UTF16ToString(pe.szExeFile[:]) return name == "explorer.exe" } ================================================ FILE: vendor/github.com/inconshreveable/mousetrap/trap_windows_1.4.go ================================================ // +build windows // +build go1.4 package mousetrap import ( "os" "syscall" "unsafe" ) func getProcessEntry(pid int) (*syscall.ProcessEntry32, error) { snapshot, err := syscall.CreateToolhelp32Snapshot(syscall.TH32CS_SNAPPROCESS, 0) if err != nil { return nil, err } defer syscall.CloseHandle(snapshot) var procEntry syscall.ProcessEntry32 procEntry.Size = uint32(unsafe.Sizeof(procEntry)) if err = syscall.Process32First(snapshot, &procEntry); err != nil { return nil, err } for { if procEntry.ProcessID == uint32(pid) { return &procEntry, nil } err = syscall.Process32Next(snapshot, &procEntry) if err != nil { return nil, err } } } // StartedByExplorer returns true if the program was invoked by the user double-clicking // on the executable from explorer.exe // // It is conservative and returns false if any of the internal calls fail. // It does not guarantee that the program was run from a terminal. It only can tell you // whether it was launched from explorer.exe func StartedByExplorer() bool { pe, err := getProcessEntry(os.Getppid()) if err != nil { return false } return "explorer.exe" == syscall.UTF16ToString(pe.ExeFile[:]) } ================================================ FILE: vendor/github.com/spf13/cobra/LICENSE.txt ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. 
Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
================================================ FILE: vendor/github.com/spf13/cobra/bash_completions.go ================================================ package cobra import ( "fmt" "io" "os" "sort" "strings" "github.com/spf13/pflag" ) // Annotations for Bash completion. const ( BashCompFilenameExt = "cobra_annotation_bash_completion_filename_extensions" BashCompCustom = "cobra_annotation_bash_completion_custom" BashCompOneRequiredFlag = "cobra_annotation_bash_completion_one_required_flag" BashCompSubdirsInDir = "cobra_annotation_bash_completion_subdirs_in_dir" ) func preamble(out io.Writer, name string) error { _, err := fmt.Fprintf(out, "# bash completion for %-36s -*- shell-script -*-\n", name) if err != nil { return err } preamStr := ` __debug() { if [[ -n ${BASH_COMP_DEBUG_FILE} ]]; then echo "$*" >> "${BASH_COMP_DEBUG_FILE}" fi } # Homebrew on Macs have version 1.3 of bash-completion which doesn't include # _init_completion. This is a very minimal version of that function. __my_init_completion() { COMPREPLY=() _get_comp_words_by_ref "$@" cur prev words cword } __index_of_word() { local w word=$1 shift index=0 for w in "$@"; do [[ $w = "$word" ]] && return index=$((index+1)) done index=-1 } __contains_word() { local w word=$1; shift for w in "$@"; do [[ $w = "$word" ]] && return done return 1 } __handle_reply() { __debug "${FUNCNAME[0]}" case $cur in -*) if [[ $(type -t compopt) = "builtin" ]]; then compopt -o nospace fi local allflags if [ ${#must_have_one_flag[@]} -ne 0 ]; then allflags=("${must_have_one_flag[@]}") else allflags=("${flags[*]} ${two_word_flags[*]}") fi COMPREPLY=( $(compgen -W "${allflags[*]}" -- "$cur") ) if [[ $(type -t compopt) = "builtin" ]]; then [[ "${COMPREPLY[0]}" == *= ]] || compopt +o nospace fi # complete after --flag=abc if [[ $cur == *=* ]]; then if [[ $(type -t compopt) = "builtin" ]]; then compopt +o nospace fi local index flag flag="${cur%%=*}" __index_of_word "${flag}" "${flags_with_completion[@]}" if [[ ${index} -ge 0 ]]; 
then COMPREPLY=() PREFIX="" cur="${cur#*=}" ${flags_completion[${index}]} if [ -n "${ZSH_VERSION}" ]; then # zfs completion needs --flag= prefix eval "COMPREPLY=( \"\${COMPREPLY[@]/#/${flag}=}\" )" fi fi fi return 0; ;; esac # check if we are handling a flag with special work handling local index __index_of_word "${prev}" "${flags_with_completion[@]}" if [[ ${index} -ge 0 ]]; then ${flags_completion[${index}]} return fi # we are parsing a flag and don't have a special handler, no completion if [[ ${cur} != "${words[cword]}" ]]; then return fi local completions completions=("${commands[@]}") if [[ ${#must_have_one_noun[@]} -ne 0 ]]; then completions=("${must_have_one_noun[@]}") fi if [[ ${#must_have_one_flag[@]} -ne 0 ]]; then completions+=("${must_have_one_flag[@]}") fi COMPREPLY=( $(compgen -W "${completions[*]}" -- "$cur") ) if [[ ${#COMPREPLY[@]} -eq 0 && ${#noun_aliases[@]} -gt 0 && ${#must_have_one_noun[@]} -ne 0 ]]; then COMPREPLY=( $(compgen -W "${noun_aliases[*]}" -- "$cur") ) fi if [[ ${#COMPREPLY[@]} -eq 0 ]]; then declare -F __custom_func >/dev/null && __custom_func fi __ltrim_colon_completions "$cur" } # The arguments should be in the form "ext1|ext2|extn" __handle_filename_extension_flag() { local ext="$1" _filedir "@(${ext})" } __handle_subdirs_in_dir_flag() { local dir="$1" pushd "${dir}" >/dev/null 2>&1 && _filedir -d && popd >/dev/null 2>&1 } __handle_flag() { __debug "${FUNCNAME[0]}: c is $c words[c] is ${words[c]}" # if a command required a flag, and we found it, unset must_have_one_flag() local flagname=${words[c]} local flagvalue # if the word contained an = if [[ ${words[c]} == *"="* ]]; then flagvalue=${flagname#*=} # take in as flagvalue after the = flagname=${flagname%%=*} # strip everything after the = flagname="${flagname}=" # but put the = back fi __debug "${FUNCNAME[0]}: looking for ${flagname}" if __contains_word "${flagname}" "${must_have_one_flag[@]}"; then must_have_one_flag=() fi # if you set a flag which only applies to this 
command, don't show subcommands if __contains_word "${flagname}" "${local_nonpersistent_flags[@]}"; then commands=() fi # keep flag value with flagname as flaghash if [ -n "${flagvalue}" ] ; then flaghash[${flagname}]=${flagvalue} elif [ -n "${words[ $((c+1)) ]}" ] ; then flaghash[${flagname}]=${words[ $((c+1)) ]} else flaghash[${flagname}]="true" # pad "true" for bool flag fi # skip the argument to a two word flag if __contains_word "${words[c]}" "${two_word_flags[@]}"; then c=$((c+1)) # if we are looking for a flags value, don't show commands if [[ $c -eq $cword ]]; then commands=() fi fi c=$((c+1)) } __handle_noun() { __debug "${FUNCNAME[0]}: c is $c words[c] is ${words[c]}" if __contains_word "${words[c]}" "${must_have_one_noun[@]}"; then must_have_one_noun=() elif __contains_word "${words[c]}" "${noun_aliases[@]}"; then must_have_one_noun=() fi nouns+=("${words[c]}") c=$((c+1)) } __handle_command() { __debug "${FUNCNAME[0]}: c is $c words[c] is ${words[c]}" local next_command if [[ -n ${last_command} ]]; then next_command="_${last_command}_${words[c]//:/__}" else if [[ $c -eq 0 ]]; then next_command="_$(basename "${words[c]//:/__}")" else next_command="_${words[c]//:/__}" fi fi c=$((c+1)) __debug "${FUNCNAME[0]}: looking for ${next_command}" declare -F $next_command >/dev/null && $next_command } __handle_word() { if [[ $c -ge $cword ]]; then __handle_reply return fi __debug "${FUNCNAME[0]}: c is $c words[c] is ${words[c]}" if [[ "${words[c]}" == -* ]]; then __handle_flag elif __contains_word "${words[c]}" "${commands[@]}"; then __handle_command elif [[ $c -eq 0 ]] && __contains_word "$(basename "${words[c]}")" "${commands[@]}"; then __handle_command else __handle_noun fi __handle_word } ` _, err = fmt.Fprint(out, preamStr) return err } func postscript(w io.Writer, name string) error { name = strings.Replace(name, ":", "__", -1) _, err := fmt.Fprintf(w, "__start_%s()\n", name) if err != nil { return err } _, err = fmt.Fprintf(w, `{ local cur prev words cword 
declare -A flaghash 2>/dev/null || : if declare -F _init_completion >/dev/null 2>&1; then _init_completion -s || return else __my_init_completion -n "=" || return fi local c=0 local flags=() local two_word_flags=() local local_nonpersistent_flags=() local flags_with_completion=() local flags_completion=() local commands=("%s") local must_have_one_flag=() local must_have_one_noun=() local last_command local nouns=() __handle_word } `, name) if err != nil { return err } _, err = fmt.Fprintf(w, `if [[ $(type -t compopt) = "builtin" ]]; then complete -o default -F __start_%s %s else complete -o default -o nospace -F __start_%s %s fi `, name, name, name, name) if err != nil { return err } _, err = fmt.Fprintf(w, "# ex: ts=4 sw=4 et filetype=sh\n") return err } func writeCommands(cmd *Command, w io.Writer) error { if _, err := fmt.Fprintf(w, " commands=()\n"); err != nil { return err } for _, c := range cmd.Commands() { if !c.IsAvailableCommand() || c == cmd.helpCommand { continue } if _, err := fmt.Fprintf(w, " commands+=(%q)\n", c.Name()); err != nil { return err } } _, err := fmt.Fprintf(w, "\n") return err } func writeFlagHandler(name string, annotations map[string][]string, w io.Writer) error { for key, value := range annotations { switch key { case BashCompFilenameExt: _, err := fmt.Fprintf(w, " flags_with_completion+=(%q)\n", name) if err != nil { return err } if len(value) > 0 { ext := "__handle_filename_extension_flag " + strings.Join(value, "|") _, err = fmt.Fprintf(w, " flags_completion+=(%q)\n", ext) } else { ext := "_filedir" _, err = fmt.Fprintf(w, " flags_completion+=(%q)\n", ext) } if err != nil { return err } case BashCompCustom: _, err := fmt.Fprintf(w, " flags_with_completion+=(%q)\n", name) if err != nil { return err } if len(value) > 0 { handlers := strings.Join(value, "; ") _, err = fmt.Fprintf(w, " flags_completion+=(%q)\n", handlers) } else { _, err = fmt.Fprintf(w, " flags_completion+=(:)\n") } if err != nil { return err } case 
BashCompSubdirsInDir: _, err := fmt.Fprintf(w, " flags_with_completion+=(%q)\n", name) if len(value) == 1 { ext := "__handle_subdirs_in_dir_flag " + value[0] _, err = fmt.Fprintf(w, " flags_completion+=(%q)\n", ext) } else { ext := "_filedir -d" _, err = fmt.Fprintf(w, " flags_completion+=(%q)\n", ext) } if err != nil { return err } } } return nil } func writeShortFlag(flag *pflag.Flag, w io.Writer) error { b := (len(flag.NoOptDefVal) > 0) name := flag.Shorthand format := " " if !b { format += "two_word_" } format += "flags+=(\"-%s\")\n" if _, err := fmt.Fprintf(w, format, name); err != nil { return err } return writeFlagHandler("-"+name, flag.Annotations, w) } func writeFlag(flag *pflag.Flag, w io.Writer) error { b := (len(flag.NoOptDefVal) > 0) name := flag.Name format := " flags+=(\"--%s" if !b { format += "=" } format += "\")\n" if _, err := fmt.Fprintf(w, format, name); err != nil { return err } return writeFlagHandler("--"+name, flag.Annotations, w) } func writeLocalNonPersistentFlag(flag *pflag.Flag, w io.Writer) error { b := (len(flag.NoOptDefVal) > 0) name := flag.Name format := " local_nonpersistent_flags+=(\"--%s" if !b { format += "=" } format += "\")\n" _, err := fmt.Fprintf(w, format, name) return err } func writeFlags(cmd *Command, w io.Writer) error { _, err := fmt.Fprintf(w, ` flags=() two_word_flags=() local_nonpersistent_flags=() flags_with_completion=() flags_completion=() `) if err != nil { return err } localNonPersistentFlags := cmd.LocalNonPersistentFlags() var visitErr error cmd.NonInheritedFlags().VisitAll(func(flag *pflag.Flag) { if nonCompletableFlag(flag) { return } if err := writeFlag(flag, w); err != nil { visitErr = err return } if len(flag.Shorthand) > 0 { if err := writeShortFlag(flag, w); err != nil { visitErr = err return } } if localNonPersistentFlags.Lookup(flag.Name) != nil { if err := writeLocalNonPersistentFlag(flag, w); err != nil { visitErr = err return } } }) if visitErr != nil { return visitErr } 
cmd.InheritedFlags().VisitAll(func(flag *pflag.Flag) { if nonCompletableFlag(flag) { return } if err := writeFlag(flag, w); err != nil { visitErr = err return } if len(flag.Shorthand) > 0 { if err := writeShortFlag(flag, w); err != nil { visitErr = err return } } }) if visitErr != nil { return visitErr } _, err = fmt.Fprintf(w, "\n") return err } func writeRequiredFlag(cmd *Command, w io.Writer) error { if _, err := fmt.Fprintf(w, " must_have_one_flag=()\n"); err != nil { return err } flags := cmd.NonInheritedFlags() var visitErr error flags.VisitAll(func(flag *pflag.Flag) { if nonCompletableFlag(flag) { return } for key := range flag.Annotations { switch key { case BashCompOneRequiredFlag: format := " must_have_one_flag+=(\"--%s" b := (flag.Value.Type() == "bool") if !b { format += "=" } format += "\")\n" if _, err := fmt.Fprintf(w, format, flag.Name); err != nil { visitErr = err return } if len(flag.Shorthand) > 0 { if _, err := fmt.Fprintf(w, " must_have_one_flag+=(\"-%s\")\n", flag.Shorthand); err != nil { visitErr = err return } } } } }) return visitErr } func writeRequiredNouns(cmd *Command, w io.Writer) error { if _, err := fmt.Fprintf(w, " must_have_one_noun=()\n"); err != nil { return err } sort.Sort(sort.StringSlice(cmd.ValidArgs)) for _, value := range cmd.ValidArgs { if _, err := fmt.Fprintf(w, " must_have_one_noun+=(%q)\n", value); err != nil { return err } } return nil } func writeArgAliases(cmd *Command, w io.Writer) error { if _, err := fmt.Fprintf(w, " noun_aliases=()\n"); err != nil { return err } sort.Sort(sort.StringSlice(cmd.ArgAliases)) for _, value := range cmd.ArgAliases { if _, err := fmt.Fprintf(w, " noun_aliases+=(%q)\n", value); err != nil { return err } } return nil } func gen(cmd *Command, w io.Writer) error { for _, c := range cmd.Commands() { if !c.IsAvailableCommand() || c == cmd.helpCommand { continue } if err := gen(c, w); err != nil { return err } } commandName := cmd.CommandPath() commandName = strings.Replace(commandName, " ", 
"_", -1) commandName = strings.Replace(commandName, ":", "__", -1) if _, err := fmt.Fprintf(w, "_%s()\n{\n", commandName); err != nil { return err } if _, err := fmt.Fprintf(w, " last_command=%q\n", commandName); err != nil { return err } if err := writeCommands(cmd, w); err != nil { return err } if err := writeFlags(cmd, w); err != nil { return err } if err := writeRequiredFlag(cmd, w); err != nil { return err } if err := writeRequiredNouns(cmd, w); err != nil { return err } if err := writeArgAliases(cmd, w); err != nil { return err } if _, err := fmt.Fprintf(w, "}\n\n"); err != nil { return err } return nil } // GenBashCompletion generates bash completion file and writes to the passed writer. func (cmd *Command) GenBashCompletion(w io.Writer) error { if err := preamble(w, cmd.Name()); err != nil { return err } if len(cmd.BashCompletionFunction) > 0 { if _, err := fmt.Fprintf(w, "%s\n", cmd.BashCompletionFunction); err != nil { return err } } if err := gen(cmd, w); err != nil { return err } return postscript(w, cmd.Name()) } func nonCompletableFlag(flag *pflag.Flag) bool { return flag.Hidden || len(flag.Deprecated) > 0 } // GenBashCompletionFile generates bash completion file. func (cmd *Command) GenBashCompletionFile(filename string) error { outFile, err := os.Create(filename) if err != nil { return err } defer outFile.Close() return cmd.GenBashCompletion(outFile) } // MarkFlagRequired adds the BashCompOneRequiredFlag annotation to the named flag, if it exists. func (cmd *Command) MarkFlagRequired(name string) error { return MarkFlagRequired(cmd.Flags(), name) } // MarkPersistentFlagRequired adds the BashCompOneRequiredFlag annotation to the named persistent flag, if it exists. func (cmd *Command) MarkPersistentFlagRequired(name string) error { return MarkFlagRequired(cmd.PersistentFlags(), name) } // MarkFlagRequired adds the BashCompOneRequiredFlag annotation to the named flag in the flag set, if it exists. 
func MarkFlagRequired(flags *pflag.FlagSet, name string) error {
	// The annotation is stored with the single value "true"; the result of
	// SetAnnotation is returned unchanged.
	return flags.SetAnnotation(name, BashCompOneRequiredFlag, []string{"true"})
}

// MarkFlagFilename adds the BashCompFilenameExt annotation to the named flag, if it exists.
// Generated bash autocompletion will select filenames for the flag, limiting to named extensions if provided.
// It delegates to the package-level MarkFlagFilename with cmd.Flags().
func (cmd *Command) MarkFlagFilename(name string, extensions ...string) error {
	return MarkFlagFilename(cmd.Flags(), name, extensions...)
}

// MarkFlagCustom adds the BashCompCustom annotation to the named flag, if it exists.
// Generated bash autocompletion will call the bash function f for the flag.
// It delegates to the package-level MarkFlagCustom with cmd.Flags().
func (cmd *Command) MarkFlagCustom(name string, f string) error {
	return MarkFlagCustom(cmd.Flags(), name, f)
}

// MarkPersistentFlagFilename adds the BashCompFilenameExt annotation to the named persistent flag, if it exists.
// Generated bash autocompletion will select filenames for the flag, limiting to named extensions if provided.
// It delegates to the package-level MarkFlagFilename with cmd.PersistentFlags().
func (cmd *Command) MarkPersistentFlagFilename(name string, extensions ...string) error {
	return MarkFlagFilename(cmd.PersistentFlags(), name, extensions...)
}

// MarkFlagFilename adds the BashCompFilenameExt annotation to the named flag in the flag set, if it exists.
// Generated bash autocompletion will select filenames for the flag, limiting to named extensions if provided.
// The extensions slice is stored as the annotation's value, and the result of
// SetAnnotation is returned unchanged.
func MarkFlagFilename(flags *pflag.FlagSet, name string, extensions ...string) error {
	return flags.SetAnnotation(name, BashCompFilenameExt, extensions)
}

// MarkFlagCustom adds the BashCompCustom annotation to the named flag in the flag set, if it exists.
// Generated bash autocompletion will call the bash function f for the flag.
func MarkFlagCustom(flags *pflag.FlagSet, name string, f string) error { return flags.SetAnnotation(name, BashCompCustom, []string{f}) } ================================================ FILE: vendor/github.com/spf13/cobra/cobra.go ================================================ // Copyright © 2013 Steve Francia . // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Commands similar to git, go tools and other modern CLI tools // inspired by go, go-Commander, gh and subcommand package cobra import ( "fmt" "io" "reflect" "strconv" "strings" "text/template" "unicode" ) var templateFuncs = template.FuncMap{ "trim": strings.TrimSpace, "trimRightSpace": trimRightSpace, "appendIfNotPresent": appendIfNotPresent, "rpad": rpad, "gt": Gt, "eq": Eq, } var initializers []func() // EnablePrefixMatching allows to set automatic prefix matching. Automatic prefix matching can be a dangerous thing // to automatically enable in CLI tools. // Set this to true to enable it. var EnablePrefixMatching = false // EnableCommandSorting controls sorting of the slice of commands, which is turned on by default. // To disable sorting, set it to false. var EnableCommandSorting = true // AddTemplateFunc adds a template function that's available to Usage and Help // template generation. func AddTemplateFunc(name string, tmplFunc interface{}) { templateFuncs[name] = tmplFunc } // AddTemplateFuncs adds multiple template functions availalble to Usage and // Help template generation. 
// Gt takes two values and reports whether the first is greater than the
// second. Each operand is first reduced to an int64:
//
//   - arrays, chans, maps and slices count as their length;
//   - signed integers (int, int8, int16, int32, int64) count as their value;
//   - strings are parsed as base-10 integers; the parse error is ignored and
//     whatever strconv.ParseInt returned is used (0 for non-numeric text);
//   - every other kind (including nil, unsigned integers and floats) counts
//     as 0.
func Gt(a interface{}, b interface{}) bool {
	return gtOperand(a) > gtOperand(b)
}

// gtOperand reduces one Gt operand to the int64 that is compared; see Gt for
// the rules.
func gtOperand(v interface{}) int64 {
	rv := reflect.ValueOf(v)
	switch rv.Kind() {
	case reflect.Array, reflect.Chan, reflect.Map, reflect.Slice:
		return int64(rv.Len())
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return rv.Int()
	case reflect.String:
		// The error is ignored on purpose: Gt has no way to report it and
		// has always compared using the value ParseInt hands back.
		n, _ := strconv.ParseInt(rv.String(), 10, 64)
		return n
	}
	return 0
}
func appendIfNotPresent(s, stringToAppend string) string { if strings.Contains(s, stringToAppend) { return s } return s + " " + stringToAppend } // rpad adds padding to the right of a string. func rpad(s string, padding int) string { template := fmt.Sprintf("%%-%ds", padding) return fmt.Sprintf(template, s) } // tmpl executes the given template text on data, writing the result to w. func tmpl(w io.Writer, text string, data interface{}) error { t := template.New("top") t.Funcs(templateFuncs) template.Must(t.Parse(text)) return t.Execute(w, data) } // ld compares two strings and returns the levenshtein distance between them. func ld(s, t string, ignoreCase bool) int { if ignoreCase { s = strings.ToLower(s) t = strings.ToLower(t) } d := make([][]int, len(s)+1) for i := range d { d[i] = make([]int, len(t)+1) } for i := range d { d[i][0] = i } for j := range d[0] { d[0][j] = j } for j := 1; j <= len(t); j++ { for i := 1; i <= len(s); i++ { if s[i-1] == t[j-1] { d[i][j] = d[i-1][j-1] } else { min := d[i-1][j] if d[i][j-1] < min { min = d[i][j-1] } if d[i-1][j-1] < min { min = d[i-1][j-1] } d[i][j] = min + 1 } } } return d[len(s)][len(t)] } ================================================ FILE: vendor/github.com/spf13/cobra/command.go ================================================ // Copyright © 2013 Steve Francia . // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //Package cobra is a commander providing a simple interface to create powerful modern CLI interfaces. 
//In addition to providing an interface, Cobra simultaneously provides a controller to organize your application code. package cobra import ( "bytes" "fmt" "io" "os" "path/filepath" "sort" "strings" flag "github.com/spf13/pflag" ) // Command is just that, a command for your application. // eg. 'go run' ... 'run' is the command. Cobra requires // you to define the usage and description as part of your command // definition to ensure usability. type Command struct { // Name is the command name, usually the executable's name. name string // The one-line usage message. Use string // An array of aliases that can be used instead of the first word in Use. Aliases []string // An array of command names for which this command will be suggested - similar to aliases but only suggests. SuggestFor []string // The short description shown in the 'help' output. Short string // The long message shown in the 'help ' output. Long string // Examples of how to use the command Example string // List of all valid non-flag arguments that are accepted in bash completions ValidArgs []string // List of aliases for ValidArgs. These are not suggested to the user in the bash // completion, but accepted if entered manually. ArgAliases []string // Custom functions used by the bash autocompletion generator BashCompletionFunction string // Is this command deprecated and should print this string when used? Deprecated string // Is this command hidden and should NOT show up in the list of available commands? Hidden bool // Annotations are key/value pairs that can be used by applications to identify or // group commands Annotations map[string]string // Full set of flags flags *flag.FlagSet // Set of flags childrens of this command will inherit pflags *flag.FlagSet // Flags that are declared specifically by this command (not inherited). lflags *flag.FlagSet // SilenceErrors is an option to quiet errors down stream SilenceErrors bool // Silence Usage is an option to silence usage when an error occurs. 
SilenceUsage bool // The *Run functions are executed in the following order: // * PersistentPreRun() // * PreRun() // * Run() // * PostRun() // * PersistentPostRun() // All functions get the same args, the arguments after the command name // PersistentPreRun: children of this command will inherit and execute PersistentPreRun func(cmd *Command, args []string) // PersistentPreRunE: PersistentPreRun but returns an error PersistentPreRunE func(cmd *Command, args []string) error // PreRun: children of this command will not inherit. PreRun func(cmd *Command, args []string) // PreRunE: PreRun but returns an error PreRunE func(cmd *Command, args []string) error // Run: Typically the actual work function. Most commands will only implement this Run func(cmd *Command, args []string) // RunE: Run but returns an error RunE func(cmd *Command, args []string) error // PostRun: run after the Run command. PostRun func(cmd *Command, args []string) // PostRunE: PostRun but returns an error PostRunE func(cmd *Command, args []string) error // PersistentPostRun: children of this command will inherit and execute after PostRun PersistentPostRun func(cmd *Command, args []string) // PersistentPostRunE: PersistentPostRun but returns an error PersistentPostRunE func(cmd *Command, args []string) error // DisableAutoGenTag remove DisableAutoGenTag bool // Commands is the list of commands supported by this program. 
commands []*Command // Parent Command for this command parent *Command // max lengths of commands' string lengths for use in padding commandsMaxUseLen int commandsMaxCommandPathLen int commandsMaxNameLen int // is commands slice are sorted or not commandsAreSorted bool flagErrorBuf *bytes.Buffer args []string // actual args parsed from flags output *io.Writer // out writer if set in SetOutput(w) usageFunc func(*Command) error // Usage can be defined by application usageTemplate string // Can be defined by Application flagErrorFunc func(*Command, error) error helpTemplate string // Can be defined by Application helpFunc func(*Command, []string) // Help can be defined by application helpCommand *Command // The help command // The global normalization function that we can use on every pFlag set and children commands globNormFunc func(f *flag.FlagSet, name string) flag.NormalizedName // Disable the suggestions based on Levenshtein distance that go along with 'unknown command' messages DisableSuggestions bool // If displaying suggestions, allows to set the minimum levenshtein distance to display, must be > 0 SuggestionsMinimumDistance int // Disable the flag parsing. If this is true all flags will be passed to the command as arguments. DisableFlagParsing bool } // SetArgs sets arguments for the command. It is set to os.Args[1:] by default, if desired, can be overridden // particularly useful when testing. func (c *Command) SetArgs(a []string) { c.args = a } // SetOutput sets the destination for usage and error messages. // If output is nil, os.Stderr is used. func (c *Command) SetOutput(output io.Writer) { c.output = &output } // SetUsageFunc sets usage function. Usage can be defined by application. func (c *Command) SetUsageFunc(f func(*Command) error) { c.usageFunc = f } // SetUsageTemplate sets usage template. Can be defined by Application. 
func (c *Command) SetUsageTemplate(s string) {
	c.usageTemplate = s
}

// SetFlagErrorFunc installs f as the function consulted when flag parsing
// fails; it receives the command and the parse error.
func (c *Command) SetFlagErrorFunc(f func(*Command, error) error) {
	c.flagErrorFunc = f
}

// SetHelpFunc installs an application-defined help function.
func (c *Command) SetHelpFunc(f func(*Command, []string)) {
	c.helpFunc = f
}

// SetHelpCommand replaces the command used as the help command.
func (c *Command) SetHelpCommand(cmd *Command) {
	c.helpCommand = cmd
}

// SetHelpTemplate installs an application-defined help template.
func (c *Command) SetHelpTemplate(s string) {
	c.helpTemplate = s
}

// SetGlobalNormalizationFunc applies n to this command's flag sets, remembers
// it, and then does the same for every child command recursively.
// The command tree must therefore not contain cycles.
func (c *Command) SetGlobalNormalizationFunc(n func(f *flag.FlagSet, name string) flag.NormalizedName) {
	c.Flags().SetNormalizeFunc(n)
	c.PersistentFlags().SetNormalizeFunc(n)
	c.globNormFunc = n

	for _, child := range c.commands {
		child.SetGlobalNormalizationFunc(n)
	}
}

// OutOrStdout returns the configured output writer, or os.Stdout when none
// is set on this command or any of its parents.
func (c *Command) OutOrStdout() io.Writer {
	return c.getOut(os.Stdout)
}

// OutOrStderr returns the configured output writer, or os.Stderr when none
// is set on this command or any of its parents.
func (c *Command) OutOrStderr() io.Writer {
	return c.getOut(os.Stderr)
}

// getOut walks from c up through its parents and returns the first writer
// set via SetOutput; def is returned when the root is reached without one.
func (c *Command) getOut(def io.Writer) io.Writer {
	for cur := c; ; cur = cur.parent {
		if cur.output != nil {
			return *cur.output
		}
		if !cur.HasParent() {
			return def
		}
	}
}

// UsageFunc returns the usage function set by SetUsageFunc on this command or
// on its nearest parent that has one. Without any, it returns a default
// that renders the usage template to OutOrStderr.
func (c *Command) UsageFunc() (f func(*Command) error) {
	for cur := c; ; cur = cur.parent {
		if cur.usageFunc != nil {
			return cur.usageFunc
		}
		if !cur.HasParent() {
			break
		}
	}
	return func(cmd *Command) error {
		cmd.mergePersistentFlags()
		err := tmpl(cmd.OutOrStderr(), cmd.UsageTemplate(), cmd)
		if err != nil {
			// Report the template failure on the command's output as well.
			cmd.Println(err)
		}
		return err
	}
}

// Usage puts out the usage for the command.
// Used when a user provides invalid input. // Can be defined by user by overriding UsageFunc. func (c *Command) Usage() error { return c.UsageFunc()(c) } // HelpFunc returns either the function set by SetHelpFunc for this command // or a parent, or it returns a function with default help behavior. func (c *Command) HelpFunc() func(*Command, []string) { if helpFunc := c.checkHelpFunc(); helpFunc != nil { return helpFunc } return func(*Command, []string) { c.mergePersistentFlags() err := tmpl(c.OutOrStdout(), c.HelpTemplate(), c) if err != nil { c.Println(err) } } } // checkHelpFunc checks if there is helpFunc in ancestors of c. func (c *Command) checkHelpFunc() func(*Command, []string) { if c == nil { return nil } if c.helpFunc != nil { return c.helpFunc } if c.HasParent() { return c.parent.checkHelpFunc() } return nil } // Help puts out the help for the command. // Used when a user calls help [command]. // Can be defined by user by overriding HelpFunc. func (c *Command) Help() error { c.HelpFunc()(c, []string{}) return nil } // UsageString return usage string. func (c *Command) UsageString() string { tmpOutput := c.output bb := new(bytes.Buffer) c.SetOutput(bb) c.Usage() c.output = tmpOutput return bb.String() } // FlagErrorFunc returns either the function set by SetFlagErrorFunc for this // command or a parent, or it returns a function which returns the original // error. func (c *Command) FlagErrorFunc() (f func(*Command, error) error) { if c.flagErrorFunc != nil { return c.flagErrorFunc } if c.HasParent() { return c.parent.FlagErrorFunc() } return func(c *Command, err error) error { return err } } var minUsagePadding = 25 // UsagePadding return padding for the usage. func (c *Command) UsagePadding() int { if c.parent == nil || minUsagePadding > c.parent.commandsMaxUseLen { return minUsagePadding } return c.parent.commandsMaxUseLen } var minCommandPathPadding = 11 // CommandPathPadding return padding for the command path. 
func (c *Command) CommandPathPadding() int {
	// Without a parent, or when the parent's longest command path is shorter
	// than the minimum, fall back to minCommandPathPadding.
	if c.parent == nil || minCommandPathPadding > c.parent.commandsMaxCommandPathLen {
		return minCommandPathPadding
	}
	return c.parent.commandsMaxCommandPathLen
}

// minNamePadding is the smallest width NamePadding will return.
var minNamePadding = 11

// NamePadding returns padding for the name: the parent's longest child name,
// but never less than minNamePadding.
func (c *Command) NamePadding() int {
	if c.parent == nil || minNamePadding > c.parent.commandsMaxNameLen {
		return minNamePadding
	}
	return c.parent.commandsMaxNameLen
}

// UsageTemplate returns usage template for the command. A template set on the
// command itself wins, then the nearest parent's, then the built-in default.
func (c *Command) UsageTemplate() string {
	if c.usageTemplate != "" {
		return c.usageTemplate
	}

	if c.HasParent() {
		return c.parent.UsageTemplate()
	}
	// NOTE(review): the default template text below sits on a single line in
	// this copy; confirm against upstream whether line breaks inside the raw
	// string were lost, since they are part of the rendered output.
	return `Usage:{{if .Runnable}} {{if .HasAvailableFlags}}{{appendIfNotPresent .UseLine "[flags]"}}{{else}}{{.UseLine}}{{end}}{{end}}{{if .HasAvailableSubCommands}} {{ .CommandPath}} [command]{{end}}{{if gt .Aliases 0}} Aliases: {{.NameAndAliases}} {{end}}{{if .HasExample}} Examples: {{ .Example }}{{end}}{{if .HasAvailableSubCommands}} Available Commands:{{range .Commands}}{{if (or .IsAvailableCommand (eq .Name "help"))}} {{rpad .Name .NamePadding }} {{.Short}}{{end}}{{end}}{{end}}{{if .HasAvailableLocalFlags}} Flags: {{.LocalFlags.FlagUsages | trimRightSpace}}{{end}}{{if .HasAvailableInheritedFlags}} Global Flags: {{.InheritedFlags.FlagUsages | trimRightSpace}}{{end}}{{if .HasHelpSubCommands}} Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}} {{rpad .CommandPath .CommandPathPadding}} {{.Short}}{{end}}{{end}}{{end}}{{if .HasAvailableSubCommands}} Use "{{.CommandPath}} [command] --help" for more information about a command.{{end}} `
}

// HelpTemplate return help template for the command. A template set on the
// command itself wins, then the nearest parent's, then the built-in default.
func (c *Command) HelpTemplate() string {
	if c.helpTemplate != "" {
		return c.helpTemplate
	}

	if c.HasParent() {
		return c.parent.HelpTemplate()
	}
	// The default prints Long (or Short when Long is empty) followed by the
	// usage string for runnable commands and commands with sub commands.
	return `{{with or .Long .Short }}{{. | trim}} {{end}}{{if or .Runnable .HasSubCommands}}{{.UsageString}}{{end}}`
}

// Really only used when casting a command to a commander.
func (c *Command) resetChildrensParents() {
	// Point every direct child back at c.
	for _, x := range c.commands {
		x.parent = c
	}
}

// hasNoOptDefVal reports whether the flag called name exists in f and has a
// non-empty NoOptDefVal.
func hasNoOptDefVal(name string, f *flag.FlagSet) bool {
	flag := f.Lookup(name)
	if flag == nil {
		return false
	}
	return len(flag.NoOptDefVal) > 0
}

// shortHasNoOptDefVal reports whether any flag in fs has the shorthand name
// and a non-empty NoOptDefVal.
func shortHasNoOptDefVal(name string, fs *flag.FlagSet) bool {
	result := false
	fs.VisitAll(func(flag *flag.Flag) {
		if flag.Shorthand == name && len(flag.NoOptDefVal) > 0 {
			result = true
		}
	})
	return result
}

// stripFlags returns the entries of args that look like commands or
// positional arguments, skipping flags, the values that follow them and
// quoted sections. The order of the switch cases below is significant.
func stripFlags(args []string, c *Command) []string {
	if len(args) < 1 {
		return args
	}
	c.mergePersistentFlags()

	commands := []string{}

	// inQuote: currently inside a double-quoted section.
	// inFlag: the previous entry was a flag that takes its value from the next entry.
	inQuote := false
	inFlag := false
	for _, y := range args {
		if !inQuote {
			switch {
			case strings.HasPrefix(y, "\""):
				inQuote = true
			case strings.Contains(y, "=\""):
				inQuote = true
			case strings.HasPrefix(y, "--") && !strings.Contains(y, "="):
				// TODO: this isn't quite right, we should really check ahead for 'true' or 'false'
				inFlag = !hasNoOptDefVal(y[2:], c.Flags())
			case strings.HasPrefix(y, "-") && !strings.Contains(y, "=") && len(y) == 2 && !shortHasNoOptDefVal(y[1:], c.Flags()):
				inFlag = true
			case inFlag:
				// This entry is the value of the preceding flag; drop it.
				inFlag = false
			case y == "":
				// strip empty commands, as the go tests expect this to be ok....
			case !strings.HasPrefix(y, "-"):
				commands = append(commands, y)
				inFlag = false
			}
		}

		// An unescaped trailing double quote closes the quoted section.
		if strings.HasSuffix(y, "\"") && !strings.HasSuffix(y, "\\\"") {
			inQuote = false
		}
	}

	return commands
}

// argsMinusFirstX removes only the first x from args.  Otherwise, commands that look like
// openshift admin policy add-role-to-user admin my-user, lose the admin argument (arg[4]).
// The input slice is not modified; when x is absent, args itself is returned.
func argsMinusFirstX(args []string, x string) []string {
	for i, y := range args {
		if x == y {
			ret := []string{}
			ret = append(ret, args[:i]...)
			ret = append(ret, args[i+1:]...)
			return ret
		}
	}
	return args
}

// Find the target command given the args and command tree
// Meant to be run on the highest node. Only searches down.
func (c *Command) Find(args []string) (*Command, []string, error) { if c == nil { return nil, nil, fmt.Errorf("Called find() on a nil Command") } var innerfind func(*Command, []string) (*Command, []string) innerfind = func(c *Command, innerArgs []string) (*Command, []string) { argsWOflags := stripFlags(innerArgs, c) if len(argsWOflags) == 0 { return c, innerArgs } nextSubCmd := argsWOflags[0] matches := make([]*Command, 0) for _, cmd := range c.commands { if cmd.Name() == nextSubCmd || cmd.HasAlias(nextSubCmd) { // exact name or alias match return innerfind(cmd, argsMinusFirstX(innerArgs, nextSubCmd)) } if EnablePrefixMatching { if strings.HasPrefix(cmd.Name(), nextSubCmd) { // prefix match matches = append(matches, cmd) } for _, x := range cmd.Aliases { if strings.HasPrefix(x, nextSubCmd) { matches = append(matches, cmd) } } } } // only accept a single prefix match - multiple matches would be ambiguous if len(matches) == 1 { return innerfind(matches[0], argsMinusFirstX(innerArgs, argsWOflags[0])) } return c, innerArgs } commandFound, a := innerfind(c, args) argsWOflags := stripFlags(a, commandFound) // no subcommand, always take args if !commandFound.HasSubCommands() { return commandFound, a, nil } // root command with subcommands, do subcommand checking if commandFound == c && len(argsWOflags) > 0 { suggestionsString := "" if !c.DisableSuggestions { if c.SuggestionsMinimumDistance <= 0 { c.SuggestionsMinimumDistance = 2 } if suggestions := c.SuggestionsFor(argsWOflags[0]); len(suggestions) > 0 { suggestionsString += "\n\nDid you mean this?\n" for _, s := range suggestions { suggestionsString += fmt.Sprintf("\t%v\n", s) } } } return commandFound, a, fmt.Errorf("unknown command %q for %q%s", argsWOflags[0], commandFound.CommandPath(), suggestionsString) } return commandFound, a, nil } // SuggestionsFor provides suggestions for the typedName. 
func (c *Command) SuggestionsFor(typedName string) []string { suggestions := []string{} for _, cmd := range c.commands { if cmd.IsAvailableCommand() { levenshteinDistance := ld(typedName, cmd.Name(), true) suggestByLevenshtein := levenshteinDistance <= c.SuggestionsMinimumDistance suggestByPrefix := strings.HasPrefix(strings.ToLower(cmd.Name()), strings.ToLower(typedName)) if suggestByLevenshtein || suggestByPrefix { suggestions = append(suggestions, cmd.Name()) } for _, explicitSuggestion := range cmd.SuggestFor { if strings.EqualFold(typedName, explicitSuggestion) { suggestions = append(suggestions, cmd.Name()) } } } } return suggestions } // VisitParents visits all parents of the command and invokes fn on each parent. func (c *Command) VisitParents(fn func(*Command)) { var traverse func(*Command) *Command traverse = func(x *Command) *Command { if x != c { fn(x) } if x.HasParent() { return traverse(x.parent) } return x } traverse(c) } // Root finds root command. func (c *Command) Root() *Command { var findRoot func(*Command) *Command findRoot = func(x *Command) *Command { if x.HasParent() { return findRoot(x.parent) } return x } return findRoot(c) } // ArgsLenAtDash will return the length of f.Args at the moment when a -- was // found during arg parsing. This allows your program to know which args were // before the -- and which came after. (Description from // https://godoc.org/github.com/spf13/pflag#FlagSet.ArgsLenAtDash). 
func (c *Command) ArgsLenAtDash() int { return c.Flags().ArgsLenAtDash() } func (c *Command) execute(a []string) (err error) { if c == nil { return fmt.Errorf("Called Execute() on a nil Command") } if len(c.Deprecated) > 0 { c.Printf("Command %q is deprecated, %s\n", c.Name(), c.Deprecated) } // initialize help flag as the last point possible to allow for user // overriding c.initHelpFlag() err = c.ParseFlags(a) if err != nil { return c.FlagErrorFunc()(c, err) } // If help is called, regardless of other flags, return we want help // Also say we need help if the command isn't runnable. helpVal, err := c.Flags().GetBool("help") if err != nil { // should be impossible to get here as we always declare a help // flag in initHelpFlag() c.Println("\"help\" flag declared as non-bool. Please correct your code") return err } if helpVal || !c.Runnable() { return flag.ErrHelp } c.preRun() argWoFlags := c.Flags().Args() if c.DisableFlagParsing { argWoFlags = a } for p := c; p != nil; p = p.Parent() { if p.PersistentPreRunE != nil { if err := p.PersistentPreRunE(c, argWoFlags); err != nil { return err } break } else if p.PersistentPreRun != nil { p.PersistentPreRun(c, argWoFlags) break } } if c.PreRunE != nil { if err := c.PreRunE(c, argWoFlags); err != nil { return err } } else if c.PreRun != nil { c.PreRun(c, argWoFlags) } if c.RunE != nil { if err := c.RunE(c, argWoFlags); err != nil { return err } } else { c.Run(c, argWoFlags) } if c.PostRunE != nil { if err := c.PostRunE(c, argWoFlags); err != nil { return err } } else if c.PostRun != nil { c.PostRun(c, argWoFlags) } for p := c; p != nil; p = p.Parent() { if p.PersistentPostRunE != nil { if err := p.PersistentPostRunE(c, argWoFlags); err != nil { return err } break } else if p.PersistentPostRun != nil { p.PersistentPostRun(c, argWoFlags) break } } return nil } func (c *Command) preRun() { for _, x := range initializers { x() } } func (c *Command) errorMsgFromParse() string { s := c.flagErrorBuf.String() x := 
strings.Split(s, "\n") if len(x) > 0 { return x[0] } return "" } // Execute Call execute to use the args (os.Args[1:] by default) // and run through the command tree finding appropriate matches // for commands and then corresponding flags. func (c *Command) Execute() error { _, err := c.ExecuteC() return err } // ExecuteC executes the command. func (c *Command) ExecuteC() (cmd *Command, err error) { // Regardless of what command execute is called on, run on Root only if c.HasParent() { return c.Root().ExecuteC() } // windows hook if preExecHookFn != nil { preExecHookFn(c) } // initialize help as the last point possible to allow for user // overriding c.initHelpCmd() var args []string // Workaround FAIL with "go test -v" or "cobra.test -test.v", see #155 if c.args == nil && filepath.Base(os.Args[0]) != "cobra.test" { args = os.Args[1:] } else { args = c.args } cmd, flags, err := c.Find(args) if err != nil { // If found parse to a subcommand and then failed, talk about the subcommand if cmd != nil { c = cmd } if !c.SilenceErrors { c.Println("Error:", err.Error()) c.Printf("Run '%v --help' for usage.\n", c.CommandPath()) } return c, err } err = cmd.execute(flags) if err != nil { // Always show help if requested, even if SilenceErrors is in // effect if err == flag.ErrHelp { cmd.HelpFunc()(cmd, args) return cmd, nil } // If root command has SilentErrors flagged, // all subcommands should respect it if !cmd.SilenceErrors && !c.SilenceErrors { c.Println("Error:", err.Error()) } // If root command has SilentUsage flagged, // all subcommands should respect it if !cmd.SilenceUsage && !c.SilenceUsage { c.Println(cmd.UsageString()) } return cmd, err } return cmd, nil } func (c *Command) initHelpFlag() { c.mergePersistentFlags() if c.Flags().Lookup("help") == nil { c.Flags().BoolP("help", "h", false, "help for "+c.Name()) } } func (c *Command) initHelpCmd() { if c.helpCommand == nil { if !c.HasSubCommands() { return } c.helpCommand = &Command{ Use: "help [command]", Short: 
"Help about any command", Long: `Help provides help for any command in the application. Simply type ` + c.Name() + ` help [path to command] for full details.`, PersistentPreRun: func(cmd *Command, args []string) {}, PersistentPostRun: func(cmd *Command, args []string) {}, Run: func(c *Command, args []string) { cmd, _, e := c.Root().Find(args) if cmd == nil || e != nil { c.Printf("Unknown help topic %#q\n", args) c.Root().Usage() } else { cmd.Help() } }, } } c.AddCommand(c.helpCommand) } // ResetCommands used for testing. func (c *Command) ResetCommands() { c.commands = nil c.helpCommand = nil } // Sorts commands by their names. type commandSorterByName []*Command func (c commandSorterByName) Len() int { return len(c) } func (c commandSorterByName) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c commandSorterByName) Less(i, j int) bool { return c[i].Name() < c[j].Name() } // Commands returns a sorted slice of child commands. func (c *Command) Commands() []*Command { // do not sort commands if it already sorted or sorting was disabled if EnableCommandSorting && !c.commandsAreSorted { sort.Sort(commandSorterByName(c.commands)) c.commandsAreSorted = true } return c.commands } // AddCommand adds one or more commands to this parent command. 
func (c *Command) AddCommand(cmds ...*Command) { for i, x := range cmds { if cmds[i] == c { panic("Command can't be a child of itself") } cmds[i].parent = c // update max lengths usageLen := len(x.Use) if usageLen > c.commandsMaxUseLen { c.commandsMaxUseLen = usageLen } commandPathLen := len(x.CommandPath()) if commandPathLen > c.commandsMaxCommandPathLen { c.commandsMaxCommandPathLen = commandPathLen } nameLen := len(x.Name()) if nameLen > c.commandsMaxNameLen { c.commandsMaxNameLen = nameLen } // If global normalization function exists, update all children if c.globNormFunc != nil { x.SetGlobalNormalizationFunc(c.globNormFunc) } c.commands = append(c.commands, x) c.commandsAreSorted = false } } // RemoveCommand removes one or more commands from a parent command. func (c *Command) RemoveCommand(cmds ...*Command) { commands := []*Command{} main: for _, command := range c.commands { for _, cmd := range cmds { if command == cmd { command.parent = nil continue main } } commands = append(commands, command) } c.commands = commands // recompute all lengths c.commandsMaxUseLen = 0 c.commandsMaxCommandPathLen = 0 c.commandsMaxNameLen = 0 for _, command := range c.commands { usageLen := len(command.Use) if usageLen > c.commandsMaxUseLen { c.commandsMaxUseLen = usageLen } commandPathLen := len(command.CommandPath()) if commandPathLen > c.commandsMaxCommandPathLen { c.commandsMaxCommandPathLen = commandPathLen } nameLen := len(command.Name()) if nameLen > c.commandsMaxNameLen { c.commandsMaxNameLen = nameLen } } } // Print is a convenience method to Print to the defined output, fallback to Stderr if not set. func (c *Command) Print(i ...interface{}) { fmt.Fprint(c.OutOrStderr(), i...) } // Println is a convenience method to Println to the defined output, fallback to Stderr if not set. func (c *Command) Println(i ...interface{}) { str := fmt.Sprintln(i...) c.Print(str) } // Printf is a convenience method to Printf to the defined output, fallback to Stderr if not set. 
func (c *Command) Printf(format string, i ...interface{}) { str := fmt.Sprintf(format, i...) c.Print(str) } // CommandPath returns the full path to this command. func (c *Command) CommandPath() string { str := c.Name() x := c for x.HasParent() { str = x.parent.Name() + " " + str x = x.parent } return str } // UseLine puts out the full usage for a given command (including parents). func (c *Command) UseLine() string { str := "" if c.HasParent() { str = c.parent.CommandPath() + " " } return str + c.Use } // DebugFlags used to determine which flags have been assigned to which commands // and which persist. func (c *Command) DebugFlags() { c.Println("DebugFlags called on", c.Name()) var debugflags func(*Command) debugflags = func(x *Command) { if x.HasFlags() || x.HasPersistentFlags() { c.Println(x.Name()) } if x.HasFlags() { x.flags.VisitAll(func(f *flag.Flag) { if x.HasPersistentFlags() { if x.persistentFlag(f.Name) == nil { c.Println(" -"+f.Shorthand+",", "--"+f.Name, "["+f.DefValue+"]", "", f.Value, " [L]") } else { c.Println(" -"+f.Shorthand+",", "--"+f.Name, "["+f.DefValue+"]", "", f.Value, " [LP]") } } else { c.Println(" -"+f.Shorthand+",", "--"+f.Name, "["+f.DefValue+"]", "", f.Value, " [L]") } }) } if x.HasPersistentFlags() { x.pflags.VisitAll(func(f *flag.Flag) { if x.HasFlags() { if x.flags.Lookup(f.Name) == nil { c.Println(" -"+f.Shorthand+",", "--"+f.Name, "["+f.DefValue+"]", "", f.Value, " [P]") } } else { c.Println(" -"+f.Shorthand+",", "--"+f.Name, "["+f.DefValue+"]", "", f.Value, " [P]") } }) } c.Println(x.flagErrorBuf) if x.HasSubCommands() { for _, y := range x.commands { debugflags(y) } } } debugflags(c) } // Name returns the command's name: the first word in the use line. func (c *Command) Name() string { if c.name != "" { return c.name } name := c.Use i := strings.Index(name, " ") if i >= 0 { name = name[:i] } c.name = name return c.name } // HasAlias determines if a given string is an alias of the command. 
func (c *Command) HasAlias(s string) bool { for _, a := range c.Aliases { if a == s { return true } } return false } // NameAndAliases returns string containing name and all aliases func (c *Command) NameAndAliases() string { return strings.Join(append([]string{c.Name()}, c.Aliases...), ", ") } // HasExample determines if the command has example. func (c *Command) HasExample() bool { return len(c.Example) > 0 } // Runnable determines if the command is itself runnable. func (c *Command) Runnable() bool { return c.Run != nil || c.RunE != nil } // HasSubCommands determines if the command has children commands. func (c *Command) HasSubCommands() bool { return len(c.commands) > 0 } // IsAvailableCommand determines if a command is available as a non-help command // (this includes all non deprecated/hidden commands). func (c *Command) IsAvailableCommand() bool { if len(c.Deprecated) != 0 || c.Hidden { return false } if c.HasParent() && c.Parent().helpCommand == c { return false } if c.Runnable() || c.HasAvailableSubCommands() { return true } return false } // IsAdditionalHelpTopicCommand determines if a command is an additional // help topic command; additional help topic command is determined by the // fact that it is NOT runnable/hidden/deprecated, and has no sub commands that // are runnable/hidden/deprecated. // Concrete example: https://github.com/spf13/cobra/issues/393#issuecomment-282741924. 
func (c *Command) IsAdditionalHelpTopicCommand() bool { // if a command is runnable, deprecated, or hidden it is not a 'help' command if c.Runnable() || len(c.Deprecated) != 0 || c.Hidden { return false } // if any non-help sub commands are found, the command is not a 'help' command for _, sub := range c.commands { if !sub.IsAdditionalHelpTopicCommand() { return false } } // the command either has no sub commands, or no non-help sub commands return true } // HasHelpSubCommands determines if a command has any available 'help' sub commands // that need to be shown in the usage/help default template under 'additional help // topics'. func (c *Command) HasHelpSubCommands() bool { // return true on the first found available 'help' sub command for _, sub := range c.commands { if sub.IsAdditionalHelpTopicCommand() { return true } } // the command either has no sub commands, or no available 'help' sub commands return false } // HasAvailableSubCommands determines if a command has available sub commands that // need to be shown in the usage/help default template under 'available commands'. func (c *Command) HasAvailableSubCommands() bool { // return true on the first found available (non deprecated/help/hidden) // sub command for _, sub := range c.commands { if sub.IsAvailableCommand() { return true } } // the command either has no sub comamnds, or no available (non deprecated/help/hidden) // sub commands return false } // HasParent determines if the command is a child command. func (c *Command) HasParent() bool { return c.parent != nil } // GlobalNormalizationFunc returns the global normalization function or nil if doesn't exists. func (c *Command) GlobalNormalizationFunc() func(f *flag.FlagSet, name string) flag.NormalizedName { return c.globNormFunc } // Flags returns the complete FlagSet that applies // to this command (local and persistent declared here and by all parents). 
func (c *Command) Flags() *flag.FlagSet { if c.flags == nil { c.flags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) if c.flagErrorBuf == nil { c.flagErrorBuf = new(bytes.Buffer) } c.flags.SetOutput(c.flagErrorBuf) } return c.flags } // LocalNonPersistentFlags are flags specific to this command which will NOT persist to subcommands. func (c *Command) LocalNonPersistentFlags() *flag.FlagSet { persistentFlags := c.PersistentFlags() out := flag.NewFlagSet(c.Name(), flag.ContinueOnError) c.LocalFlags().VisitAll(func(f *flag.Flag) { if persistentFlags.Lookup(f.Name) == nil { out.AddFlag(f) } }) return out } // LocalFlags returns the local FlagSet specifically set in the current command. func (c *Command) LocalFlags() *flag.FlagSet { c.mergePersistentFlags() local := flag.NewFlagSet(c.Name(), flag.ContinueOnError) c.lflags.VisitAll(func(f *flag.Flag) { local.AddFlag(f) }) if !c.HasParent() { flag.CommandLine.VisitAll(func(f *flag.Flag) { if local.Lookup(f.Name) == nil { local.AddFlag(f) } }) } return local } // InheritedFlags returns all flags which were inherited from parents commands. func (c *Command) InheritedFlags() *flag.FlagSet { c.mergePersistentFlags() inherited := flag.NewFlagSet(c.Name(), flag.ContinueOnError) local := c.LocalFlags() var rmerge func(x *Command) rmerge = func(x *Command) { if x.HasPersistentFlags() { x.PersistentFlags().VisitAll(func(f *flag.Flag) { if inherited.Lookup(f.Name) == nil && local.Lookup(f.Name) == nil { inherited.AddFlag(f) } }) } if x.HasParent() { rmerge(x.parent) } } if c.HasParent() { rmerge(c.parent) } return inherited } // NonInheritedFlags returns all flags which were not inherited from parent commands. func (c *Command) NonInheritedFlags() *flag.FlagSet { return c.LocalFlags() } // PersistentFlags returns the persistent FlagSet specifically set in the current command. 
func (c *Command) PersistentFlags() *flag.FlagSet { if c.pflags == nil { c.pflags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) if c.flagErrorBuf == nil { c.flagErrorBuf = new(bytes.Buffer) } c.pflags.SetOutput(c.flagErrorBuf) } return c.pflags } // ResetFlags is used in testing. func (c *Command) ResetFlags() { c.flagErrorBuf = new(bytes.Buffer) c.flagErrorBuf.Reset() c.flags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) c.flags.SetOutput(c.flagErrorBuf) c.pflags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) c.pflags.SetOutput(c.flagErrorBuf) } // HasFlags checks if the command contains any flags (local plus persistent from the entire structure). func (c *Command) HasFlags() bool { return c.Flags().HasFlags() } // HasPersistentFlags checks if the command contains persistent flags. func (c *Command) HasPersistentFlags() bool { return c.PersistentFlags().HasFlags() } // HasLocalFlags checks if the command has flags specifically declared locally. func (c *Command) HasLocalFlags() bool { return c.LocalFlags().HasFlags() } // HasInheritedFlags checks if the command has flags inherited from its parent command. func (c *Command) HasInheritedFlags() bool { return c.InheritedFlags().HasFlags() } // HasAvailableFlags checks if the command contains any flags (local plus persistent from the entire // structure) which are not hidden or deprecated. func (c *Command) HasAvailableFlags() bool { return c.Flags().HasAvailableFlags() } // HasAvailablePersistentFlags checks if the command contains persistent flags which are not hidden or deprecated. func (c *Command) HasAvailablePersistentFlags() bool { return c.PersistentFlags().HasAvailableFlags() } // HasAvailableLocalFlags checks if the command has flags specifically declared locally which are not hidden // or deprecated. 
func (c *Command) HasAvailableLocalFlags() bool {
	local := c.LocalFlags()
	return local.HasAvailableFlags()
}

// HasAvailableInheritedFlags reports whether the command has flags inherited
// from its parent commands which are not hidden or deprecated.
func (c *Command) HasAvailableInheritedFlags() bool {
	inherited := c.InheritedFlags()
	return inherited.HasAvailableFlags()
}

// Flag looks name up in the command's own flag set first and, when it is not
// found there, climbs the command tree looking for a persistent flag with
// that name. It returns nil when nothing matches.
func (c *Command) Flag(name string) (flag *flag.Flag) {
	if flag = c.Flags().Lookup(name); flag != nil {
		return flag
	}
	return c.persistentFlag(name)
}

// persistentFlag searches the persistent flags of c and then of each ancestor
// in turn, returning the first flag called name, or nil if there is none.
func (c *Command) persistentFlag(name string) (flag *flag.Flag) {
	// Iterative form of the walk towards the root; a command without a parent
	// ends the loop.
	for cur := c; ; cur = cur.parent {
		if cur.HasPersistentFlags() {
			flag = cur.PersistentFlags().Lookup(name)
		}
		if flag != nil || !cur.HasParent() {
			return flag
		}
	}
}

// ParseFlags merges the persistent flags of the command tree into the
// command's flag set and parses args with it. When DisableFlagParsing is set
// nothing is parsed and nil is returned.
func (c *Command) ParseFlags(args []string) (err error) {
	if c.DisableFlagParsing {
		return nil
	}

	c.mergePersistentFlags()
	return c.Flags().Parse(args)
}

// Parent returns a commands parent command.
func (c *Command) Parent() *Command { return c.parent } func (c *Command) mergePersistentFlags() { var rmerge func(x *Command) // Save the set of local flags if c.lflags == nil { c.lflags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) if c.flagErrorBuf == nil { c.flagErrorBuf = new(bytes.Buffer) } c.lflags.SetOutput(c.flagErrorBuf) addtolocal := func(f *flag.Flag) { c.lflags.AddFlag(f) } c.Flags().VisitAll(addtolocal) c.PersistentFlags().VisitAll(addtolocal) } rmerge = func(x *Command) { if !x.HasParent() { flag.CommandLine.VisitAll(func(f *flag.Flag) { if x.PersistentFlags().Lookup(f.Name) == nil { x.PersistentFlags().AddFlag(f) } }) } if x.HasPersistentFlags() { x.PersistentFlags().VisitAll(func(f *flag.Flag) { if c.Flags().Lookup(f.Name) == nil { c.Flags().AddFlag(f) } }) } if x.HasParent() { rmerge(x.parent) } } rmerge(c) } ================================================ FILE: vendor/github.com/spf13/cobra/command_notwin.go ================================================ // +build !windows package cobra var preExecHookFn func(*Command) ================================================ FILE: vendor/github.com/spf13/cobra/command_win.go ================================================ // +build windows package cobra import ( "os" "time" "github.com/inconshreveable/mousetrap" ) var preExecHookFn = preExecHook // enables an information splash screen on Windows if the CLI is started from explorer.exe. var MousetrapHelpText string = `This is a command line tool You need to open cmd.exe and run it from there. ` func preExecHook(c *Command) { if mousetrap.StartedByExplorer() { c.Print(MousetrapHelpText) time.Sleep(5 * time.Second) os.Exit(1) } } ================================================ FILE: vendor/github.com/spf13/cobra/doc/man_docs.go ================================================ // Copyright 2015 Red Hat Inc. All rights reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package doc import ( "bytes" "fmt" "io" "os" "path/filepath" "sort" "strings" "time" mangen "github.com/cpuguy83/go-md2man/md2man" "github.com/spf13/cobra" "github.com/spf13/pflag" ) // GenManTree will generate a man page for this command and all descendants // in the directory given. The header may be nil. This function may not work // correctly if your command names have - in them. If you have `cmd` with two // subcmds, `sub` and `sub-third`. And `sub` has a subcommand called `third` // it is undefined which help output will be in the file `cmd-sub-third.1`. func GenManTree(cmd *cobra.Command, header *GenManHeader, dir string) error { return GenManTreeFromOpts(cmd, GenManTreeOptions{ Header: header, Path: dir, CommandSeparator: "-", }) } // GenManTreeFromOpts generates a man page for the command and all descendants. // The pages are written to the opts.Path directory. 
func GenManTreeFromOpts(cmd *cobra.Command, opts GenManTreeOptions) error { header := opts.Header if header == nil { header = &GenManHeader{} } for _, c := range cmd.Commands() { if !c.IsAvailableCommand() || c.IsAdditionalHelpTopicCommand() { continue } if err := GenManTreeFromOpts(c, opts); err != nil { return err } } section := "1" if header.Section != "" { section = header.Section } separator := "_" if opts.CommandSeparator != "" { separator = opts.CommandSeparator } basename := strings.Replace(cmd.CommandPath(), " ", separator, -1) filename := filepath.Join(opts.Path, basename+"."+section) f, err := os.Create(filename) if err != nil { return err } defer f.Close() headerCopy := *header return GenMan(cmd, &headerCopy, f) } type GenManTreeOptions struct { Header *GenManHeader Path string CommandSeparator string } // GenManHeader is a lot like the .TH header at the start of man pages. These // include the title, section, date, source, and manual. We will use the // current time if Date if unset and will use "Auto generated by spf13/cobra" // if the Source is unset. type GenManHeader struct { Title string Section string Date *time.Time date string Source string Manual string } // GenMan will generate a man page for the given command and write it to // w. The header argument may be nil, however obviously w may not. 
func GenMan(cmd *cobra.Command, header *GenManHeader, w io.Writer) error { if header == nil { header = &GenManHeader{} } fillHeader(header, cmd.CommandPath()) b := genMan(cmd, header) _, err := w.Write(mangen.Render(b)) return err } func fillHeader(header *GenManHeader, name string) { if header.Title == "" { header.Title = strings.ToUpper(strings.Replace(name, " ", "\\-", -1)) } if header.Section == "" { header.Section = "1" } if header.Date == nil { now := time.Now() header.Date = &now } header.date = (*header.Date).Format("Jan 2006") if header.Source == "" { header.Source = "Auto generated by spf13/cobra" } } func manPreamble(out io.Writer, header *GenManHeader, cmd *cobra.Command, dashedName string) { description := cmd.Long if len(description) == 0 { description = cmd.Short } fmt.Fprintf(out, `%% %s(%s)%s %% %s %% %s # NAME `, header.Title, header.Section, header.date, header.Source, header.Manual) fmt.Fprintf(out, "%s \\- %s\n\n", dashedName, cmd.Short) fmt.Fprintf(out, "# SYNOPSIS\n") fmt.Fprintf(out, "**%s**\n\n", cmd.UseLine()) fmt.Fprintf(out, "# DESCRIPTION\n") fmt.Fprintf(out, "%s\n\n", description) } func manPrintFlags(out io.Writer, flags *pflag.FlagSet) { flags.VisitAll(func(flag *pflag.Flag) { if len(flag.Deprecated) > 0 || flag.Hidden { return } format := "" if len(flag.Shorthand) > 0 && len(flag.ShorthandDeprecated) == 0 { format = fmt.Sprintf("**-%s**, **--%s**", flag.Shorthand, flag.Name) } else { format = fmt.Sprintf("**--%s**", flag.Name) } if len(flag.NoOptDefVal) > 0 { format = format + "[" } if flag.Value.Type() == "string" { // put quotes on the value format = format + "=%q" } else { format = format + "=%s" } if len(flag.NoOptDefVal) > 0 { format = format + "]" } format = format + "\n\t%s\n\n" fmt.Fprintf(out, format, flag.DefValue, flag.Usage) }) } func manPrintOptions(out io.Writer, command *cobra.Command) { flags := command.NonInheritedFlags() if flags.HasFlags() { fmt.Fprintf(out, "# OPTIONS\n") manPrintFlags(out, flags) 
fmt.Fprintf(out, "\n") } flags = command.InheritedFlags() if flags.HasFlags() { fmt.Fprintf(out, "# OPTIONS INHERITED FROM PARENT COMMANDS\n") manPrintFlags(out, flags) fmt.Fprintf(out, "\n") } } func genMan(cmd *cobra.Command, header *GenManHeader) []byte { // something like `rootcmd-subcmd1-subcmd2` dashCommandName := strings.Replace(cmd.CommandPath(), " ", "-", -1) buf := new(bytes.Buffer) manPreamble(buf, header, cmd, dashCommandName) manPrintOptions(buf, cmd) if len(cmd.Example) > 0 { fmt.Fprintf(buf, "# EXAMPLE\n") fmt.Fprintf(buf, "```\n%s\n```\n", cmd.Example) } if hasSeeAlso(cmd) { fmt.Fprintf(buf, "# SEE ALSO\n") seealsos := make([]string, 0) if cmd.HasParent() { parentPath := cmd.Parent().CommandPath() dashParentPath := strings.Replace(parentPath, " ", "-", -1) seealso := fmt.Sprintf("**%s(%s)**", dashParentPath, header.Section) seealsos = append(seealsos, seealso) cmd.VisitParents(func(c *cobra.Command) { if c.DisableAutoGenTag { cmd.DisableAutoGenTag = c.DisableAutoGenTag } }) } children := cmd.Commands() sort.Sort(byName(children)) for _, c := range children { if !c.IsAvailableCommand() || c.IsAdditionalHelpTopicCommand() { continue } seealso := fmt.Sprintf("**%s-%s(%s)**", dashCommandName, c.Name(), header.Section) seealsos = append(seealsos, seealso) } fmt.Fprintf(buf, "%s\n", strings.Join(seealsos, ", ")) } if !cmd.DisableAutoGenTag { fmt.Fprintf(buf, "# HISTORY\n%s Auto generated by spf13/cobra\n", header.Date.Format("2-Jan-2006")) } return buf.Bytes() } ================================================ FILE: vendor/github.com/spf13/cobra/doc/md_docs.go ================================================ //Copyright 2015 Red Hat Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package doc import ( "fmt" "io" "os" "path/filepath" "sort" "strings" "time" "github.com/spf13/cobra" ) func printOptions(w io.Writer, cmd *cobra.Command, name string) error { flags := cmd.NonInheritedFlags() flags.SetOutput(w) if flags.HasFlags() { if _, err := fmt.Fprintf(w, "### Options\n\n```\n"); err != nil { return err } flags.PrintDefaults() if _, err := fmt.Fprintf(w, "```\n\n"); err != nil { return err } } parentFlags := cmd.InheritedFlags() parentFlags.SetOutput(w) if parentFlags.HasFlags() { if _, err := fmt.Fprintf(w, "### Options inherited from parent commands\n\n```\n"); err != nil { return err } parentFlags.PrintDefaults() if _, err := fmt.Fprintf(w, "```\n\n"); err != nil { return err } } return nil } func GenMarkdown(cmd *cobra.Command, w io.Writer) error { return GenMarkdownCustom(cmd, w, func(s string) string { return s }) } func GenMarkdownCustom(cmd *cobra.Command, w io.Writer, linkHandler func(string) string) error { name := cmd.CommandPath() short := cmd.Short long := cmd.Long if len(long) == 0 { long = short } if _, err := fmt.Fprintf(w, "## %s\n\n", name); err != nil { return err } if _, err := fmt.Fprintf(w, "%s\n\n", short); err != nil { return err } if _, err := fmt.Fprintf(w, "### Synopsis\n\n"); err != nil { return err } if _, err := fmt.Fprintf(w, "\n%s\n\n", long); err != nil { return err } if cmd.Runnable() { if _, err := fmt.Fprintf(w, "```\n%s\n```\n\n", cmd.UseLine()); err != nil { return err } } if len(cmd.Example) > 0 { if _, err := fmt.Fprintf(w, "### Examples\n\n"); err != nil { return err } if _, err := fmt.Fprintf(w, 
"```\n%s\n```\n\n", cmd.Example); err != nil { return err } } if err := printOptions(w, cmd, name); err != nil { return err } if hasSeeAlso(cmd) { if _, err := fmt.Fprintf(w, "### SEE ALSO\n"); err != nil { return err } if cmd.HasParent() { parent := cmd.Parent() pname := parent.CommandPath() link := pname + ".md" link = strings.Replace(link, " ", "_", -1) if _, err := fmt.Fprintf(w, "* [%s](%s)\t - %s\n", pname, linkHandler(link), parent.Short); err != nil { return err } cmd.VisitParents(func(c *cobra.Command) { if c.DisableAutoGenTag { cmd.DisableAutoGenTag = c.DisableAutoGenTag } }) } children := cmd.Commands() sort.Sort(byName(children)) for _, child := range children { if !child.IsAvailableCommand() || child.IsAdditionalHelpTopicCommand() { continue } cname := name + " " + child.Name() link := cname + ".md" link = strings.Replace(link, " ", "_", -1) if _, err := fmt.Fprintf(w, "* [%s](%s)\t - %s\n", cname, linkHandler(link), child.Short); err != nil { return err } } if _, err := fmt.Fprintf(w, "\n"); err != nil { return err } } if !cmd.DisableAutoGenTag { if _, err := fmt.Fprintf(w, "###### Auto generated by spf13/cobra on %s\n", time.Now().Format("2-Jan-2006")); err != nil { return err } } return nil } func GenMarkdownTree(cmd *cobra.Command, dir string) error { identity := func(s string) string { return s } emptyStr := func(s string) string { return "" } return GenMarkdownTreeCustom(cmd, dir, emptyStr, identity) } func GenMarkdownTreeCustom(cmd *cobra.Command, dir string, filePrepender, linkHandler func(string) string) error { for _, c := range cmd.Commands() { if !c.IsAvailableCommand() || c.IsAdditionalHelpTopicCommand() { continue } if err := GenMarkdownTreeCustom(c, dir, filePrepender, linkHandler); err != nil { return err } } basename := strings.Replace(cmd.CommandPath(), " ", "_", -1) + ".md" filename := filepath.Join(dir, basename) f, err := os.Create(filename) if err != nil { return err } defer f.Close() if _, err := io.WriteString(f, 
filePrepender(filename)); err != nil { return err } if err := GenMarkdownCustom(cmd, f, linkHandler); err != nil { return err } return nil } ================================================ FILE: vendor/github.com/spf13/cobra/doc/util.go ================================================ // Copyright 2015 Red Hat Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package doc import ( "strings" "github.com/spf13/cobra" ) // Test to see if we have a reason to print See Also information in docs // Basically this is a test for a parent commend or a subcommand which is // both not deprecated and not the autogenerated help command. func hasSeeAlso(cmd *cobra.Command) bool { if cmd.HasParent() { return true } for _, c := range cmd.Commands() { if !c.IsAvailableCommand() || c.IsAdditionalHelpTopicCommand() { continue } return true } return false } // Temporary workaround for yaml lib generating incorrect yaml with long strings // that do not contain \n. func forceMultiLine(s string) string { if len(s) > 60 && !strings.Contains(s, "\n") { s = s + "\n" } return s } type byName []*cobra.Command func (s byName) Len() int { return len(s) } func (s byName) Swap(i, j int) { s[i], s[j] = s[j], s[i] } func (s byName) Less(i, j int) bool { return s[i].Name() < s[j].Name() } ================================================ FILE: vendor/github.com/spf13/cobra/doc/yaml_docs.go ================================================ // Copyright 2016 French Ben. 
All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package doc import ( "fmt" "io" "os" "path/filepath" "sort" "strings" "github.com/spf13/cobra" "github.com/spf13/pflag" "gopkg.in/yaml.v2" ) type cmdOption struct { Name string Shorthand string `yaml:",omitempty"` DefaultValue string `yaml:"default_value,omitempty"` Usage string `yaml:",omitempty"` } type cmdDoc struct { Name string Synopsis string `yaml:",omitempty"` Description string `yaml:",omitempty"` Options []cmdOption `yaml:",omitempty"` InheritedOptions []cmdOption `yaml:"inherited_options,omitempty"` Example string `yaml:",omitempty"` SeeAlso []string `yaml:"see_also,omitempty"` } // GenYamlTree creates yaml structured ref files for this command and all descendants // in the directory given. This function may not work // correctly if your command names have - in them. If you have `cmd` with two // subcmds, `sub` and `sub-third`. And `sub` has a subcommand called `third` // it is undefined which help output will be in the file `cmd-sub-third.1`. 
func GenYamlTree(cmd *cobra.Command, dir string) error { identity := func(s string) string { return s } emptyStr := func(s string) string { return "" } return GenYamlTreeCustom(cmd, dir, emptyStr, identity) } // GenYamlTreeCustom creates yaml structured ref files func GenYamlTreeCustom(cmd *cobra.Command, dir string, filePrepender, linkHandler func(string) string) error { for _, c := range cmd.Commands() { if !c.IsAvailableCommand() || c.IsAdditionalHelpTopicCommand() { continue } if err := GenYamlTreeCustom(c, dir, filePrepender, linkHandler); err != nil { return err } } basename := strings.Replace(cmd.CommandPath(), " ", "_", -1) + ".yaml" filename := filepath.Join(dir, basename) f, err := os.Create(filename) if err != nil { return err } defer f.Close() if _, err := io.WriteString(f, filePrepender(filename)); err != nil { return err } if err := GenYamlCustom(cmd, f, linkHandler); err != nil { return err } return nil } // GenYaml creates yaml output func GenYaml(cmd *cobra.Command, w io.Writer) error { return GenYamlCustom(cmd, w, func(s string) string { return s }) } // GenYamlCustom creates custom yaml output func GenYamlCustom(cmd *cobra.Command, w io.Writer, linkHandler func(string) string) error { yamlDoc := cmdDoc{} yamlDoc.Name = cmd.CommandPath() yamlDoc.Synopsis = forceMultiLine(cmd.Short) yamlDoc.Description = forceMultiLine(cmd.Long) if len(cmd.Example) > 0 { yamlDoc.Example = cmd.Example } flags := cmd.NonInheritedFlags() if flags.HasFlags() { yamlDoc.Options = genFlagResult(flags) } flags = cmd.InheritedFlags() if flags.HasFlags() { yamlDoc.InheritedOptions = genFlagResult(flags) } if hasSeeAlso(cmd) { result := []string{} if cmd.HasParent() { parent := cmd.Parent() result = append(result, parent.CommandPath()+" - "+parent.Short) } children := cmd.Commands() sort.Sort(byName(children)) for _, child := range children { if !child.IsAvailableCommand() || child.IsAdditionalHelpTopicCommand() { continue } result = append(result, child.Name()+" - 
"+child.Short) } yamlDoc.SeeAlso = result } final, err := yaml.Marshal(&yamlDoc) if err != nil { fmt.Println(err) os.Exit(1) } if _, err := fmt.Fprintf(w, string(final)); err != nil { return err } return nil } func genFlagResult(flags *pflag.FlagSet) []cmdOption { var result []cmdOption flags.VisitAll(func(flag *pflag.Flag) { // Todo, when we mark a shorthand is deprecated, but specify an empty message. // The flag.ShorthandDeprecated is empty as the shorthand is deprecated. // Using len(flag.ShorthandDeprecated) > 0 can't handle this, others are ok. if !(len(flag.ShorthandDeprecated) > 0) && len(flag.Shorthand) > 0 { opt := cmdOption{ flag.Name, flag.Shorthand, flag.DefValue, forceMultiLine(flag.Usage), } result = append(result, opt) } else { opt := cmdOption{ Name: flag.Name, DefaultValue: forceMultiLine(flag.DefValue), Usage: forceMultiLine(flag.Usage), } result = append(result, opt) } }) return result } ================================================ FILE: vendor/github.com/spf13/pflag/LICENSE ================================================ Copyright (c) 2012 Alex Ogier. All rights reserved. Copyright (c) 2012 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: vendor/github.com/spf13/pflag/bool.go ================================================ package pflag import "strconv" // optional interface to indicate boolean flags that can be // supplied without "=value" text type boolFlag interface { Value IsBoolFlag() bool } // -- bool Value type boolValue bool func newBoolValue(val bool, p *bool) *boolValue { *p = val return (*boolValue)(p) } func (b *boolValue) Set(s string) error { v, err := strconv.ParseBool(s) *b = boolValue(v) return err } func (b *boolValue) Type() string { return "bool" } func (b *boolValue) String() string { return strconv.FormatBool(bool(*b)) } func (b *boolValue) IsBoolFlag() bool { return true } func boolConv(sval string) (interface{}, error) { return strconv.ParseBool(sval) } // GetBool return the bool value of a flag with the given name func (f *FlagSet) GetBool(name string) (bool, error) { val, err := f.getFlagType(name, "bool", boolConv) if err != nil { return false, err } return val.(bool), nil } // BoolVar defines a bool flag with specified name, default value, and usage string. // The argument p points to a bool variable in which to store the value of the flag. 
func (f *FlagSet) BoolVar(p *bool, name string, value bool, usage string) {
	const noShorthand = ""
	f.BoolVarP(p, name, noShorthand, value, usage)
}

// BoolVarP is like BoolVar, but accepts a shorthand letter that can be used
// after a single dash. The flag's NoOptDefVal is set to "true".
func (f *FlagSet) BoolVarP(p *bool, name, shorthand string, value bool, usage string) {
	f.VarPF(newBoolValue(value, p), name, shorthand, usage).NoOptDefVal = "true"
}

// BoolVar defines a bool flag with specified name, default value, and usage
// string on the CommandLine flag set. The argument p points to a bool variable
// in which to store the value of the flag.
func BoolVar(p *bool, name string, value bool, usage string) {
	CommandLine.BoolVarP(p, name, "", value, usage)
}

// BoolVarP is like BoolVar, but accepts a shorthand letter that can be used
// after a single dash.
func BoolVarP(p *bool, name, shorthand string, value bool, usage string) {
	CommandLine.BoolVarP(p, name, shorthand, value, usage)
}

// Bool defines a bool flag with specified name, default value, and usage
// string. The return value is the address of a bool variable that stores the
// value of the flag.
func (f *FlagSet) Bool(name string, value bool, usage string) *bool {
	target := new(bool)
	f.BoolVarP(target, name, "", value, usage)
	return target
}

// BoolP is like Bool, but accepts a shorthand letter that can be used after a
// single dash.
func (f *FlagSet) BoolP(name, shorthand string, value bool, usage string) *bool {
	target := new(bool)
	f.BoolVarP(target, name, shorthand, value, usage)
	return target
}

// Bool defines a bool flag with specified name, default value, and usage
// string on the CommandLine flag set. The return value is the address of a
// bool variable that stores the value of the flag.
func Bool(name string, value bool, usage string) *bool {
	return CommandLine.BoolP(name, "", value, usage)
}

// BoolP is like Bool, but accepts a shorthand letter that can be used after a single dash.
func BoolP(name, shorthand string, value bool, usage string) *bool { b := CommandLine.BoolP(name, shorthand, value, usage) return b } ================================================ FILE: vendor/github.com/spf13/pflag/bool_slice.go ================================================ package pflag import ( "io" "strconv" "strings" ) // -- boolSlice Value type boolSliceValue struct { value *[]bool changed bool } func newBoolSliceValue(val []bool, p *[]bool) *boolSliceValue { bsv := new(boolSliceValue) bsv.value = p *bsv.value = val return bsv } // Set converts, and assigns, the comma-separated boolean argument string representation as the []bool value of this flag. // If Set is called on a flag that already has a []bool assigned, the newly converted values will be appended. func (s *boolSliceValue) Set(val string) error { // remove all quote characters rmQuote := strings.NewReplacer(`"`, "", `'`, "", "`", "") // read flag arguments with CSV parser boolStrSlice, err := readAsCSV(rmQuote.Replace(val)) if err != nil && err != io.EOF { return err } // parse boolean values into slice out := make([]bool, 0, len(boolStrSlice)) for _, boolStr := range boolStrSlice { b, err := strconv.ParseBool(strings.TrimSpace(boolStr)) if err != nil { return err } out = append(out, b) } if !s.changed { *s.value = out } else { *s.value = append(*s.value, out...) } s.changed = true return nil } // Type returns a string that uniquely represents this flag's type. func (s *boolSliceValue) Type() string { return "boolSlice" } // String defines a "native" format for this boolean slice flag value. 
func (s *boolSliceValue) String() string { boolStrSlice := make([]string, len(*s.value)) for i, b := range *s.value { boolStrSlice[i] = strconv.FormatBool(b) } out, _ := writeAsCSV(boolStrSlice) return "[" + out + "]" } func boolSliceConv(val string) (interface{}, error) { val = strings.Trim(val, "[]") // Empty string would cause a slice with one (empty) entry if len(val) == 0 { return []bool{}, nil } ss := strings.Split(val, ",") out := make([]bool, len(ss)) for i, t := range ss { var err error out[i], err = strconv.ParseBool(t) if err != nil { return nil, err } } return out, nil } // GetBoolSlice returns the []bool value of a flag with the given name. func (f *FlagSet) GetBoolSlice(name string) ([]bool, error) { val, err := f.getFlagType(name, "boolSlice", boolSliceConv) if err != nil { return []bool{}, err } return val.([]bool), nil } // BoolSliceVar defines a boolSlice flag with specified name, default value, and usage string. // The argument p points to a []bool variable in which to store the value of the flag. func (f *FlagSet) BoolSliceVar(p *[]bool, name string, value []bool, usage string) { f.VarP(newBoolSliceValue(value, p), name, "", usage) } // BoolSliceVarP is like BoolSliceVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) BoolSliceVarP(p *[]bool, name, shorthand string, value []bool, usage string) { f.VarP(newBoolSliceValue(value, p), name, shorthand, usage) } // BoolSliceVar defines a []bool flag with specified name, default value, and usage string. // The argument p points to a []bool variable in which to store the value of the flag. func BoolSliceVar(p *[]bool, name string, value []bool, usage string) { CommandLine.VarP(newBoolSliceValue(value, p), name, "", usage) } // BoolSliceVarP is like BoolSliceVar, but accepts a shorthand letter that can be used after a single dash. 
func BoolSliceVarP(p *[]bool, name, shorthand string, value []bool, usage string) { CommandLine.VarP(newBoolSliceValue(value, p), name, shorthand, usage) } // BoolSlice defines a []bool flag with specified name, default value, and usage string. // The return value is the address of a []bool variable that stores the value of the flag. func (f *FlagSet) BoolSlice(name string, value []bool, usage string) *[]bool { p := []bool{} f.BoolSliceVarP(&p, name, "", value, usage) return &p } // BoolSliceP is like BoolSlice, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) BoolSliceP(name, shorthand string, value []bool, usage string) *[]bool { p := []bool{} f.BoolSliceVarP(&p, name, shorthand, value, usage) return &p } // BoolSlice defines a []bool flag with specified name, default value, and usage string. // The return value is the address of a []bool variable that stores the value of the flag. func BoolSlice(name string, value []bool, usage string) *[]bool { return CommandLine.BoolSliceP(name, "", value, usage) } // BoolSliceP is like BoolSlice, but accepts a shorthand letter that can be used after a single dash. 
func BoolSliceP(name, shorthand string, value []bool, usage string) *[]bool { return CommandLine.BoolSliceP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/count.go ================================================ package pflag import "strconv" // -- count Value type countValue int func newCountValue(val int, p *int) *countValue { *p = val return (*countValue)(p) } func (i *countValue) Set(s string) error { v, err := strconv.ParseInt(s, 0, 64) // -1 means that no specific value was passed, so increment if v == -1 { *i = countValue(*i + 1) } else { *i = countValue(v) } return err } func (i *countValue) Type() string { return "count" } func (i *countValue) String() string { return strconv.Itoa(int(*i)) } func countConv(sval string) (interface{}, error) { i, err := strconv.Atoi(sval) if err != nil { return nil, err } return i, nil } // GetCount return the int value of a flag with the given name func (f *FlagSet) GetCount(name string) (int, error) { val, err := f.getFlagType(name, "count", countConv) if err != nil { return 0, err } return val.(int), nil } // CountVar defines a count flag with specified name, default value, and usage string. // The argument p points to an int variable in which to store the value of the flag. // A count flag will add 1 to its value evey time it is found on the command line func (f *FlagSet) CountVar(p *int, name string, usage string) { f.CountVarP(p, name, "", usage) } // CountVarP is like CountVar only take a shorthand for the flag name. func (f *FlagSet) CountVarP(p *int, name, shorthand string, usage string) { flag := f.VarPF(newCountValue(0, p), name, shorthand, usage) flag.NoOptDefVal = "-1" } // CountVar like CountVar only the flag is placed on the CommandLine instead of a given flag set func CountVar(p *int, name string, usage string) { CommandLine.CountVar(p, name, usage) } // CountVarP is like CountVar only take a shorthand for the flag name. 
func CountVarP(p *int, name, shorthand string, usage string) { CommandLine.CountVarP(p, name, shorthand, usage) } // Count defines a count flag with specified name, default value, and usage string. // The return value is the address of an int variable that stores the value of the flag. // A count flag will add 1 to its value evey time it is found on the command line func (f *FlagSet) Count(name string, usage string) *int { p := new(int) f.CountVarP(p, name, "", usage) return p } // CountP is like Count only takes a shorthand for the flag name. func (f *FlagSet) CountP(name, shorthand string, usage string) *int { p := new(int) f.CountVarP(p, name, shorthand, usage) return p } // Count like Count only the flag is placed on the CommandLine isntead of a given flag set func Count(name string, usage string) *int { return CommandLine.CountP(name, "", usage) } // CountP is like Count only takes a shorthand for the flag name. func CountP(name, shorthand string, usage string) *int { return CommandLine.CountP(name, shorthand, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/duration.go ================================================ package pflag import ( "time" ) // -- time.Duration Value type durationValue time.Duration func newDurationValue(val time.Duration, p *time.Duration) *durationValue { *p = val return (*durationValue)(p) } func (d *durationValue) Set(s string) error { v, err := time.ParseDuration(s) *d = durationValue(v) return err } func (d *durationValue) Type() string { return "duration" } func (d *durationValue) String() string { return (*time.Duration)(d).String() } func durationConv(sval string) (interface{}, error) { return time.ParseDuration(sval) } // GetDuration return the duration value of a flag with the given name func (f *FlagSet) GetDuration(name string) (time.Duration, error) { val, err := f.getFlagType(name, "duration", durationConv) if err != nil { return 0, err } return val.(time.Duration), nil } // 
DurationVar defines a time.Duration flag with specified name, default value, and usage string. // The argument p points to a time.Duration variable in which to store the value of the flag. func (f *FlagSet) DurationVar(p *time.Duration, name string, value time.Duration, usage string) { f.VarP(newDurationValue(value, p), name, "", usage) } // DurationVarP is like DurationVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) DurationVarP(p *time.Duration, name, shorthand string, value time.Duration, usage string) { f.VarP(newDurationValue(value, p), name, shorthand, usage) } // DurationVar defines a time.Duration flag with specified name, default value, and usage string. // The argument p points to a time.Duration variable in which to store the value of the flag. func DurationVar(p *time.Duration, name string, value time.Duration, usage string) { CommandLine.VarP(newDurationValue(value, p), name, "", usage) } // DurationVarP is like DurationVar, but accepts a shorthand letter that can be used after a single dash. func DurationVarP(p *time.Duration, name, shorthand string, value time.Duration, usage string) { CommandLine.VarP(newDurationValue(value, p), name, shorthand, usage) } // Duration defines a time.Duration flag with specified name, default value, and usage string. // The return value is the address of a time.Duration variable that stores the value of the flag. func (f *FlagSet) Duration(name string, value time.Duration, usage string) *time.Duration { p := new(time.Duration) f.DurationVarP(p, name, "", value, usage) return p } // DurationP is like Duration, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) DurationP(name, shorthand string, value time.Duration, usage string) *time.Duration { p := new(time.Duration) f.DurationVarP(p, name, shorthand, value, usage) return p } // Duration defines a time.Duration flag with specified name, default value, and usage string. 
// The return value is the address of a time.Duration variable that stores the value of the flag. func Duration(name string, value time.Duration, usage string) *time.Duration { return CommandLine.DurationP(name, "", value, usage) } // DurationP is like Duration, but accepts a shorthand letter that can be used after a single dash. func DurationP(name, shorthand string, value time.Duration, usage string) *time.Duration { return CommandLine.DurationP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/flag.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. /* Package pflag is a drop-in replacement for Go's flag package, implementing POSIX/GNU-style --flags. pflag is compatible with the GNU extensions to the POSIX recommendations for command-line options. See http://www.gnu.org/software/libc/manual/html_node/Argument-Syntax.html Usage: pflag is a drop-in replacement of Go's native flag package. If you import pflag under the name "flag" then all code should continue to function with no changes. import flag "github.com/ogier/pflag" There is one exception to this: if you directly instantiate the Flag struct there is one more field "Shorthand" that you will need to set. Most code never instantiates this struct directly, and instead uses functions such as String(), BoolVar(), and Var(), and is therefore unaffected. Define flags using flag.String(), Bool(), Int(), etc. This declares an integer flag, -flagname, stored in the pointer ip, with type *int. var ip = flag.Int("flagname", 1234, "help message for flagname") If you like, you can bind the flag to a variable using the Var() functions. 
var flagvar int func init() { flag.IntVar(&flagvar, "flagname", 1234, "help message for flagname") } Or you can create custom flags that satisfy the Value interface (with pointer receivers) and couple them to flag parsing by flag.Var(&flagVal, "name", "help message for flagname") For such flags, the default value is just the initial value of the variable. After all flags are defined, call flag.Parse() to parse the command line into the defined flags. Flags may then be used directly. If you're using the flags themselves, they are all pointers; if you bind to variables, they're values. fmt.Println("ip has value ", *ip) fmt.Println("flagvar has value ", flagvar) After parsing, the arguments following the flags are available as the slice flag.Args() or individually as flag.Arg(i). The arguments are indexed from 0 through flag.NArg()-1. The pflag package also defines some new functions that are not in flag, that give one-letter shorthands for flags. You can use these by appending 'P' to the name of any function that defines a flag. var ip = flag.IntP("flagname", "f", 1234, "help message") var flagvar bool func init() { flag.BoolVarP(&flagvar, "boolname", "b", true, "help message") } flag.VarP(&flagVar, "varname", "v", "help message") Shorthand letters can be used with single dashes on the command line. Boolean shorthand flags can be combined with other shorthand flags. Command line flag syntax: --flag // boolean flags only --flag=x Unlike the flag package, a single dash before an option means something different than a double dash. Single dashes signify a series of shorthand letters for flags. All but the last shorthand letter must be boolean flags. // boolean flags -f -abc // non-boolean flags -n 1234 -Ifile // mixed -abcs "hello" -abcn1234 Flag parsing stops after the terminator "--". Unlike the flag package, flags can be interspersed with arguments anywhere on the command line before this terminator. Integer flags accept 1234, 0664, 0x1234 and may be negative.
Boolean flags (in their long form) accept 1, 0, t, f, true, false, TRUE, FALSE, True, False. Duration flags accept any input valid for time.ParseDuration. The default set of command-line flags is controlled by top-level functions. The FlagSet type allows one to define independent sets of flags, such as to implement subcommands in a command-line interface. The methods of FlagSet are analogous to the top-level functions for the command-line flag set. */ package pflag import ( "bytes" "errors" "fmt" "io" "os" "sort" "strings" ) // ErrHelp is the error returned if the flag -help is invoked but no such flag is defined. var ErrHelp = errors.New("pflag: help requested") // ErrorHandling defines how to handle flag parsing errors. type ErrorHandling int const ( // ContinueOnError will return an err from Parse() if an error is found ContinueOnError ErrorHandling = iota // ExitOnError will call os.Exit(2) if an error is found when parsing ExitOnError // PanicOnError will panic() if an error is found when parsing flags PanicOnError ) // NormalizedName is a flag name that has been normalized according to rules // for the FlagSet (e.g. making '-' and '_' equivalent). type NormalizedName string // A FlagSet represents a set of defined flags. type FlagSet struct { // Usage is the function called when an error occurs while parsing flags. // The field is a function (not a method) that may be changed to point to // a custom error handler. Usage func() name string parsed bool actual map[NormalizedName]*Flag formal map[NormalizedName]*Flag shorthands map[byte]*Flag args []string // arguments after flags argsLenAtDash int // len(args) when a '--' was located when parsing, or -1 if no -- exitOnError bool // does the program exit if there's an error? 
errorHandling ErrorHandling output io.Writer // nil means stderr; use out() accessor interspersed bool // allow interspersed option/non-option args normalizeNameFunc func(f *FlagSet, name string) NormalizedName } // A Flag represents the state of a flag. type Flag struct { Name string // name as it appears on command line Shorthand string // one-letter abbreviated flag Usage string // help message Value Value // value as set DefValue string // default value (as text); for usage message Changed bool // If the user set the value (or if left to default) NoOptDefVal string //default value (as text); if the flag is on the command line without any options Deprecated string // If this flag is deprecated, this string is the new or now thing to use Hidden bool // used by cobra.Command to allow flags to be hidden from help/usage text ShorthandDeprecated string // If the shorthand of this flag is deprecated, this string is the new or now thing to use Annotations map[string][]string // used by cobra.Command bash autocomple code } // Value is the interface to the dynamic value stored in a flag. // (The default value is represented as a string.) type Value interface { String() string Set(string) error Type() string } // sortFlags returns the flags as a slice in lexicographical sorted order. func sortFlags(flags map[NormalizedName]*Flag) []*Flag { list := make(sort.StringSlice, len(flags)) i := 0 for k := range flags { list[i] = string(k) i++ } list.Sort() result := make([]*Flag, len(list)) for i, name := range list { result[i] = flags[NormalizedName(name)] } return result } // SetNormalizeFunc allows you to add a function which can translate flag names. // Flags added to the FlagSet will be translated and then when anything tries to // look up the flag that will also be translated. So it would be possible to create // a flag named "getURL" and have it translated to "geturl". A user could then pass // "--getUrl" which may also be translated to "geturl" and everything will work. 
func (f *FlagSet) SetNormalizeFunc(n func(f *FlagSet, name string) NormalizedName) { f.normalizeNameFunc = n for k, v := range f.formal { delete(f.formal, k) nname := f.normalizeFlagName(string(k)) f.formal[nname] = v v.Name = string(nname) } } // GetNormalizeFunc returns the previously set NormalizeFunc of a function which // does no translation, if not set previously. func (f *FlagSet) GetNormalizeFunc() func(f *FlagSet, name string) NormalizedName { if f.normalizeNameFunc != nil { return f.normalizeNameFunc } return func(f *FlagSet, name string) NormalizedName { return NormalizedName(name) } } func (f *FlagSet) normalizeFlagName(name string) NormalizedName { n := f.GetNormalizeFunc() return n(f, name) } func (f *FlagSet) out() io.Writer { if f.output == nil { return os.Stderr } return f.output } // SetOutput sets the destination for usage and error messages. // If output is nil, os.Stderr is used. func (f *FlagSet) SetOutput(output io.Writer) { f.output = output } // VisitAll visits the flags in lexicographical order, calling fn for each. // It visits all flags, even those not set. func (f *FlagSet) VisitAll(fn func(*Flag)) { for _, flag := range sortFlags(f.formal) { fn(flag) } } // HasFlags returns a bool to indicate if the FlagSet has any flags definied. func (f *FlagSet) HasFlags() bool { return len(f.formal) > 0 } // HasAvailableFlags returns a bool to indicate if the FlagSet has any flags // definied that are not hidden or deprecated. func (f *FlagSet) HasAvailableFlags() bool { for _, flag := range f.formal { if !flag.Hidden && len(flag.Deprecated) == 0 { return true } } return false } // VisitAll visits the command-line flags in lexicographical order, calling // fn for each. It visits all flags, even those not set. func VisitAll(fn func(*Flag)) { CommandLine.VisitAll(fn) } // Visit visits the flags in lexicographical order, calling fn for each. // It visits only those flags that have been set. 
func (f *FlagSet) Visit(fn func(*Flag)) {
	ordered := sortFlags(f.actual)
	for idx := range ordered {
		fn(ordered[idx])
	}
}

// Visit visits the command-line flags in lexicographical order, calling fn
// for each. It visits only those flags that have been set.
func Visit(fn func(*Flag)) {
	CommandLine.Visit(fn)
}

// Lookup returns the Flag structure of the named flag, returning nil if none exists.
// The name is normalized before the lookup.
func (f *FlagSet) Lookup(name string) *Flag {
	normalized := f.normalizeFlagName(name)
	return f.lookup(normalized)
}

// lookup returns the Flag registered under an already-normalized name,
// or nil if none exists.
func (f *FlagSet) lookup(name NormalizedName) *Flag {
	return f.formal[name]
}

// getFlagType looks up the flag called name, checks that its value reports
// the type ftype, and converts the value's string form with convFunc.
// It returns an error when the flag is not defined, when the type does not
// match, or when convFunc fails.
func (f *FlagSet) getFlagType(name string, ftype string, convFunc func(sval string) (interface{}, error)) (interface{}, error) {
	found := f.Lookup(name)
	if found == nil {
		return nil, fmt.Errorf("flag accessed but not defined: %s", name)
	}

	if found.Value.Type() != ftype {
		return nil, fmt.Errorf("trying to get %s value of flag of type %s", ftype, found.Value.Type())
	}

	converted, err := convFunc(found.Value.String())
	if err != nil {
		return nil, err
	}
	return converted, nil
}

// ArgsLenAtDash will return the length of f.Args at the moment when a -- was
// found during arg parsing. This allows your program to know which args were
// before the -- and which came after.
func (f *FlagSet) ArgsLenAtDash() int {
	return f.argsLenAtDash
}

// MarkDeprecated indicates that a flag is deprecated in your program. It will
// continue to function but will not show up in help or usage messages. Using
// this flag will also print the given usageMessage.
func (f *FlagSet) MarkDeprecated(name string, usageMessage string) error { flag := f.Lookup(name) if flag == nil { return fmt.Errorf("flag %q does not exist", name) } if len(usageMessage) == 0 { return fmt.Errorf("deprecated message for flag %q must be set", name) } flag.Deprecated = usageMessage return nil } // MarkShorthandDeprecated will mark the shorthand of a flag deprecated in your // program. It will continue to function but will not show up in help or usage // messages. Using this flag will also print the given usageMessage. func (f *FlagSet) MarkShorthandDeprecated(name string, usageMessage string) error { flag := f.Lookup(name) if flag == nil { return fmt.Errorf("flag %q does not exist", name) } if len(usageMessage) == 0 { return fmt.Errorf("deprecated message for flag %q must be set", name) } flag.ShorthandDeprecated = usageMessage return nil } // MarkHidden sets a flag to 'hidden' in your program. It will continue to // function but will not show up in help or usage messages. func (f *FlagSet) MarkHidden(name string) error { flag := f.Lookup(name) if flag == nil { return fmt.Errorf("flag %q does not exist", name) } flag.Hidden = true return nil } // Lookup returns the Flag structure of the named command-line flag, // returning nil if none exists. func Lookup(name string) *Flag { return CommandLine.Lookup(name) } // Set sets the value of the named flag. func (f *FlagSet) Set(name, value string) error { normalName := f.normalizeFlagName(name) flag, ok := f.formal[normalName] if !ok { return fmt.Errorf("no such flag -%v", name) } err := flag.Value.Set(value) if err != nil { return err } if f.actual == nil { f.actual = make(map[NormalizedName]*Flag) } f.actual[normalName] = flag flag.Changed = true if len(flag.Deprecated) > 0 { fmt.Fprintf(os.Stderr, "Flag --%s has been deprecated, %s\n", flag.Name, flag.Deprecated) } return nil } // SetAnnotation allows one to set arbitrary annotations on a flag in the FlagSet. 
// This is sometimes used by spf13/cobra programs which want to generate additional // bash completion information. func (f *FlagSet) SetAnnotation(name, key string, values []string) error { normalName := f.normalizeFlagName(name) flag, ok := f.formal[normalName] if !ok { return fmt.Errorf("no such flag -%v", name) } if flag.Annotations == nil { flag.Annotations = map[string][]string{} } flag.Annotations[key] = values return nil } // Changed returns true if the flag was explicitly set during Parse() and false // otherwise func (f *FlagSet) Changed(name string) bool { flag := f.Lookup(name) // If a flag doesn't exist, it wasn't changed.... if flag == nil { return false } return flag.Changed } // Set sets the value of the named command-line flag. func Set(name, value string) error { return CommandLine.Set(name, value) } // PrintDefaults prints, to standard error unless configured // otherwise, the default values of all defined flags in the set. func (f *FlagSet) PrintDefaults() { usages := f.FlagUsages() fmt.Fprint(f.out(), usages) } // defaultIsZeroValue returns true if the default value for this flag represents // a zero value. 
func (f *Flag) defaultIsZeroValue() bool { switch f.Value.(type) { case boolFlag: return f.DefValue == "false" case *durationValue: // Beginning in Go 1.7, duration zero values are "0s" return f.DefValue == "0" || f.DefValue == "0s" case *intValue, *int8Value, *int32Value, *int64Value, *uintValue, *uint8Value, *uint16Value, *uint32Value, *uint64Value, *countValue, *float32Value, *float64Value: return f.DefValue == "0" case *stringValue: return f.DefValue == "" case *ipValue, *ipMaskValue, *ipNetValue: return f.DefValue == "" case *intSliceValue, *stringSliceValue, *stringArrayValue: return f.DefValue == "[]" default: switch f.Value.String() { case "false": return true case "": return true case "": return true case "0": return true } return false } } // UnquoteUsage extracts a back-quoted name from the usage // string for a flag and returns it and the un-quoted usage. // Given "a `name` to show" it returns ("name", "a name to show"). // If there are no back quotes, the name is an educated guess of the // type of the flag's value, or the empty string if the flag is boolean. func UnquoteUsage(flag *Flag) (name string, usage string) { // Look for a back-quoted name, but avoid the strings package. usage = flag.Usage for i := 0; i < len(usage); i++ { if usage[i] == '`' { for j := i + 1; j < len(usage); j++ { if usage[j] == '`' { name = usage[i+1 : j] usage = usage[:i] + name + usage[j+1:] return name, usage } } break // Only one back quote; use type name. } } name = flag.Value.Type() switch name { case "bool": name = "" case "float64": name = "float" case "int64": name = "int" case "uint64": name = "uint" } return } // Splits the string `s` on whitespace into an initial substring up to // `i` runes in length and the remainder. Will go `slop` over `i` if // that encompasses the entire string (which allows the caller to // avoid short orphan words on the final line). 
func wrapN(i, slop int, s string) (string, string) {
	// The whole string fits within the limit plus the allowed overshoot.
	if len(s) < i+slop {
		return s, ""
	}

	// Break at the last space or tab inside the first i bytes. A break at
	// index 0 (or none at all) would yield an empty head, so the string is
	// returned unsplit in that case.
	if cut := strings.LastIndexAny(s[:i], " \t"); cut > 0 {
		return s[:cut], s[cut+1:]
	}
	return s, ""
}

// Wraps the string `s` to a maximum width `w` with leading indent
// `i`. The first line is not indented (this is assumed to be done by
// caller). Pass `w` == 0 to do no wrapping
func wrap(i, w int, s string) string {
	if w == 0 {
		return s
	}

	// avail is the room between the indent and the right margin.
	avail := w - i

	var out strings.Builder

	// Not enough space for sensible wrapping. Wrap as a block on
	// the next line instead.
	if avail < 24 {
		i = 16
		avail = w - i
		out.WriteString("\n")
		out.WriteString(strings.Repeat(" ", i))
	}
	// If still not enough space then don't even try to wrap.
	if avail < 24 {
		return s
	}

	// Try to avoid short orphan words on the final line, by
	// allowing wrapN to go a bit over if that would fit in the
	// remainder of the line.
	const slop = 5
	avail -= slop

	// Handle first line, which is indented by the caller (or the
	// special case above)
	head, rest := wrapN(avail, slop, s)
	out.WriteString(head)

	// Now wrap the rest, one indented line per piece.
	for rest != "" {
		var piece string
		piece, rest = wrapN(avail, slop, rest)
		out.WriteString("\n")
		out.WriteString(strings.Repeat(" ", i))
		out.WriteString(piece)
	}

	return out.String()
}

// FlagUsagesWrapped returns a string containing the usage information
// for all flags in the FlagSet.
// Wrapped to `cols` columns (0 for no
// wrapping)
//
// Flags that are deprecated or hidden are omitted. Each remaining flag
// contributes one entry made of a left-hand part (shorthand, name, value
// placeholder and optional no-option default) and a right-hand part (usage
// text plus the default value when it is not a zero value). The right-hand
// parts are aligned in a common column.
func (f *FlagSet) FlagUsagesWrapped(cols int) string {
	x := new(bytes.Buffer)

	lines := make([]string, 0, len(f.formal))

	// maxlen tracks the longest left-hand part, including its "\x00" marker.
	maxlen := 0
	f.VisitAll(func(flag *Flag) {
		if len(flag.Deprecated) > 0 || flag.Hidden {
			return
		}

		// A deprecated shorthand is not advertised; only the long name is shown.
		// NOTE(review): the leading padding inside these two format strings may
		// have been collapsed in this copy of the file — confirm against upstream.
		line := ""
		if len(flag.Shorthand) > 0 && len(flag.ShorthandDeprecated) == 0 {
			line = fmt.Sprintf(" -%s, --%s", flag.Shorthand, flag.Name)
		} else {
			line = fmt.Sprintf(" --%s", flag.Name)
		}

		varname, usage := UnquoteUsage(flag)
		if len(varname) > 0 {
			line += " " + varname
		}
		// Show the value used when the flag is given without an argument,
		// except for the implicit "true" of boolean flags.
		if len(flag.NoOptDefVal) > 0 {
			switch flag.Value.Type() {
			case "string":
				line += fmt.Sprintf("[=\"%s\"]", flag.NoOptDefVal)
			case "bool":
				if flag.NoOptDefVal != "true" {
					line += fmt.Sprintf("[=%s]", flag.NoOptDefVal)
				}
			default:
				line += fmt.Sprintf("[=%s]", flag.NoOptDefVal)
			}
		}

		// This special character will be replaced with spacing once the
		// correct alignment is calculated
		line += "\x00"
		if len(line) > maxlen {
			maxlen = len(line)
		}

		line += usage
		if !flag.defaultIsZeroValue() {
			if flag.Value.Type() == "string" {
				line += fmt.Sprintf(" (default \"%s\")", flag.DefValue)
			} else {
				line += fmt.Sprintf(" (default %s)", flag.DefValue)
			}
		}

		lines = append(lines, line)
	})

	// Second pass: split each entry at the marker, pad the left-hand part to
	// the common width and wrap the right-hand part.
	for _, line := range lines {
		sidx := strings.Index(line, "\x00")
		spacing := strings.Repeat(" ", maxlen-sidx)
		// maxlen + 2 comes from + 1 for the \x00 and + 1 for the (deliberate) off-by-one in maxlen-sidx
		fmt.Fprintln(x, line[:sidx], spacing, wrap(maxlen+2, cols, line[sidx+1:]))
	}

	return x.String()
}

// FlagUsages returns a string containing the usage information for all flags in
// the FlagSet
func (f *FlagSet) FlagUsages() string {
	return f.FlagUsagesWrapped(0)
}

// PrintDefaults prints to standard error the default values of all defined command-line flags.
func PrintDefaults() {
	CommandLine.PrintDefaults()
}

// defaultUsage is the default function to print a usage message.
func defaultUsage(f *FlagSet) { fmt.Fprintf(f.out(), "Usage of %s:\n", f.name) f.PrintDefaults() } // NOTE: Usage is not just defaultUsage(CommandLine) // because it serves (via godoc flag Usage) as the example // for how to write your own usage function. // Usage prints to standard error a usage message documenting all defined command-line flags. // The function is a variable that may be changed to point to a custom function. // By default it prints a simple header and calls PrintDefaults; for details about the // format of the output and how to control it, see the documentation for PrintDefaults. var Usage = func() { fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0]) PrintDefaults() } // NFlag returns the number of flags that have been set. func (f *FlagSet) NFlag() int { return len(f.actual) } // NFlag returns the number of command-line flags that have been set. func NFlag() int { return len(CommandLine.actual) } // Arg returns the i'th argument. Arg(0) is the first remaining argument // after flags have been processed. func (f *FlagSet) Arg(i int) string { if i < 0 || i >= len(f.args) { return "" } return f.args[i] } // Arg returns the i'th command-line argument. Arg(0) is the first remaining argument // after flags have been processed. func Arg(i int) string { return CommandLine.Arg(i) } // NArg is the number of arguments remaining after flags have been processed. func (f *FlagSet) NArg() int { return len(f.args) } // NArg is the number of arguments remaining after flags have been processed. func NArg() int { return len(CommandLine.args) } // Args returns the non-flag arguments. func (f *FlagSet) Args() []string { return f.args } // Args returns the non-flag command-line arguments. func Args() []string { return CommandLine.args } // Var defines a flag with the specified name and usage string. The type and // value of the flag are represented by the first argument, of type Value, which // typically holds a user-defined implementation of Value. 
// For instance, the
// caller could create a flag that turns a comma-separated string into a slice
// of strings by giving the slice the methods of Value; in particular, Set would
// decompose the comma-separated string into the slice.
func (f *FlagSet) Var(value Value, name string, usage string) {
	f.VarP(value, name, "", usage)
}

// VarPF is like VarP, but returns the flag created
func (f *FlagSet) VarPF(value Value, name, shorthand, usage string) *Flag {
	// Remember the default value as a string; it won't change.
	flag := &Flag{
		Name:      name,
		Shorthand: shorthand,
		Usage:     usage,
		Value:     value,
		DefValue:  value.String(),
	}
	f.AddFlag(flag)
	return flag
}

// VarP is like Var, but accepts a shorthand letter that can be used after a single dash.
func (f *FlagSet) VarP(value Value, name, shorthand, usage string) {
	f.VarPF(value, name, shorthand, usage)
}

// AddFlag will add the flag to the FlagSet
//
// The flag is stored under its normalized name, and flag.Name is overwritten
// with that normalized form. AddFlag panics (after writing a message to the
// FlagSet's output) if the normalized name is already registered, if the
// shorthand is longer than one byte, or if the shorthand is already in use.
func (f *FlagSet) AddFlag(flag *Flag) {
	// Call normalizeFlagName function only once
	normalizedFlagName := f.normalizeFlagName(flag.Name)

	_, alreadythere := f.formal[normalizedFlagName]
	if alreadythere {
		msg := fmt.Sprintf("%s flag redefined: %s", f.name, flag.Name)
		fmt.Fprintln(f.out(), msg)
		panic(msg) // Happens only if flags are declared with identical names
	}
	// The registry maps are created lazily on first registration.
	if f.formal == nil {
		f.formal = make(map[NormalizedName]*Flag)
	}

	flag.Name = string(normalizedFlagName)
	f.formal[normalizedFlagName] = flag

	// Flags without a shorthand are done. Note that the long name is already
	// registered at this point, so the shorthand panics below leave it in place.
	if len(flag.Shorthand) == 0 {
		return
	}
	if len(flag.Shorthand) > 1 {
		fmt.Fprintf(f.out(), "%s shorthand more than ASCII character: %s\n", f.name, flag.Shorthand)
		panic("shorthand is more than one character")
	}
	if f.shorthands == nil {
		f.shorthands = make(map[byte]*Flag)
	}
	// Shorthands are keyed by their single byte.
	c := flag.Shorthand[0]
	old, alreadythere := f.shorthands[c]
	if alreadythere {
		fmt.Fprintf(f.out(), "%s shorthand reused: %q for %s already used for %s\n", f.name, c, flag.Name, old.Name)
		panic("shorthand redefinition")
	}
	f.shorthands[c] = flag
}

// AddFlagSet adds one FlagSet to another.
If a flag is already present in f // the flag from newSet will be ignored func (f *FlagSet) AddFlagSet(newSet *FlagSet) { if newSet == nil { return } newSet.VisitAll(func(flag *Flag) { if f.Lookup(flag.Name) == nil { f.AddFlag(flag) } }) } // Var defines a flag with the specified name and usage string. The type and // value of the flag are represented by the first argument, of type Value, which // typically holds a user-defined implementation of Value. For instance, the // caller could create a flag that turns a comma-separated string into a slice // of strings by giving the slice the methods of Value; in particular, Set would // decompose the comma-separated string into the slice. func Var(value Value, name string, usage string) { CommandLine.VarP(value, name, "", usage) } // VarP is like Var, but accepts a shorthand letter that can be used after a single dash. func VarP(value Value, name, shorthand, usage string) { CommandLine.VarP(value, name, shorthand, usage) } // failf prints to standard error a formatted error and usage message and // returns the error. func (f *FlagSet) failf(format string, a ...interface{}) error { err := fmt.Errorf(format, a...) fmt.Fprintln(f.out(), err) f.usage() return err } // usage calls the Usage method for the flag set, or the usage function if // the flag set is CommandLine. 
func (f *FlagSet) usage() { if f == CommandLine { Usage() } else if f.Usage == nil { defaultUsage(f) } else { f.Usage() } } func (f *FlagSet) setFlag(flag *Flag, value string, origArg string) error { if err := flag.Value.Set(value); err != nil { return f.failf("invalid argument %q for %s: %v", value, origArg, err) } // mark as visited for Visit() if f.actual == nil { f.actual = make(map[NormalizedName]*Flag) } f.actual[f.normalizeFlagName(flag.Name)] = flag flag.Changed = true if len(flag.Deprecated) > 0 { fmt.Fprintf(os.Stderr, "Flag --%s has been deprecated, %s\n", flag.Name, flag.Deprecated) } if len(flag.ShorthandDeprecated) > 0 && containsShorthand(origArg, flag.Shorthand) { fmt.Fprintf(os.Stderr, "Flag shorthand -%s has been deprecated, %s\n", flag.Shorthand, flag.ShorthandDeprecated) } return nil } func containsShorthand(arg, shorthand string) bool { // filter out flags -- if strings.HasPrefix(arg, "-") { return false } arg = strings.SplitN(arg, "=", 2)[0] return strings.Contains(arg, shorthand) } func (f *FlagSet) parseLongArg(s string, args []string, fn parseFunc) (a []string, err error) { a = args name := s[2:] if len(name) == 0 || name[0] == '-' || name[0] == '=' { err = f.failf("bad flag syntax: %s", s) return } split := strings.SplitN(name, "=", 2) name = split[0] flag, alreadythere := f.formal[f.normalizeFlagName(name)] if !alreadythere { if name == "help" { // special case for nice help message. 
f.usage() return a, ErrHelp } err = f.failf("unknown flag: --%s", name) return } var value string if len(split) == 2 { // '--flag=arg' value = split[1] } else if len(flag.NoOptDefVal) > 0 { // '--flag' (arg was optional) value = flag.NoOptDefVal } else if len(a) > 0 { // '--flag arg' value = a[0] a = a[1:] } else { // '--flag' (arg was required) err = f.failf("flag needs an argument: %s", s) return } err = fn(flag, value, s) return } func (f *FlagSet) parseSingleShortArg(shorthands string, args []string, fn parseFunc) (outShorts string, outArgs []string, err error) { if strings.HasPrefix(shorthands, "test.") { return } outArgs = args outShorts = shorthands[1:] c := shorthands[0] flag, alreadythere := f.shorthands[c] if !alreadythere { if c == 'h' { // special case for nice help message. f.usage() err = ErrHelp return } //TODO continue on error err = f.failf("unknown shorthand flag: %q in -%s", c, shorthands) return } var value string if len(shorthands) > 2 && shorthands[1] == '=' { value = shorthands[2:] outShorts = "" } else if len(flag.NoOptDefVal) > 0 { value = flag.NoOptDefVal } else if len(shorthands) > 1 { value = shorthands[1:] outShorts = "" } else if len(args) > 0 { value = args[0] outArgs = args[1:] } else { err = f.failf("flag needs an argument: %q in -%s", c, shorthands) return } err = fn(flag, value, shorthands) return } func (f *FlagSet) parseShortArg(s string, args []string, fn parseFunc) (a []string, err error) { a = args shorthands := s[1:] for len(shorthands) > 0 { shorthands, a, err = f.parseSingleShortArg(shorthands, args, fn) if err != nil { return } } return } func (f *FlagSet) parseArgs(args []string, fn parseFunc) (err error) { for len(args) > 0 { s := args[0] args = args[1:] if len(s) == 0 || s[0] != '-' || len(s) == 1 { if !f.interspersed { f.args = append(f.args, s) f.args = append(f.args, args...) 
return nil } f.args = append(f.args, s) continue } if s[1] == '-' { if len(s) == 2 { // "--" terminates the flags f.argsLenAtDash = len(f.args) f.args = append(f.args, args...) break } args, err = f.parseLongArg(s, args, fn) } else { args, err = f.parseShortArg(s, args, fn) } if err != nil { return } } return } // Parse parses flag definitions from the argument list, which should not // include the command name. Must be called after all flags in the FlagSet // are defined and before flags are accessed by the program. // The return value will be ErrHelp if -help was set but not defined. func (f *FlagSet) Parse(arguments []string) error { f.parsed = true f.args = make([]string, 0, len(arguments)) assign := func(flag *Flag, value, origArg string) error { return f.setFlag(flag, value, origArg) } err := f.parseArgs(arguments, assign) if err != nil { switch f.errorHandling { case ContinueOnError: return err case ExitOnError: os.Exit(2) case PanicOnError: panic(err) } } return nil } type parseFunc func(flag *Flag, value, origArg string) error // ParseAll parses flag definitions from the argument list, which should not // include the command name. The arguments for fn are flag and value. Must be // called after all flags in the FlagSet are defined and before flags are // accessed by the program. The return value will be ErrHelp if -help was set // but not defined. func (f *FlagSet) ParseAll(arguments []string, fn func(flag *Flag, value string) error) error { f.parsed = true f.args = make([]string, 0, len(arguments)) assign := func(flag *Flag, value, origArg string) error { return fn(flag, value) } err := f.parseArgs(arguments, assign) if err != nil { switch f.errorHandling { case ContinueOnError: return err case ExitOnError: os.Exit(2) case PanicOnError: panic(err) } } return nil } // Parsed reports whether f.Parse has been called. func (f *FlagSet) Parsed() bool { return f.parsed } // Parse parses the command-line flags from os.Args[1:]. 
Must be called // after all flags are defined and before flags are accessed by the program. func Parse() { // Ignore errors; CommandLine is set for ExitOnError. CommandLine.Parse(os.Args[1:]) } // ParseAll parses the command-line flags from os.Args[1:] and called fn for each. // The arguments for fn are flag and value. Must be called after all flags are // defined and before flags are accessed by the program. func ParseAll(fn func(flag *Flag, value string) error) { // Ignore errors; CommandLine is set for ExitOnError. CommandLine.ParseAll(os.Args[1:], fn) } // SetInterspersed sets whether to support interspersed option/non-option arguments. func SetInterspersed(interspersed bool) { CommandLine.SetInterspersed(interspersed) } // Parsed returns true if the command-line flags have been parsed. func Parsed() bool { return CommandLine.Parsed() } // CommandLine is the default set of command-line flags, parsed from os.Args. var CommandLine = NewFlagSet(os.Args[0], ExitOnError) // NewFlagSet returns a new, empty flag set with the specified name and // error handling property. func NewFlagSet(name string, errorHandling ErrorHandling) *FlagSet { f := &FlagSet{ name: name, errorHandling: errorHandling, argsLenAtDash: -1, interspersed: true, } return f } // SetInterspersed sets whether to support interspersed option/non-option arguments. func (f *FlagSet) SetInterspersed(interspersed bool) { f.interspersed = interspersed } // Init sets the name and error handling property for a flag set. // By default, the zero FlagSet uses an empty name and the // ContinueOnError error handling policy. 
func (f *FlagSet) Init(name string, errorHandling ErrorHandling) { f.name = name f.errorHandling = errorHandling f.argsLenAtDash = -1 } ================================================ FILE: vendor/github.com/spf13/pflag/float32.go ================================================ package pflag import "strconv" // -- float32 Value type float32Value float32 func newFloat32Value(val float32, p *float32) *float32Value { *p = val return (*float32Value)(p) } func (f *float32Value) Set(s string) error { v, err := strconv.ParseFloat(s, 32) *f = float32Value(v) return err } func (f *float32Value) Type() string { return "float32" } func (f *float32Value) String() string { return strconv.FormatFloat(float64(*f), 'g', -1, 32) } func float32Conv(sval string) (interface{}, error) { v, err := strconv.ParseFloat(sval, 32) if err != nil { return 0, err } return float32(v), nil } // GetFloat32 return the float32 value of a flag with the given name func (f *FlagSet) GetFloat32(name string) (float32, error) { val, err := f.getFlagType(name, "float32", float32Conv) if err != nil { return 0, err } return val.(float32), nil } // Float32Var defines a float32 flag with specified name, default value, and usage string. // The argument p points to a float32 variable in which to store the value of the flag. func (f *FlagSet) Float32Var(p *float32, name string, value float32, usage string) { f.VarP(newFloat32Value(value, p), name, "", usage) } // Float32VarP is like Float32Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Float32VarP(p *float32, name, shorthand string, value float32, usage string) { f.VarP(newFloat32Value(value, p), name, shorthand, usage) } // Float32Var defines a float32 flag with specified name, default value, and usage string. // The argument p points to a float32 variable in which to store the value of the flag. 
func Float32Var(p *float32, name string, value float32, usage string) { CommandLine.VarP(newFloat32Value(value, p), name, "", usage) } // Float32VarP is like Float32Var, but accepts a shorthand letter that can be used after a single dash. func Float32VarP(p *float32, name, shorthand string, value float32, usage string) { CommandLine.VarP(newFloat32Value(value, p), name, shorthand, usage) } // Float32 defines a float32 flag with specified name, default value, and usage string. // The return value is the address of a float32 variable that stores the value of the flag. func (f *FlagSet) Float32(name string, value float32, usage string) *float32 { p := new(float32) f.Float32VarP(p, name, "", value, usage) return p } // Float32P is like Float32, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Float32P(name, shorthand string, value float32, usage string) *float32 { p := new(float32) f.Float32VarP(p, name, shorthand, value, usage) return p } // Float32 defines a float32 flag with specified name, default value, and usage string. // The return value is the address of a float32 variable that stores the value of the flag. func Float32(name string, value float32, usage string) *float32 { return CommandLine.Float32P(name, "", value, usage) } // Float32P is like Float32, but accepts a shorthand letter that can be used after a single dash. 
func Float32P(name, shorthand string, value float32, usage string) *float32 { return CommandLine.Float32P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/float64.go ================================================ package pflag import "strconv" // -- float64 Value type float64Value float64 func newFloat64Value(val float64, p *float64) *float64Value { *p = val return (*float64Value)(p) } func (f *float64Value) Set(s string) error { v, err := strconv.ParseFloat(s, 64) *f = float64Value(v) return err } func (f *float64Value) Type() string { return "float64" } func (f *float64Value) String() string { return strconv.FormatFloat(float64(*f), 'g', -1, 64) } func float64Conv(sval string) (interface{}, error) { return strconv.ParseFloat(sval, 64) } // GetFloat64 return the float64 value of a flag with the given name func (f *FlagSet) GetFloat64(name string) (float64, error) { val, err := f.getFlagType(name, "float64", float64Conv) if err != nil { return 0, err } return val.(float64), nil } // Float64Var defines a float64 flag with specified name, default value, and usage string. // The argument p points to a float64 variable in which to store the value of the flag. func (f *FlagSet) Float64Var(p *float64, name string, value float64, usage string) { f.VarP(newFloat64Value(value, p), name, "", usage) } // Float64VarP is like Float64Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Float64VarP(p *float64, name, shorthand string, value float64, usage string) { f.VarP(newFloat64Value(value, p), name, shorthand, usage) } // Float64Var defines a float64 flag with specified name, default value, and usage string. // The argument p points to a float64 variable in which to store the value of the flag. 
func Float64Var(p *float64, name string, value float64, usage string) { CommandLine.VarP(newFloat64Value(value, p), name, "", usage) } // Float64VarP is like Float64Var, but accepts a shorthand letter that can be used after a single dash. func Float64VarP(p *float64, name, shorthand string, value float64, usage string) { CommandLine.VarP(newFloat64Value(value, p), name, shorthand, usage) } // Float64 defines a float64 flag with specified name, default value, and usage string. // The return value is the address of a float64 variable that stores the value of the flag. func (f *FlagSet) Float64(name string, value float64, usage string) *float64 { p := new(float64) f.Float64VarP(p, name, "", value, usage) return p } // Float64P is like Float64, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Float64P(name, shorthand string, value float64, usage string) *float64 { p := new(float64) f.Float64VarP(p, name, shorthand, value, usage) return p } // Float64 defines a float64 flag with specified name, default value, and usage string. // The return value is the address of a float64 variable that stores the value of the flag. func Float64(name string, value float64, usage string) *float64 { return CommandLine.Float64P(name, "", value, usage) } // Float64P is like Float64, but accepts a shorthand letter that can be used after a single dash. func Float64P(name, shorthand string, value float64, usage string) *float64 { return CommandLine.Float64P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/golangflag.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package pflag import ( goflag "flag" "reflect" "strings" ) // flagValueWrapper implements pflag.Value around a flag.Value. 
The main // difference here is the addition of the Type method that returns a string // name of the type. As this is generally unknown, we approximate that with // reflection. type flagValueWrapper struct { inner goflag.Value flagType string } // We are just copying the boolFlag interface out of goflag as that is what // they use to decide if a flag should get "true" when no arg is given. type goBoolFlag interface { goflag.Value IsBoolFlag() bool } func wrapFlagValue(v goflag.Value) Value { // If the flag.Value happens to also be a pflag.Value, just use it directly. if pv, ok := v.(Value); ok { return pv } pv := &flagValueWrapper{ inner: v, } t := reflect.TypeOf(v) if t.Kind() == reflect.Interface || t.Kind() == reflect.Ptr { t = t.Elem() } pv.flagType = strings.TrimSuffix(t.Name(), "Value") return pv } func (v *flagValueWrapper) String() string { return v.inner.String() } func (v *flagValueWrapper) Set(s string) error { return v.inner.Set(s) } func (v *flagValueWrapper) Type() string { return v.flagType } // PFlagFromGoFlag will return a *pflag.Flag given a *flag.Flag // If the *flag.Flag.Name was a single character (ex: `v`) it will be accessiblei // with both `-v` and `--v` in flags. If the golang flag was more than a single // character (ex: `verbose`) it will only be accessible via `--verbose` func PFlagFromGoFlag(goflag *goflag.Flag) *Flag { // Remember the default value as a string; it won't change. 
flag := &Flag{ Name: goflag.Name, Usage: goflag.Usage, Value: wrapFlagValue(goflag.Value), // Looks like golang flags don't set DefValue correctly :-( //DefValue: goflag.DefValue, DefValue: goflag.Value.String(), } // Ex: if the golang flag was -v, allow both -v and --v to work if len(flag.Name) == 1 { flag.Shorthand = flag.Name } if fv, ok := goflag.Value.(goBoolFlag); ok && fv.IsBoolFlag() { flag.NoOptDefVal = "true" } return flag } // AddGoFlag will add the given *flag.Flag to the pflag.FlagSet func (f *FlagSet) AddGoFlag(goflag *goflag.Flag) { if f.Lookup(goflag.Name) != nil { return } newflag := PFlagFromGoFlag(goflag) f.AddFlag(newflag) } // AddGoFlagSet will add the given *flag.FlagSet to the pflag.FlagSet func (f *FlagSet) AddGoFlagSet(newSet *goflag.FlagSet) { if newSet == nil { return } newSet.VisitAll(func(goflag *goflag.Flag) { f.AddGoFlag(goflag) }) } ================================================ FILE: vendor/github.com/spf13/pflag/int.go ================================================ package pflag import "strconv" // -- int Value type intValue int func newIntValue(val int, p *int) *intValue { *p = val return (*intValue)(p) } func (i *intValue) Set(s string) error { v, err := strconv.ParseInt(s, 0, 64) *i = intValue(v) return err } func (i *intValue) Type() string { return "int" } func (i *intValue) String() string { return strconv.Itoa(int(*i)) } func intConv(sval string) (interface{}, error) { return strconv.Atoi(sval) } // GetInt return the int value of a flag with the given name func (f *FlagSet) GetInt(name string) (int, error) { val, err := f.getFlagType(name, "int", intConv) if err != nil { return 0, err } return val.(int), nil } // IntVar defines an int flag with specified name, default value, and usage string. // The argument p points to an int variable in which to store the value of the flag. 
func (f *FlagSet) IntVar(p *int, name string, value int, usage string) {
	// Registered without a shorthand.
	target := newIntValue(value, p)
	f.VarP(target, name, "", usage)
}

// IntVarP is like IntVar, but accepts a shorthand letter that can be used after a single dash.
func (f *FlagSet) IntVarP(p *int, name, shorthand string, value int, usage string) {
	target := newIntValue(value, p)
	f.VarP(target, name, shorthand, usage)
}

// IntVar defines an int flag with specified name, default value, and usage string.
// The argument p points to an int variable in which to store the value of the flag.
func IntVar(p *int, name string, value int, usage string) {
	target := newIntValue(value, p)
	CommandLine.VarP(target, name, "", usage)
}

// IntVarP is like IntVar, but accepts a shorthand letter that can be used after a single dash.
func IntVarP(p *int, name, shorthand string, value int, usage string) {
	target := newIntValue(value, p)
	CommandLine.VarP(target, name, shorthand, usage)
}

// Int defines an int flag with specified name, default value, and usage string.
// The return value is the address of an int variable that stores the value of the flag.
func (f *FlagSet) Int(name string, value int, usage string) *int {
	var store int
	f.IntVarP(&store, name, "", value, usage)
	return &store
}

// IntP is like Int, but accepts a shorthand letter that can be used after a single dash.
func (f *FlagSet) IntP(name, shorthand string, value int, usage string) *int {
	var store int
	f.IntVarP(&store, name, shorthand, value, usage)
	return &store
}

// Int defines an int flag with specified name, default value, and usage string.
// The return value is the address of an int variable that stores the value of the flag.
func Int(name string, value int, usage string) *int {
	// Same as IntP on CommandLine with an empty shorthand.
	return CommandLine.IntP(name, "", value, usage)
}

// IntP is like Int, but accepts a shorthand letter that can be used after a single dash.
func IntP(name, shorthand string, value int, usage string) *int { return CommandLine.IntP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/int32.go ================================================ package pflag import "strconv" // -- int32 Value type int32Value int32 func newInt32Value(val int32, p *int32) *int32Value { *p = val return (*int32Value)(p) } func (i *int32Value) Set(s string) error { v, err := strconv.ParseInt(s, 0, 32) *i = int32Value(v) return err } func (i *int32Value) Type() string { return "int32" } func (i *int32Value) String() string { return strconv.FormatInt(int64(*i), 10) } func int32Conv(sval string) (interface{}, error) { v, err := strconv.ParseInt(sval, 0, 32) if err != nil { return 0, err } return int32(v), nil } // GetInt32 return the int32 value of a flag with the given name func (f *FlagSet) GetInt32(name string) (int32, error) { val, err := f.getFlagType(name, "int32", int32Conv) if err != nil { return 0, err } return val.(int32), nil } // Int32Var defines an int32 flag with specified name, default value, and usage string. // The argument p points to an int32 variable in which to store the value of the flag. func (f *FlagSet) Int32Var(p *int32, name string, value int32, usage string) { f.VarP(newInt32Value(value, p), name, "", usage) } // Int32VarP is like Int32Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Int32VarP(p *int32, name, shorthand string, value int32, usage string) { f.VarP(newInt32Value(value, p), name, shorthand, usage) } // Int32Var defines an int32 flag with specified name, default value, and usage string. // The argument p points to an int32 variable in which to store the value of the flag. 
func Int32Var(p *int32, name string, value int32, usage string) { CommandLine.VarP(newInt32Value(value, p), name, "", usage) } // Int32VarP is like Int32Var, but accepts a shorthand letter that can be used after a single dash. func Int32VarP(p *int32, name, shorthand string, value int32, usage string) { CommandLine.VarP(newInt32Value(value, p), name, shorthand, usage) } // Int32 defines an int32 flag with specified name, default value, and usage string. // The return value is the address of an int32 variable that stores the value of the flag. func (f *FlagSet) Int32(name string, value int32, usage string) *int32 { p := new(int32) f.Int32VarP(p, name, "", value, usage) return p } // Int32P is like Int32, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Int32P(name, shorthand string, value int32, usage string) *int32 { p := new(int32) f.Int32VarP(p, name, shorthand, value, usage) return p } // Int32 defines an int32 flag with specified name, default value, and usage string. // The return value is the address of an int32 variable that stores the value of the flag. func Int32(name string, value int32, usage string) *int32 { return CommandLine.Int32P(name, "", value, usage) } // Int32P is like Int32, but accepts a shorthand letter that can be used after a single dash. 
func Int32P(name, shorthand string, value int32, usage string) *int32 { return CommandLine.Int32P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/int64.go ================================================ package pflag import "strconv" // -- int64 Value type int64Value int64 func newInt64Value(val int64, p *int64) *int64Value { *p = val return (*int64Value)(p) } func (i *int64Value) Set(s string) error { v, err := strconv.ParseInt(s, 0, 64) *i = int64Value(v) return err } func (i *int64Value) Type() string { return "int64" } func (i *int64Value) String() string { return strconv.FormatInt(int64(*i), 10) } func int64Conv(sval string) (interface{}, error) { return strconv.ParseInt(sval, 0, 64) } // GetInt64 return the int64 value of a flag with the given name func (f *FlagSet) GetInt64(name string) (int64, error) { val, err := f.getFlagType(name, "int64", int64Conv) if err != nil { return 0, err } return val.(int64), nil } // Int64Var defines an int64 flag with specified name, default value, and usage string. // The argument p points to an int64 variable in which to store the value of the flag. func (f *FlagSet) Int64Var(p *int64, name string, value int64, usage string) { f.VarP(newInt64Value(value, p), name, "", usage) } // Int64VarP is like Int64Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Int64VarP(p *int64, name, shorthand string, value int64, usage string) { f.VarP(newInt64Value(value, p), name, shorthand, usage) } // Int64Var defines an int64 flag with specified name, default value, and usage string. // The argument p points to an int64 variable in which to store the value of the flag. func Int64Var(p *int64, name string, value int64, usage string) { CommandLine.VarP(newInt64Value(value, p), name, "", usage) } // Int64VarP is like Int64Var, but accepts a shorthand letter that can be used after a single dash. 
func Int64VarP(p *int64, name, shorthand string, value int64, usage string) { CommandLine.VarP(newInt64Value(value, p), name, shorthand, usage) } // Int64 defines an int64 flag with specified name, default value, and usage string. // The return value is the address of an int64 variable that stores the value of the flag. func (f *FlagSet) Int64(name string, value int64, usage string) *int64 { p := new(int64) f.Int64VarP(p, name, "", value, usage) return p } // Int64P is like Int64, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Int64P(name, shorthand string, value int64, usage string) *int64 { p := new(int64) f.Int64VarP(p, name, shorthand, value, usage) return p } // Int64 defines an int64 flag with specified name, default value, and usage string. // The return value is the address of an int64 variable that stores the value of the flag. func Int64(name string, value int64, usage string) *int64 { return CommandLine.Int64P(name, "", value, usage) } // Int64P is like Int64, but accepts a shorthand letter that can be used after a single dash. 
func Int64P(name, shorthand string, value int64, usage string) *int64 { return CommandLine.Int64P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/int8.go ================================================ package pflag import "strconv" // -- int8 Value type int8Value int8 func newInt8Value(val int8, p *int8) *int8Value { *p = val return (*int8Value)(p) } func (i *int8Value) Set(s string) error { v, err := strconv.ParseInt(s, 0, 8) *i = int8Value(v) return err } func (i *int8Value) Type() string { return "int8" } func (i *int8Value) String() string { return strconv.FormatInt(int64(*i), 10) } func int8Conv(sval string) (interface{}, error) { v, err := strconv.ParseInt(sval, 0, 8) if err != nil { return 0, err } return int8(v), nil } // GetInt8 return the int8 value of a flag with the given name func (f *FlagSet) GetInt8(name string) (int8, error) { val, err := f.getFlagType(name, "int8", int8Conv) if err != nil { return 0, err } return val.(int8), nil } // Int8Var defines an int8 flag with specified name, default value, and usage string. // The argument p points to an int8 variable in which to store the value of the flag. func (f *FlagSet) Int8Var(p *int8, name string, value int8, usage string) { f.VarP(newInt8Value(value, p), name, "", usage) } // Int8VarP is like Int8Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Int8VarP(p *int8, name, shorthand string, value int8, usage string) { f.VarP(newInt8Value(value, p), name, shorthand, usage) } // Int8Var defines an int8 flag with specified name, default value, and usage string. // The argument p points to an int8 variable in which to store the value of the flag. func Int8Var(p *int8, name string, value int8, usage string) { CommandLine.VarP(newInt8Value(value, p), name, "", usage) } // Int8VarP is like Int8Var, but accepts a shorthand letter that can be used after a single dash. 
func Int8VarP(p *int8, name, shorthand string, value int8, usage string) { CommandLine.VarP(newInt8Value(value, p), name, shorthand, usage) } // Int8 defines an int8 flag with specified name, default value, and usage string. // The return value is the address of an int8 variable that stores the value of the flag. func (f *FlagSet) Int8(name string, value int8, usage string) *int8 { p := new(int8) f.Int8VarP(p, name, "", value, usage) return p } // Int8P is like Int8, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Int8P(name, shorthand string, value int8, usage string) *int8 { p := new(int8) f.Int8VarP(p, name, shorthand, value, usage) return p } // Int8 defines an int8 flag with specified name, default value, and usage string. // The return value is the address of an int8 variable that stores the value of the flag. func Int8(name string, value int8, usage string) *int8 { return CommandLine.Int8P(name, "", value, usage) } // Int8P is like Int8, but accepts a shorthand letter that can be used after a single dash. func Int8P(name, shorthand string, value int8, usage string) *int8 { return CommandLine.Int8P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/int_slice.go ================================================ package pflag import ( "fmt" "strconv" "strings" ) // -- intSlice Value type intSliceValue struct { value *[]int changed bool } func newIntSliceValue(val []int, p *[]int) *intSliceValue { isv := new(intSliceValue) isv.value = p *isv.value = val return isv } func (s *intSliceValue) Set(val string) error { ss := strings.Split(val, ",") out := make([]int, len(ss)) for i, d := range ss { var err error out[i], err = strconv.Atoi(d) if err != nil { return err } } if !s.changed { *s.value = out } else { *s.value = append(*s.value, out...) 
} s.changed = true return nil } func (s *intSliceValue) Type() string { return "intSlice" } func (s *intSliceValue) String() string { out := make([]string, len(*s.value)) for i, d := range *s.value { out[i] = fmt.Sprintf("%d", d) } return "[" + strings.Join(out, ",") + "]" } func intSliceConv(val string) (interface{}, error) { val = strings.Trim(val, "[]") // Empty string would cause a slice with one (empty) entry if len(val) == 0 { return []int{}, nil } ss := strings.Split(val, ",") out := make([]int, len(ss)) for i, d := range ss { var err error out[i], err = strconv.Atoi(d) if err != nil { return nil, err } } return out, nil } // GetIntSlice return the []int value of a flag with the given name func (f *FlagSet) GetIntSlice(name string) ([]int, error) { val, err := f.getFlagType(name, "intSlice", intSliceConv) if err != nil { return []int{}, err } return val.([]int), nil } // IntSliceVar defines a intSlice flag with specified name, default value, and usage string. // The argument p points to a []int variable in which to store the value of the flag. func (f *FlagSet) IntSliceVar(p *[]int, name string, value []int, usage string) { f.VarP(newIntSliceValue(value, p), name, "", usage) } // IntSliceVarP is like IntSliceVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IntSliceVarP(p *[]int, name, shorthand string, value []int, usage string) { f.VarP(newIntSliceValue(value, p), name, shorthand, usage) } // IntSliceVar defines a int[] flag with specified name, default value, and usage string. // The argument p points to a int[] variable in which to store the value of the flag. func IntSliceVar(p *[]int, name string, value []int, usage string) { CommandLine.VarP(newIntSliceValue(value, p), name, "", usage) } // IntSliceVarP is like IntSliceVar, but accepts a shorthand letter that can be used after a single dash. 
func IntSliceVarP(p *[]int, name, shorthand string, value []int, usage string) { CommandLine.VarP(newIntSliceValue(value, p), name, shorthand, usage) } // IntSlice defines a []int flag with specified name, default value, and usage string. // The return value is the address of a []int variable that stores the value of the flag. func (f *FlagSet) IntSlice(name string, value []int, usage string) *[]int { p := []int{} f.IntSliceVarP(&p, name, "", value, usage) return &p } // IntSliceP is like IntSlice, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IntSliceP(name, shorthand string, value []int, usage string) *[]int { p := []int{} f.IntSliceVarP(&p, name, shorthand, value, usage) return &p } // IntSlice defines a []int flag with specified name, default value, and usage string. // The return value is the address of a []int variable that stores the value of the flag. func IntSlice(name string, value []int, usage string) *[]int { return CommandLine.IntSliceP(name, "", value, usage) } // IntSliceP is like IntSlice, but accepts a shorthand letter that can be used after a single dash. 
func IntSliceP(name, shorthand string, value []int, usage string) *[]int { return CommandLine.IntSliceP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/ip.go ================================================ package pflag import ( "fmt" "net" "strings" ) // -- net.IP value type ipValue net.IP func newIPValue(val net.IP, p *net.IP) *ipValue { *p = val return (*ipValue)(p) } func (i *ipValue) String() string { return net.IP(*i).String() } func (i *ipValue) Set(s string) error { ip := net.ParseIP(strings.TrimSpace(s)) if ip == nil { return fmt.Errorf("failed to parse IP: %q", s) } *i = ipValue(ip) return nil } func (i *ipValue) Type() string { return "ip" } func ipConv(sval string) (interface{}, error) { ip := net.ParseIP(sval) if ip != nil { return ip, nil } return nil, fmt.Errorf("invalid string being converted to IP address: %s", sval) } // GetIP return the net.IP value of a flag with the given name func (f *FlagSet) GetIP(name string) (net.IP, error) { val, err := f.getFlagType(name, "ip", ipConv) if err != nil { return nil, err } return val.(net.IP), nil } // IPVar defines an net.IP flag with specified name, default value, and usage string. // The argument p points to an net.IP variable in which to store the value of the flag. func (f *FlagSet) IPVar(p *net.IP, name string, value net.IP, usage string) { f.VarP(newIPValue(value, p), name, "", usage) } // IPVarP is like IPVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IPVarP(p *net.IP, name, shorthand string, value net.IP, usage string) { f.VarP(newIPValue(value, p), name, shorthand, usage) } // IPVar defines an net.IP flag with specified name, default value, and usage string. // The argument p points to an net.IP variable in which to store the value of the flag. 
func IPVar(p *net.IP, name string, value net.IP, usage string) { CommandLine.VarP(newIPValue(value, p), name, "", usage) } // IPVarP is like IPVar, but accepts a shorthand letter that can be used after a single dash. func IPVarP(p *net.IP, name, shorthand string, value net.IP, usage string) { CommandLine.VarP(newIPValue(value, p), name, shorthand, usage) } // IP defines an net.IP flag with specified name, default value, and usage string. // The return value is the address of an net.IP variable that stores the value of the flag. func (f *FlagSet) IP(name string, value net.IP, usage string) *net.IP { p := new(net.IP) f.IPVarP(p, name, "", value, usage) return p } // IPP is like IP, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IPP(name, shorthand string, value net.IP, usage string) *net.IP { p := new(net.IP) f.IPVarP(p, name, shorthand, value, usage) return p } // IP defines an net.IP flag with specified name, default value, and usage string. // The return value is the address of an net.IP variable that stores the value of the flag. func IP(name string, value net.IP, usage string) *net.IP { return CommandLine.IPP(name, "", value, usage) } // IPP is like IP, but accepts a shorthand letter that can be used after a single dash. func IPP(name, shorthand string, value net.IP, usage string) *net.IP { return CommandLine.IPP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/ip_slice.go ================================================ package pflag import ( "fmt" "io" "net" "strings" ) // -- ipSlice Value type ipSliceValue struct { value *[]net.IP changed bool } func newIPSliceValue(val []net.IP, p *[]net.IP) *ipSliceValue { ipsv := new(ipSliceValue) ipsv.value = p *ipsv.value = val return ipsv } // Set converts, and assigns, the comma-separated IP argument string representation as the []net.IP value of this flag. 
// If Set is called on a flag that already has a []net.IP assigned, the newly converted values will be appended. func (s *ipSliceValue) Set(val string) error { // remove all quote characters rmQuote := strings.NewReplacer(`"`, "", `'`, "", "`", "") // read flag arguments with CSV parser ipStrSlice, err := readAsCSV(rmQuote.Replace(val)) if err != nil && err != io.EOF { return err } // parse ip values into slice out := make([]net.IP, 0, len(ipStrSlice)) for _, ipStr := range ipStrSlice { ip := net.ParseIP(strings.TrimSpace(ipStr)) if ip == nil { return fmt.Errorf("invalid string being converted to IP address: %s", ipStr) } out = append(out, ip) } if !s.changed { *s.value = out } else { *s.value = append(*s.value, out...) } s.changed = true return nil } // Type returns a string that uniquely represents this flag's type. func (s *ipSliceValue) Type() string { return "ipSlice" } // String defines a "native" format for this net.IP slice flag value. func (s *ipSliceValue) String() string { ipStrSlice := make([]string, len(*s.value)) for i, ip := range *s.value { ipStrSlice[i] = ip.String() } out, _ := writeAsCSV(ipStrSlice) return "[" + out + "]" } func ipSliceConv(val string) (interface{}, error) { val = strings.Trim(val, "[]") // Emtpy string would cause a slice with one (empty) entry if len(val) == 0 { return []net.IP{}, nil } ss := strings.Split(val, ",") out := make([]net.IP, len(ss)) for i, sval := range ss { ip := net.ParseIP(strings.TrimSpace(sval)) if ip == nil { return nil, fmt.Errorf("invalid string being converted to IP address: %s", sval) } out[i] = ip } return out, nil } // GetIPSlice returns the []net.IP value of a flag with the given name func (f *FlagSet) GetIPSlice(name string) ([]net.IP, error) { val, err := f.getFlagType(name, "ipSlice", ipSliceConv) if err != nil { return []net.IP{}, err } return val.([]net.IP), nil } // IPSliceVar defines a ipSlice flag with specified name, default value, and usage string. 
// The argument p points to a []net.IP variable in which to store the value of the flag. func (f *FlagSet) IPSliceVar(p *[]net.IP, name string, value []net.IP, usage string) { f.VarP(newIPSliceValue(value, p), name, "", usage) } // IPSliceVarP is like IPSliceVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IPSliceVarP(p *[]net.IP, name, shorthand string, value []net.IP, usage string) { f.VarP(newIPSliceValue(value, p), name, shorthand, usage) } // IPSliceVar defines a []net.IP flag with specified name, default value, and usage string. // The argument p points to a []net.IP variable in which to store the value of the flag. func IPSliceVar(p *[]net.IP, name string, value []net.IP, usage string) { CommandLine.VarP(newIPSliceValue(value, p), name, "", usage) } // IPSliceVarP is like IPSliceVar, but accepts a shorthand letter that can be used after a single dash. func IPSliceVarP(p *[]net.IP, name, shorthand string, value []net.IP, usage string) { CommandLine.VarP(newIPSliceValue(value, p), name, shorthand, usage) } // IPSlice defines a []net.IP flag with specified name, default value, and usage string. // The return value is the address of a []net.IP variable that stores the value of that flag. func (f *FlagSet) IPSlice(name string, value []net.IP, usage string) *[]net.IP { p := []net.IP{} f.IPSliceVarP(&p, name, "", value, usage) return &p } // IPSliceP is like IPSlice, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IPSliceP(name, shorthand string, value []net.IP, usage string) *[]net.IP { p := []net.IP{} f.IPSliceVarP(&p, name, shorthand, value, usage) return &p } // IPSlice defines a []net.IP flag with specified name, default value, and usage string. // The return value is the address of a []net.IP variable that stores the value of the flag. 
func IPSlice(name string, value []net.IP, usage string) *[]net.IP { return CommandLine.IPSliceP(name, "", value, usage) } // IPSliceP is like IPSlice, but accepts a shorthand letter that can be used after a single dash. func IPSliceP(name, shorthand string, value []net.IP, usage string) *[]net.IP { return CommandLine.IPSliceP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/ipmask.go ================================================ package pflag import ( "fmt" "net" "strconv" ) // -- net.IPMask value type ipMaskValue net.IPMask func newIPMaskValue(val net.IPMask, p *net.IPMask) *ipMaskValue { *p = val return (*ipMaskValue)(p) } func (i *ipMaskValue) String() string { return net.IPMask(*i).String() } func (i *ipMaskValue) Set(s string) error { ip := ParseIPv4Mask(s) if ip == nil { return fmt.Errorf("failed to parse IP mask: %q", s) } *i = ipMaskValue(ip) return nil } func (i *ipMaskValue) Type() string { return "ipMask" } // ParseIPv4Mask written in IP form (e.g. 255.255.255.0). // This function should really belong to the net package. 
func ParseIPv4Mask(s string) net.IPMask { mask := net.ParseIP(s) if mask == nil { if len(s) != 8 { return nil } // net.IPMask.String() actually outputs things like ffffff00 // so write a horrible parser for that as well :-( m := []int{} for i := 0; i < 4; i++ { b := "0x" + s[2*i:2*i+2] d, err := strconv.ParseInt(b, 0, 0) if err != nil { return nil } m = append(m, int(d)) } s := fmt.Sprintf("%d.%d.%d.%d", m[0], m[1], m[2], m[3]) mask = net.ParseIP(s) if mask == nil { return nil } } return net.IPv4Mask(mask[12], mask[13], mask[14], mask[15]) } func parseIPv4Mask(sval string) (interface{}, error) { mask := ParseIPv4Mask(sval) if mask == nil { return nil, fmt.Errorf("unable to parse %s as net.IPMask", sval) } return mask, nil } // GetIPv4Mask return the net.IPv4Mask value of a flag with the given name func (f *FlagSet) GetIPv4Mask(name string) (net.IPMask, error) { val, err := f.getFlagType(name, "ipMask", parseIPv4Mask) if err != nil { return nil, err } return val.(net.IPMask), nil } // IPMaskVar defines an net.IPMask flag with specified name, default value, and usage string. // The argument p points to an net.IPMask variable in which to store the value of the flag. func (f *FlagSet) IPMaskVar(p *net.IPMask, name string, value net.IPMask, usage string) { f.VarP(newIPMaskValue(value, p), name, "", usage) } // IPMaskVarP is like IPMaskVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IPMaskVarP(p *net.IPMask, name, shorthand string, value net.IPMask, usage string) { f.VarP(newIPMaskValue(value, p), name, shorthand, usage) } // IPMaskVar defines an net.IPMask flag with specified name, default value, and usage string. // The argument p points to an net.IPMask variable in which to store the value of the flag. 
func IPMaskVar(p *net.IPMask, name string, value net.IPMask, usage string) { CommandLine.VarP(newIPMaskValue(value, p), name, "", usage) } // IPMaskVarP is like IPMaskVar, but accepts a shorthand letter that can be used after a single dash. func IPMaskVarP(p *net.IPMask, name, shorthand string, value net.IPMask, usage string) { CommandLine.VarP(newIPMaskValue(value, p), name, shorthand, usage) } // IPMask defines an net.IPMask flag with specified name, default value, and usage string. // The return value is the address of an net.IPMask variable that stores the value of the flag. func (f *FlagSet) IPMask(name string, value net.IPMask, usage string) *net.IPMask { p := new(net.IPMask) f.IPMaskVarP(p, name, "", value, usage) return p } // IPMaskP is like IPMask, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IPMaskP(name, shorthand string, value net.IPMask, usage string) *net.IPMask { p := new(net.IPMask) f.IPMaskVarP(p, name, shorthand, value, usage) return p } // IPMask defines an net.IPMask flag with specified name, default value, and usage string. // The return value is the address of an net.IPMask variable that stores the value of the flag. func IPMask(name string, value net.IPMask, usage string) *net.IPMask { return CommandLine.IPMaskP(name, "", value, usage) } // IPMaskP is like IP, but accepts a shorthand letter that can be used after a single dash. func IPMaskP(name, shorthand string, value net.IPMask, usage string) *net.IPMask { return CommandLine.IPMaskP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/ipnet.go ================================================ package pflag import ( "fmt" "net" "strings" ) // IPNet adapts net.IPNet for use as a flag. 
type ipNetValue net.IPNet func (ipnet ipNetValue) String() string { n := net.IPNet(ipnet) return n.String() } func (ipnet *ipNetValue) Set(value string) error { _, n, err := net.ParseCIDR(strings.TrimSpace(value)) if err != nil { return err } *ipnet = ipNetValue(*n) return nil } func (*ipNetValue) Type() string { return "ipNet" } func newIPNetValue(val net.IPNet, p *net.IPNet) *ipNetValue { *p = val return (*ipNetValue)(p) } func ipNetConv(sval string) (interface{}, error) { _, n, err := net.ParseCIDR(strings.TrimSpace(sval)) if err == nil { return *n, nil } return nil, fmt.Errorf("invalid string being converted to IPNet: %s", sval) } // GetIPNet return the net.IPNet value of a flag with the given name func (f *FlagSet) GetIPNet(name string) (net.IPNet, error) { val, err := f.getFlagType(name, "ipNet", ipNetConv) if err != nil { return net.IPNet{}, err } return val.(net.IPNet), nil } // IPNetVar defines an net.IPNet flag with specified name, default value, and usage string. // The argument p points to an net.IPNet variable in which to store the value of the flag. func (f *FlagSet) IPNetVar(p *net.IPNet, name string, value net.IPNet, usage string) { f.VarP(newIPNetValue(value, p), name, "", usage) } // IPNetVarP is like IPNetVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IPNetVarP(p *net.IPNet, name, shorthand string, value net.IPNet, usage string) { f.VarP(newIPNetValue(value, p), name, shorthand, usage) } // IPNetVar defines an net.IPNet flag with specified name, default value, and usage string. // The argument p points to an net.IPNet variable in which to store the value of the flag. func IPNetVar(p *net.IPNet, name string, value net.IPNet, usage string) { CommandLine.VarP(newIPNetValue(value, p), name, "", usage) } // IPNetVarP is like IPNetVar, but accepts a shorthand letter that can be used after a single dash. 
func IPNetVarP(p *net.IPNet, name, shorthand string, value net.IPNet, usage string) { CommandLine.VarP(newIPNetValue(value, p), name, shorthand, usage) } // IPNet defines an net.IPNet flag with specified name, default value, and usage string. // The return value is the address of an net.IPNet variable that stores the value of the flag. func (f *FlagSet) IPNet(name string, value net.IPNet, usage string) *net.IPNet { p := new(net.IPNet) f.IPNetVarP(p, name, "", value, usage) return p } // IPNetP is like IPNet, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) IPNetP(name, shorthand string, value net.IPNet, usage string) *net.IPNet { p := new(net.IPNet) f.IPNetVarP(p, name, shorthand, value, usage) return p } // IPNet defines an net.IPNet flag with specified name, default value, and usage string. // The return value is the address of an net.IPNet variable that stores the value of the flag. func IPNet(name string, value net.IPNet, usage string) *net.IPNet { return CommandLine.IPNetP(name, "", value, usage) } // IPNetP is like IPNet, but accepts a shorthand letter that can be used after a single dash. 
func IPNetP(name, shorthand string, value net.IPNet, usage string) *net.IPNet { return CommandLine.IPNetP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/string.go ================================================ package pflag // -- string Value type stringValue string func newStringValue(val string, p *string) *stringValue { *p = val return (*stringValue)(p) } func (s *stringValue) Set(val string) error { *s = stringValue(val) return nil } func (s *stringValue) Type() string { return "string" } func (s *stringValue) String() string { return string(*s) } func stringConv(sval string) (interface{}, error) { return sval, nil } // GetString return the string value of a flag with the given name func (f *FlagSet) GetString(name string) (string, error) { val, err := f.getFlagType(name, "string", stringConv) if err != nil { return "", err } return val.(string), nil } // StringVar defines a string flag with specified name, default value, and usage string. // The argument p points to a string variable in which to store the value of the flag. func (f *FlagSet) StringVar(p *string, name string, value string, usage string) { f.VarP(newStringValue(value, p), name, "", usage) } // StringVarP is like StringVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) StringVarP(p *string, name, shorthand string, value string, usage string) { f.VarP(newStringValue(value, p), name, shorthand, usage) } // StringVar defines a string flag with specified name, default value, and usage string. // The argument p points to a string variable in which to store the value of the flag. func StringVar(p *string, name string, value string, usage string) { CommandLine.VarP(newStringValue(value, p), name, "", usage) } // StringVarP is like StringVar, but accepts a shorthand letter that can be used after a single dash. 
func StringVarP(p *string, name, shorthand string, value string, usage string) { CommandLine.VarP(newStringValue(value, p), name, shorthand, usage) } // String defines a string flag with specified name, default value, and usage string. // The return value is the address of a string variable that stores the value of the flag. func (f *FlagSet) String(name string, value string, usage string) *string { p := new(string) f.StringVarP(p, name, "", value, usage) return p } // StringP is like String, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) StringP(name, shorthand string, value string, usage string) *string { p := new(string) f.StringVarP(p, name, shorthand, value, usage) return p } // String defines a string flag with specified name, default value, and usage string. // The return value is the address of a string variable that stores the value of the flag. func String(name string, value string, usage string) *string { return CommandLine.StringP(name, "", value, usage) } // StringP is like String, but accepts a shorthand letter that can be used after a single dash. 
func StringP(name, shorthand string, value string, usage string) *string { return CommandLine.StringP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/string_array.go ================================================ package pflag // -- stringArray Value type stringArrayValue struct { value *[]string changed bool } func newStringArrayValue(val []string, p *[]string) *stringArrayValue { ssv := new(stringArrayValue) ssv.value = p *ssv.value = val return ssv } func (s *stringArrayValue) Set(val string) error { if !s.changed { *s.value = []string{val} s.changed = true } else { *s.value = append(*s.value, val) } return nil } func (s *stringArrayValue) Type() string { return "stringArray" } func (s *stringArrayValue) String() string { str, _ := writeAsCSV(*s.value) return "[" + str + "]" } func stringArrayConv(sval string) (interface{}, error) { sval = sval[1 : len(sval)-1] // An empty string would cause a array with one (empty) string if len(sval) == 0 { return []string{}, nil } return readAsCSV(sval) } // GetStringArray return the []string value of a flag with the given name func (f *FlagSet) GetStringArray(name string) ([]string, error) { val, err := f.getFlagType(name, "stringArray", stringArrayConv) if err != nil { return []string{}, err } return val.([]string), nil } // StringArrayVar defines a string flag with specified name, default value, and usage string. // The argument p points to a []string variable in which to store the values of the multiple flags. // The value of each argument will not try to be separated by comma func (f *FlagSet) StringArrayVar(p *[]string, name string, value []string, usage string) { f.VarP(newStringArrayValue(value, p), name, "", usage) } // StringArrayVarP is like StringArrayVar, but accepts a shorthand letter that can be used after a single dash. 
func (f *FlagSet) StringArrayVarP(p *[]string, name, shorthand string, value []string, usage string) {
	flagValue := newStringArrayValue(value, p)
	f.VarP(flagValue, name, shorthand, usage)
}

// StringArrayVar defines a string flag with specified name, default value, and usage string.
// The argument p points to a []string variable in which to store the value of the flag.
// The value of each argument will not try to be separated by comma
func StringArrayVar(p *[]string, name string, value []string, usage string) {
	CommandLine.StringArrayVar(p, name, value, usage)
}

// StringArrayVarP is like StringArrayVar, but accepts a shorthand letter that can be used after a single dash.
func StringArrayVarP(p *[]string, name, shorthand string, value []string, usage string) {
	CommandLine.StringArrayVarP(p, name, shorthand, value, usage)
}

// StringArray defines a string flag with specified name, default value, and usage string.
// The return value is the address of a []string variable that stores the value of the flag.
// The value of each argument will not try to be separated by comma
func (f *FlagSet) StringArray(name string, value []string, usage string) *[]string {
	return f.StringArrayP(name, "", value, usage)
}

// StringArrayP is like StringArray, but accepts a shorthand letter that can be used after a single dash.
func (f *FlagSet) StringArrayP(name, shorthand string, value []string, usage string) *[]string {
	target := []string{}
	f.StringArrayVarP(&target, name, shorthand, value, usage)
	return &target
}

// StringArray defines a string flag with specified name, default value, and usage string.
// The return value is the address of a []string variable that stores the value of the flag.
// The value of each argument will not try to be separated by comma func StringArray(name string, value []string, usage string) *[]string { return CommandLine.StringArrayP(name, "", value, usage) } // StringArrayP is like StringArray, but accepts a shorthand letter that can be used after a single dash. func StringArrayP(name, shorthand string, value []string, usage string) *[]string { return CommandLine.StringArrayP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/string_slice.go ================================================ package pflag import ( "bytes" "encoding/csv" "strings" ) // -- stringSlice Value type stringSliceValue struct { value *[]string changed bool } func newStringSliceValue(val []string, p *[]string) *stringSliceValue { ssv := new(stringSliceValue) ssv.value = p *ssv.value = val return ssv } func readAsCSV(val string) ([]string, error) { if val == "" { return []string{}, nil } stringReader := strings.NewReader(val) csvReader := csv.NewReader(stringReader) return csvReader.Read() } func writeAsCSV(vals []string) (string, error) { b := &bytes.Buffer{} w := csv.NewWriter(b) err := w.Write(vals) if err != nil { return "", err } w.Flush() return strings.TrimSuffix(b.String(), "\n"), nil } func (s *stringSliceValue) Set(val string) error { v, err := readAsCSV(val) if err != nil { return err } if !s.changed { *s.value = v } else { *s.value = append(*s.value, v...) 
} s.changed = true return nil } func (s *stringSliceValue) Type() string { return "stringSlice" } func (s *stringSliceValue) String() string { str, _ := writeAsCSV(*s.value) return "[" + str + "]" } func stringSliceConv(sval string) (interface{}, error) { sval = sval[1 : len(sval)-1] // An empty string would cause a slice with one (empty) string if len(sval) == 0 { return []string{}, nil } return readAsCSV(sval) } // GetStringSlice return the []string value of a flag with the given name func (f *FlagSet) GetStringSlice(name string) ([]string, error) { val, err := f.getFlagType(name, "stringSlice", stringSliceConv) if err != nil { return []string{}, err } return val.([]string), nil } // StringSliceVar defines a string flag with specified name, default value, and usage string. // The argument p points to a []string variable in which to store the value of the flag. func (f *FlagSet) StringSliceVar(p *[]string, name string, value []string, usage string) { f.VarP(newStringSliceValue(value, p), name, "", usage) } // StringSliceVarP is like StringSliceVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) StringSliceVarP(p *[]string, name, shorthand string, value []string, usage string) { f.VarP(newStringSliceValue(value, p), name, shorthand, usage) } // StringSliceVar defines a string flag with specified name, default value, and usage string. // The argument p points to a []string variable in which to store the value of the flag. func StringSliceVar(p *[]string, name string, value []string, usage string) { CommandLine.VarP(newStringSliceValue(value, p), name, "", usage) } // StringSliceVarP is like StringSliceVar, but accepts a shorthand letter that can be used after a single dash. func StringSliceVarP(p *[]string, name, shorthand string, value []string, usage string) { CommandLine.VarP(newStringSliceValue(value, p), name, shorthand, usage) } // StringSlice defines a string flag with specified name, default value, and usage string. 
// The return value is the address of a []string variable that stores the value of the flag. func (f *FlagSet) StringSlice(name string, value []string, usage string) *[]string { p := []string{} f.StringSliceVarP(&p, name, "", value, usage) return &p } // StringSliceP is like StringSlice, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) StringSliceP(name, shorthand string, value []string, usage string) *[]string { p := []string{} f.StringSliceVarP(&p, name, shorthand, value, usage) return &p } // StringSlice defines a string flag with specified name, default value, and usage string. // The return value is the address of a []string variable that stores the value of the flag. func StringSlice(name string, value []string, usage string) *[]string { return CommandLine.StringSliceP(name, "", value, usage) } // StringSliceP is like StringSlice, but accepts a shorthand letter that can be used after a single dash. func StringSliceP(name, shorthand string, value []string, usage string) *[]string { return CommandLine.StringSliceP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/uint.go ================================================ package pflag import "strconv" // -- uint Value type uintValue uint func newUintValue(val uint, p *uint) *uintValue { *p = val return (*uintValue)(p) } func (i *uintValue) Set(s string) error { v, err := strconv.ParseUint(s, 0, 64) *i = uintValue(v) return err } func (i *uintValue) Type() string { return "uint" } func (i *uintValue) String() string { return strconv.FormatUint(uint64(*i), 10) } func uintConv(sval string) (interface{}, error) { v, err := strconv.ParseUint(sval, 0, 0) if err != nil { return 0, err } return uint(v), nil } // GetUint return the uint value of a flag with the given name func (f *FlagSet) GetUint(name string) (uint, error) { val, err := f.getFlagType(name, "uint", uintConv) if err != nil { return 0, err } return 
val.(uint), nil } // UintVar defines a uint flag with specified name, default value, and usage string. // The argument p points to a uint variable in which to store the value of the flag. func (f *FlagSet) UintVar(p *uint, name string, value uint, usage string) { f.VarP(newUintValue(value, p), name, "", usage) } // UintVarP is like UintVar, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) UintVarP(p *uint, name, shorthand string, value uint, usage string) { f.VarP(newUintValue(value, p), name, shorthand, usage) } // UintVar defines a uint flag with specified name, default value, and usage string. // The argument p points to a uint variable in which to store the value of the flag. func UintVar(p *uint, name string, value uint, usage string) { CommandLine.VarP(newUintValue(value, p), name, "", usage) } // UintVarP is like UintVar, but accepts a shorthand letter that can be used after a single dash. func UintVarP(p *uint, name, shorthand string, value uint, usage string) { CommandLine.VarP(newUintValue(value, p), name, shorthand, usage) } // Uint defines a uint flag with specified name, default value, and usage string. // The return value is the address of a uint variable that stores the value of the flag. func (f *FlagSet) Uint(name string, value uint, usage string) *uint { p := new(uint) f.UintVarP(p, name, "", value, usage) return p } // UintP is like Uint, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) UintP(name, shorthand string, value uint, usage string) *uint { p := new(uint) f.UintVarP(p, name, shorthand, value, usage) return p } // Uint defines a uint flag with specified name, default value, and usage string. // The return value is the address of a uint variable that stores the value of the flag. 
func Uint(name string, value uint, usage string) *uint { return CommandLine.UintP(name, "", value, usage) } // UintP is like Uint, but accepts a shorthand letter that can be used after a single dash. func UintP(name, shorthand string, value uint, usage string) *uint { return CommandLine.UintP(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/uint16.go ================================================ package pflag import "strconv" // -- uint16 value type uint16Value uint16 func newUint16Value(val uint16, p *uint16) *uint16Value { *p = val return (*uint16Value)(p) } func (i *uint16Value) Set(s string) error { v, err := strconv.ParseUint(s, 0, 16) *i = uint16Value(v) return err } func (i *uint16Value) Type() string { return "uint16" } func (i *uint16Value) String() string { return strconv.FormatUint(uint64(*i), 10) } func uint16Conv(sval string) (interface{}, error) { v, err := strconv.ParseUint(sval, 0, 16) if err != nil { return 0, err } return uint16(v), nil } // GetUint16 return the uint16 value of a flag with the given name func (f *FlagSet) GetUint16(name string) (uint16, error) { val, err := f.getFlagType(name, "uint16", uint16Conv) if err != nil { return 0, err } return val.(uint16), nil } // Uint16Var defines a uint flag with specified name, default value, and usage string. // The argument p points to a uint variable in which to store the value of the flag. func (f *FlagSet) Uint16Var(p *uint16, name string, value uint16, usage string) { f.VarP(newUint16Value(value, p), name, "", usage) } // Uint16VarP is like Uint16Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Uint16VarP(p *uint16, name, shorthand string, value uint16, usage string) { f.VarP(newUint16Value(value, p), name, shorthand, usage) } // Uint16Var defines a uint flag with specified name, default value, and usage string. 
// The argument p points to a uint variable in which to store the value of the flag. func Uint16Var(p *uint16, name string, value uint16, usage string) { CommandLine.VarP(newUint16Value(value, p), name, "", usage) } // Uint16VarP is like Uint16Var, but accepts a shorthand letter that can be used after a single dash. func Uint16VarP(p *uint16, name, shorthand string, value uint16, usage string) { CommandLine.VarP(newUint16Value(value, p), name, shorthand, usage) } // Uint16 defines a uint flag with specified name, default value, and usage string. // The return value is the address of a uint variable that stores the value of the flag. func (f *FlagSet) Uint16(name string, value uint16, usage string) *uint16 { p := new(uint16) f.Uint16VarP(p, name, "", value, usage) return p } // Uint16P is like Uint16, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Uint16P(name, shorthand string, value uint16, usage string) *uint16 { p := new(uint16) f.Uint16VarP(p, name, shorthand, value, usage) return p } // Uint16 defines a uint flag with specified name, default value, and usage string. // The return value is the address of a uint variable that stores the value of the flag. func Uint16(name string, value uint16, usage string) *uint16 { return CommandLine.Uint16P(name, "", value, usage) } // Uint16P is like Uint16, but accepts a shorthand letter that can be used after a single dash. 
func Uint16P(name, shorthand string, value uint16, usage string) *uint16 { return CommandLine.Uint16P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/uint32.go ================================================ package pflag import "strconv" // -- uint32 value type uint32Value uint32 func newUint32Value(val uint32, p *uint32) *uint32Value { *p = val return (*uint32Value)(p) } func (i *uint32Value) Set(s string) error { v, err := strconv.ParseUint(s, 0, 32) *i = uint32Value(v) return err } func (i *uint32Value) Type() string { return "uint32" } func (i *uint32Value) String() string { return strconv.FormatUint(uint64(*i), 10) } func uint32Conv(sval string) (interface{}, error) { v, err := strconv.ParseUint(sval, 0, 32) if err != nil { return 0, err } return uint32(v), nil } // GetUint32 return the uint32 value of a flag with the given name func (f *FlagSet) GetUint32(name string) (uint32, error) { val, err := f.getFlagType(name, "uint32", uint32Conv) if err != nil { return 0, err } return val.(uint32), nil } // Uint32Var defines a uint32 flag with specified name, default value, and usage string. // The argument p points to a uint32 variable in which to store the value of the flag. func (f *FlagSet) Uint32Var(p *uint32, name string, value uint32, usage string) { f.VarP(newUint32Value(value, p), name, "", usage) } // Uint32VarP is like Uint32Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Uint32VarP(p *uint32, name, shorthand string, value uint32, usage string) { f.VarP(newUint32Value(value, p), name, shorthand, usage) } // Uint32Var defines a uint32 flag with specified name, default value, and usage string. // The argument p points to a uint32 variable in which to store the value of the flag. 
func Uint32Var(p *uint32, name string, value uint32, usage string) { CommandLine.VarP(newUint32Value(value, p), name, "", usage) } // Uint32VarP is like Uint32Var, but accepts a shorthand letter that can be used after a single dash. func Uint32VarP(p *uint32, name, shorthand string, value uint32, usage string) { CommandLine.VarP(newUint32Value(value, p), name, shorthand, usage) } // Uint32 defines a uint32 flag with specified name, default value, and usage string. // The return value is the address of a uint32 variable that stores the value of the flag. func (f *FlagSet) Uint32(name string, value uint32, usage string) *uint32 { p := new(uint32) f.Uint32VarP(p, name, "", value, usage) return p } // Uint32P is like Uint32, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Uint32P(name, shorthand string, value uint32, usage string) *uint32 { p := new(uint32) f.Uint32VarP(p, name, shorthand, value, usage) return p } // Uint32 defines a uint32 flag with specified name, default value, and usage string. // The return value is the address of a uint32 variable that stores the value of the flag. func Uint32(name string, value uint32, usage string) *uint32 { return CommandLine.Uint32P(name, "", value, usage) } // Uint32P is like Uint32, but accepts a shorthand letter that can be used after a single dash. 
func Uint32P(name, shorthand string, value uint32, usage string) *uint32 { return CommandLine.Uint32P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/uint64.go ================================================ package pflag import "strconv" // -- uint64 Value type uint64Value uint64 func newUint64Value(val uint64, p *uint64) *uint64Value { *p = val return (*uint64Value)(p) } func (i *uint64Value) Set(s string) error { v, err := strconv.ParseUint(s, 0, 64) *i = uint64Value(v) return err } func (i *uint64Value) Type() string { return "uint64" } func (i *uint64Value) String() string { return strconv.FormatUint(uint64(*i), 10) } func uint64Conv(sval string) (interface{}, error) { v, err := strconv.ParseUint(sval, 0, 64) if err != nil { return 0, err } return uint64(v), nil } // GetUint64 return the uint64 value of a flag with the given name func (f *FlagSet) GetUint64(name string) (uint64, error) { val, err := f.getFlagType(name, "uint64", uint64Conv) if err != nil { return 0, err } return val.(uint64), nil } // Uint64Var defines a uint64 flag with specified name, default value, and usage string. // The argument p points to a uint64 variable in which to store the value of the flag. func (f *FlagSet) Uint64Var(p *uint64, name string, value uint64, usage string) { f.VarP(newUint64Value(value, p), name, "", usage) } // Uint64VarP is like Uint64Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Uint64VarP(p *uint64, name, shorthand string, value uint64, usage string) { f.VarP(newUint64Value(value, p), name, shorthand, usage) } // Uint64Var defines a uint64 flag with specified name, default value, and usage string. // The argument p points to a uint64 variable in which to store the value of the flag. 
func Uint64Var(p *uint64, name string, value uint64, usage string) { CommandLine.VarP(newUint64Value(value, p), name, "", usage) } // Uint64VarP is like Uint64Var, but accepts a shorthand letter that can be used after a single dash. func Uint64VarP(p *uint64, name, shorthand string, value uint64, usage string) { CommandLine.VarP(newUint64Value(value, p), name, shorthand, usage) } // Uint64 defines a uint64 flag with specified name, default value, and usage string. // The return value is the address of a uint64 variable that stores the value of the flag. func (f *FlagSet) Uint64(name string, value uint64, usage string) *uint64 { p := new(uint64) f.Uint64VarP(p, name, "", value, usage) return p } // Uint64P is like Uint64, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Uint64P(name, shorthand string, value uint64, usage string) *uint64 { p := new(uint64) f.Uint64VarP(p, name, shorthand, value, usage) return p } // Uint64 defines a uint64 flag with specified name, default value, and usage string. // The return value is the address of a uint64 variable that stores the value of the flag. func Uint64(name string, value uint64, usage string) *uint64 { return CommandLine.Uint64P(name, "", value, usage) } // Uint64P is like Uint64, but accepts a shorthand letter that can be used after a single dash. 
func Uint64P(name, shorthand string, value uint64, usage string) *uint64 { return CommandLine.Uint64P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/uint8.go ================================================ package pflag import "strconv" // -- uint8 Value type uint8Value uint8 func newUint8Value(val uint8, p *uint8) *uint8Value { *p = val return (*uint8Value)(p) } func (i *uint8Value) Set(s string) error { v, err := strconv.ParseUint(s, 0, 8) *i = uint8Value(v) return err } func (i *uint8Value) Type() string { return "uint8" } func (i *uint8Value) String() string { return strconv.FormatUint(uint64(*i), 10) } func uint8Conv(sval string) (interface{}, error) { v, err := strconv.ParseUint(sval, 0, 8) if err != nil { return 0, err } return uint8(v), nil } // GetUint8 return the uint8 value of a flag with the given name func (f *FlagSet) GetUint8(name string) (uint8, error) { val, err := f.getFlagType(name, "uint8", uint8Conv) if err != nil { return 0, err } return val.(uint8), nil } // Uint8Var defines a uint8 flag with specified name, default value, and usage string. // The argument p points to a uint8 variable in which to store the value of the flag. func (f *FlagSet) Uint8Var(p *uint8, name string, value uint8, usage string) { f.VarP(newUint8Value(value, p), name, "", usage) } // Uint8VarP is like Uint8Var, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Uint8VarP(p *uint8, name, shorthand string, value uint8, usage string) { f.VarP(newUint8Value(value, p), name, shorthand, usage) } // Uint8Var defines a uint8 flag with specified name, default value, and usage string. // The argument p points to a uint8 variable in which to store the value of the flag. 
func Uint8Var(p *uint8, name string, value uint8, usage string) { CommandLine.VarP(newUint8Value(value, p), name, "", usage) } // Uint8VarP is like Uint8Var, but accepts a shorthand letter that can be used after a single dash. func Uint8VarP(p *uint8, name, shorthand string, value uint8, usage string) { CommandLine.VarP(newUint8Value(value, p), name, shorthand, usage) } // Uint8 defines a uint8 flag with specified name, default value, and usage string. // The return value is the address of a uint8 variable that stores the value of the flag. func (f *FlagSet) Uint8(name string, value uint8, usage string) *uint8 { p := new(uint8) f.Uint8VarP(p, name, "", value, usage) return p } // Uint8P is like Uint8, but accepts a shorthand letter that can be used after a single dash. func (f *FlagSet) Uint8P(name, shorthand string, value uint8, usage string) *uint8 { p := new(uint8) f.Uint8VarP(p, name, shorthand, value, usage) return p } // Uint8 defines a uint8 flag with specified name, default value, and usage string. // The return value is the address of a uint8 variable that stores the value of the flag. func Uint8(name string, value uint8, usage string) *uint8 { return CommandLine.Uint8P(name, "", value, usage) } // Uint8P is like Uint8, but accepts a shorthand letter that can be used after a single dash. 
func Uint8P(name, shorthand string, value uint8, usage string) *uint8 { return CommandLine.Uint8P(name, shorthand, value, usage) } ================================================ FILE: vendor/github.com/spf13/pflag/uint_slice.go ================================================ package pflag import ( "fmt" "strconv" "strings" ) // -- uintSlice Value type uintSliceValue struct { value *[]uint changed bool } func newUintSliceValue(val []uint, p *[]uint) *uintSliceValue { uisv := new(uintSliceValue) uisv.value = p *uisv.value = val return uisv } func (s *uintSliceValue) Set(val string) error { ss := strings.Split(val, ",") out := make([]uint, len(ss)) for i, d := range ss { u, err := strconv.ParseUint(d, 10, 0) if err != nil { return err } out[i] = uint(u) } if !s.changed { *s.value = out } else { *s.value = append(*s.value, out...) } s.changed = true return nil } func (s *uintSliceValue) Type() string { return "uintSlice" } func (s *uintSliceValue) String() string { out := make([]string, len(*s.value)) for i, d := range *s.value { out[i] = fmt.Sprintf("%d", d) } return "[" + strings.Join(out, ",") + "]" } func uintSliceConv(val string) (interface{}, error) { val = strings.Trim(val, "[]") // Empty string would cause a slice with one (empty) entry if len(val) == 0 { return []uint{}, nil } ss := strings.Split(val, ",") out := make([]uint, len(ss)) for i, d := range ss { u, err := strconv.ParseUint(d, 10, 0) if err != nil { return nil, err } out[i] = uint(u) } return out, nil } // GetUintSlice returns the []uint value of a flag with the given name. func (f *FlagSet) GetUintSlice(name string) ([]uint, error) { val, err := f.getFlagType(name, "uintSlice", uintSliceConv) if err != nil { return []uint{}, err } return val.([]uint), nil } // UintSliceVar defines a uintSlice flag with specified name, default value, and usage string. // The argument p points to a []uint variable in which to store the value of the flag. 
func (f *FlagSet) UintSliceVar(p *[]uint, name string, value []uint, usage string) {
	f.UintSliceVarP(p, name, "", value, usage)
}

// UintSliceVarP behaves like UintSliceVar and additionally registers a
// shorthand letter that can be used after a single dash.
func (f *FlagSet) UintSliceVarP(p *[]uint, name, shorthand string, value []uint, usage string) {
	flagValue := newUintSliceValue(value, p)
	f.VarP(flagValue, name, shorthand, usage)
}

// UintSliceVar registers a []uint flag on the package-level CommandLine set.
// The argument p points to the []uint variable that receives the flag's value.
func UintSliceVar(p *[]uint, name string, value []uint, usage string) {
	CommandLine.UintSliceVarP(p, name, "", value, usage)
}

// UintSliceVarP behaves like the package-level UintSliceVar and additionally
// registers a shorthand letter that can be used after a single dash.
func UintSliceVarP(p *[]uint, name, shorthand string, value []uint, usage string) {
	CommandLine.UintSliceVarP(p, name, shorthand, value, usage)
}

// UintSlice registers a []uint flag on f with the given name, default value
// and usage string, and returns the address of the slice holding its value.
func (f *FlagSet) UintSlice(name string, value []uint, usage string) *[]uint {
	return f.UintSliceP(name, "", value, usage)
}

// UintSliceP behaves like UintSlice and additionally registers a shorthand
// letter that can be used after a single dash.
func (f *FlagSet) UintSliceP(name, shorthand string, value []uint, usage string) *[]uint {
	// The initial content is irrelevant: newUintSliceValue overwrites it
	// with the default value before the flag is registered.
	stored := new([]uint)
	f.UintSliceVarP(stored, name, shorthand, value, usage)
	return stored
}

// UintSlice registers a []uint flag on the package-level CommandLine set and
// returns the address of the slice holding its value.
func UintSlice(name string, value []uint, usage string) *[]uint {
	const noShorthand = ""
	return CommandLine.UintSliceP(name, noShorthand, value, usage)
}

// UintSliceP is like UintSlice, but accepts a shorthand letter that can be used after a single dash.
func UintSliceP(name, shorthand string, value []uint, usage string) *[]uint {
	return CommandLine.UintSliceP(name, shorthand, value, usage)
}

================================================
FILE: vendor/github.com/willf/bitset/LICENSE
================================================
Copyright (c) 2014 Will Fitzgerald. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

   * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
   * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: vendor/github.com/willf/bitset/bitset.go
================================================
/*
Package bitset implements bitsets, a mapping
between non-negative integers and boolean values. It should be more
efficient than map[uint] bool.

It provides methods for setting, clearing, flipping, and testing
individual integers.

But it also provides set intersection, union, difference,
complement, and symmetric operations, as well as tests to
check whether any, all, or no bits are set, and querying a
bitset's current length and number of positive bits.

BitSets are expanded to the size of the largest set bit; the
memory allocation is approximately Max bits, where Max is the
largest set bit. BitSets are never shrunk. On creation, a hint
can be given for the number of bits that will be used.

Many of the methods, including Set, Clear, and Flip, return
a BitSet pointer, which allows for chaining.

Example use:

	import "bitset"
	var b BitSet
	b.Set(10).Set(11)
	if b.Test(1000) {
		b.Clear(1000)
	}
	if b.Intersection(bitset.New(100).Set(10)).Count() > 1 {
		fmt.Println("Intersection works.")
	}

As an alternative to BitSets, one should check out the 'big' package,
which provides a (less set-theoretical) view of bitsets.

*/
package bitset

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strconv"
)

// the wordSize of a bit set
const wordSize = uint(64)

// log2WordSize is lg(wordSize)
const log2WordSize = uint(6)

// allBits has every bit set
const allBits uint64 = 0xffffffffffffffff

// A BitSet is a set of bits. The zero value of a BitSet is an empty set of length 0.
type BitSet struct {
	// length is the number of bits in the set; bit indexes at or beyond it
	// are treated as unset.
	length uint
	// set holds the bits packed into 64-bit words, bit i living in word
	// i>>log2WordSize. It may be nil for an empty set.
	set []uint64
}

// Error is used to distinguish errors (panics) generated in this package.
type Error string

// safeSet will fixup b.set to be non-nil and return the field value
func (b *BitSet) safeSet() []uint64 {
	if b.set == nil {
		b.set = make([]uint64, wordsNeeded(0))
	}
	return b.set
}

// From is a constructor used to create a BitSet from a slice of words.
// The slice is used directly as the backing store (it is not copied), and the
// length is set to 64 bits per word.
func From(buf []uint64) *BitSet {
	return &BitSet{uint(len(buf)) * 64, buf}
}

// Bytes returns the bitset as a slice of 64-bit words. The returned slice is
// the backing store itself, not a copy.
func (b *BitSet) Bytes() []uint64 {
	return b.set
}

// wordsNeeded calculates the number of words needed for i bits
func wordsNeeded(i uint) int {
	// i + (wordSize - 1) would wrap around for values this close to the
	// maximum uint, so answer with the largest possible word count instead.
	if i > ((^uint(0)) - wordSize + 1) {
		return int((^uint(0)) >> log2WordSize)
	}
	// Round up to a whole number of words.
	return int((i + (wordSize - 1)) >> log2WordSize)
}

// New creates a new BitSet with a hint that length bits will be required.
// If building the set panics (for example because the backing slice for
// length bits cannot be allocated), the panic is recovered and an empty
// BitSet of length 0 is returned instead.
func New(length uint) (bset *BitSet) {
	defer func() {
		if r := recover(); r != nil {
			bset = &BitSet{
				0,
				make([]uint64, 0),
			}
		}
	}()

	bset = &BitSet{
		length,
		make([]uint64, wordsNeeded(length)),
	}

	return bset
}

// Cap returns the total possible capacity, or number of bits
func Cap() uint {
	return ^uint(0)
}

// Len returns the length of the BitSet in bits
func (b *BitSet) Len() uint {
	return b.length
}

// extendSetMaybe adds additional words to incorporate new bits if needed.
// After the call the set is at least i+1 bits long; a set that is already
// long enough is left unchanged.
func (b *BitSet) extendSetMaybe(i uint) {
	if i >= b.length { // if we need more bits, make 'em
		nsize := wordsNeeded(i + 1)
		if b.set == nil {
			b.set = make([]uint64, nsize)
		} else if cap(b.set) >= nsize {
			b.set = b.set[:nsize] // fast resize
		} else if len(b.set) < nsize {
			newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x
			copy(newset, b.set)
			b.set = newset
		}
		b.length = i + 1
	}
}

// Test whether bit i is set.
func (b *BitSet) Test(i uint) bool { if i >= b.length { return false } return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0 } // Set bit i to 1 func (b *BitSet) Set(i uint) *BitSet { b.extendSetMaybe(i) b.set[i>>log2WordSize] |= 1 << (i & (wordSize - 1)) return b } // Clear bit i to 0 func (b *BitSet) Clear(i uint) *BitSet { if i >= b.length { return b } b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1)) return b } // SetTo sets bit i to value func (b *BitSet) SetTo(i uint, value bool) *BitSet { if value { return b.Set(i) } return b.Clear(i) } // Flip bit at i func (b *BitSet) Flip(i uint) *BitSet { if i >= b.length { return b.Set(i) } b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1)) return b } // String creates a string representation of the Bitmap func (b *BitSet) String() string { // follows code from https://github.com/RoaringBitmap/roaring var buffer bytes.Buffer start := []byte("{") buffer.Write(start) counter := 0 i, e := b.NextSet(0) for e { counter = counter + 1 // to avoid exhausting the memory if counter > 0x40000 { buffer.WriteString("...") break } buffer.WriteString(strconv.FormatInt(int64(i), 10)) i, e = b.NextSet(i + 1) if e { buffer.WriteString(",") } } buffer.WriteString("}") return buffer.String() } // NextSet returns the next bit set from the specified index, // including possibly the current index // along with an error code (true = valid, false = no set bit found) // for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...} func (b *BitSet) NextSet(i uint) (uint, bool) { x := int(i >> log2WordSize) if x >= len(b.set) { return 0, false } w := b.set[x] w = w >> (i & (wordSize - 1)) if w != 0 { return i + trailingZeroes64(w), true } x = x + 1 for x < len(b.set) { if b.set[x] != 0 { return uint(x)*wordSize + trailingZeroes64(b.set[x]), true } x = x + 1 } return 0, false } // NextClear returns the next clear bit from the specified index, // including possibly the current index // along with an error code (true = valid, false = no bit found 
i.e. all bits are set) func (b *BitSet) NextClear(i uint) (uint, bool) { x := int(i >> log2WordSize) if x >= len(b.set) { return 0, false } w := b.set[x] w = w >> (i & (wordSize - 1)) wA := allBits >> (i & (wordSize - 1)) if w != wA { return i + trailingZeroes64(^w), true } x++ for x < len(b.set) { if b.set[x] != allBits { return uint(x)*wordSize + trailingZeroes64(^b.set[x]), true } x++ } return 0, false } // ClearAll clears the entire BitSet func (b *BitSet) ClearAll() *BitSet { if b != nil && b.set != nil { for i := range b.set { b.set[i] = 0 } } return b } // wordCount returns the number of words used in a bit set func (b *BitSet) wordCount() int { return len(b.set) } // Clone this BitSet func (b *BitSet) Clone() *BitSet { c := New(b.length) if b.set != nil { // Clone should not modify current object copy(c.set, b.set) } return c } // Copy into a destination BitSet // Returning the size of the destination BitSet // like array copy func (b *BitSet) Copy(c *BitSet) (count uint) { if c == nil { return } if b.set != nil { // Copy should not modify current object copy(c.set, b.set) } count = c.length if b.length < c.length { count = b.length } return } // Count (number of set bits) func (b *BitSet) Count() uint { if b != nil && b.set != nil { return uint(popcntSlice(b.set)) } return 0 } var deBruijn = [...]byte{ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, } func trailingZeroes64(v uint64) uint { return uint(deBruijn[((v&-v)*0x03f79d71b4ca8b09)>>58]) } // Equal tests the equvalence of two BitSets. 
// False if they are of different sizes, otherwise true // only if all the same bits are set func (b *BitSet) Equal(c *BitSet) bool { if c == nil { return false } if b.length != c.length { return false } if b.length == 0 { // if they have both length == 0, then could have nil set return true } // testing for equality shoud not transform the bitset (no call to safeSet) for p, v := range b.set { if c.set[p] != v { return false } } return true } func panicIfNull(b *BitSet) { if b == nil { panic(Error("BitSet must not be null")) } } // Difference of base set and other set // This is the BitSet equivalent of &^ (and not) func (b *BitSet) Difference(compare *BitSet) (result *BitSet) { panicIfNull(b) panicIfNull(compare) result = b.Clone() // clone b (in case b is bigger than compare) l := int(compare.wordCount()) if l > int(b.wordCount()) { l = int(b.wordCount()) } for i := 0; i < l; i++ { result.set[i] = b.set[i] &^ compare.set[i] } return } // DifferenceCardinality computes the cardinality of the differnce func (b *BitSet) DifferenceCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) l := int(compare.wordCount()) if l > int(b.wordCount()) { l = int(b.wordCount()) } cnt := uint64(0) cnt += popcntMaskSlice(b.set[:l], compare.set[:l]) cnt += popcntSlice(b.set[l:]) return uint(cnt) } // InPlaceDifference computes the difference of base set and other set // This is the BitSet equivalent of &^ (and not) func (b *BitSet) InPlaceDifference(compare *BitSet) { panicIfNull(b) panicIfNull(compare) l := int(compare.wordCount()) if l > int(b.wordCount()) { l = int(b.wordCount()) } for i := 0; i < l; i++ { b.set[i] &^= compare.set[i] } } // Convenience function: return two bitsets ordered by // increasing length. 
Note: neither can be nil func sortByLength(a *BitSet, b *BitSet) (ap *BitSet, bp *BitSet) { if a.length <= b.length { ap, bp = a, b } else { ap, bp = b, a } return } // Intersection of base set and other set // This is the BitSet equivalent of & (and) func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) { panicIfNull(b) panicIfNull(compare) b, compare = sortByLength(b, compare) result = New(b.length) for i, word := range b.set { result.set[i] = word & compare.set[i] } return } // IntersectionCardinality computes the cardinality of the union func (b *BitSet) IntersectionCardinality(compare *BitSet) uint { panicIfNull(b) panicIfNull(compare) b, compare = sortByLength(b, compare) cnt := popcntAndSlice(b.set, compare.set) return uint(cnt) } // InPlaceIntersection destructively computes the intersection of // base set and the compare set. // This is the BitSet equivalent of & (and) func (b *BitSet) InPlaceIntersection(compare *BitSet) { panicIfNull(b) panicIfNull(compare) l := int(compare.wordCount()) if l > int(b.wordCount()) { l = int(b.wordCount()) } for i := 0; i < l; i++ { b.set[i] &= compare.set[i] } for i := l; i < len(b.set); i++ { b.set[i] = 0 } if compare.length > 0 { b.extendSetMaybe(compare.length - 1) } return } // Union of base set and other set // This is the BitSet equivalent of | (or) func (b *BitSet) Union(compare *BitSet) (result *BitSet) { panicIfNull(b) panicIfNull(compare) b, compare = sortByLength(b, compare) result = compare.Clone() for i, word := range b.set { result.set[i] = word | compare.set[i] } return } // UnionCardinality computes the cardinality of the uniton of the base set // and the compare set. 
func (b *BitSet) UnionCardinality(compare *BitSet) uint {
	panicIfNull(b)
	panicIfNull(compare)
	// From here on b is the shorter operand and compare the longer one.
	b, compare = sortByLength(b, compare)
	cnt := popcntOrSlice(b.set, compare.set)
	// Words that only the longer operand has count in full.
	if len(compare.set) > len(b.set) {
		cnt += popcntSlice(compare.set[len(b.set):])
	}
	return uint(cnt)
}

// InPlaceUnion creates the destructive union of base set and compare set.
// This is the BitSet equivalent of | (or).
func (b *BitSet) InPlaceUnion(compare *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	// l is the number of words both sets have; it must be taken before b
	// is extended below.
	l := int(compare.wordCount())
	if l > int(b.wordCount()) {
		l = int(b.wordCount())
	}
	if compare.length > 0 {
		b.extendSetMaybe(compare.length - 1)
	}
	for i := 0; i < l; i++ {
		b.set[i] |= compare.set[i]
	}
	// Words that only compare has are copied over unchanged.
	if len(compare.set) > l {
		for i := l; i < len(compare.set); i++ {
			b.set[i] = compare.set[i]
		}
	}
}

// SymmetricDifference of base set and other set
// This is the BitSet equivalent of ^ (xor)
func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	b, compare = sortByLength(b, compare)
	// compare is bigger, so clone it
	result = compare.Clone()
	for i, word := range b.set {
		result.set[i] = word ^ compare.set[i]
	}
	return
}

// SymmetricDifferenceCardinality computes the cardinality of the symmetric difference
func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint {
	panicIfNull(b)
	panicIfNull(compare)
	// From here on b is the shorter operand and compare the longer one.
	b, compare = sortByLength(b, compare)
	cnt := popcntXorSlice(b.set, compare.set)
	// Words that only the longer operand has count in full.
	if len(compare.set) > len(b.set) {
		cnt += popcntSlice(compare.set[len(b.set):])
	}
	return uint(cnt)
}

// InPlaceSymmetricDifference creates the destructive SymmetricDifference of base set and other set
// This is the BitSet equivalent of ^ (xor)
func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) {
	panicIfNull(b)
	panicIfNull(compare)
	// l is the number of words both sets have; it must be taken before b
	// is extended below.
	l := int(compare.wordCount())
	if l > int(b.wordCount()) {
		l = int(b.wordCount())
	}
	if compare.length > 0 {
		b.extendSetMaybe(compare.length - 1)
	}
	for i := 0; i < l; i++ {
		b.set[i] ^= compare.set[i]
	}
	// Words that only compare has are copied over unchanged.
	if len(compare.set) > l {
		for i := l; i < len(compare.set); i++ {
			b.set[i] = compare.set[i]
		}
	}
}

// Is the length an exact multiple of word sizes?
func (b *BitSet) isLenExactMultiple() bool {
	return b.length%wordSize == 0
}

// Clean last word by setting unused bits to 0
func (b *BitSet) cleanLastWord() {
	if !b.isLenExactMultiple() {
		// Keep only the low length%wordSize bits of the final word.
		b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize)
	}
}

// Complement computes the (local) complement of a bitset (up to length bits)
func (b *BitSet) Complement() (result *BitSet) {
	panicIfNull(b)
	result = New(b.length)
	for i, word := range b.set {
		result.set[i] = ^word
	}
	// Inverting the last word also set its unused bits; clear them again.
	result.cleanLastWord()
	return
}

// All returns true if all bits are set, false otherwise. Returns true for
// empty sets.
func (b *BitSet) All() bool {
	panicIfNull(b)
	return b.Count() == b.length
}

// None returns true if no bit is set, false otherwise. Returns true for
// empty sets.
func (b *BitSet) None() bool {
	panicIfNull(b)
	if b != nil && b.set != nil {
		for _, word := range b.set {
			if word > 0 {
				return false
			}
		}
		return true
	}
	return true
}

// Any returns true if any bit is set, false otherwise
func (b *BitSet) Any() bool {
	panicIfNull(b)
	return !b.None()
}

// IsSuperSet returns true if this is a superset of the other set
func (b *BitSet) IsSuperSet(other *BitSet) bool {
	// Every bit set in other must also be set in b.
	for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) {
		if !b.Test(i) {
			return false
		}
	}
	return true
}

// IsStrictSuperSet returns true if this is a strict superset of the other set
func (b *BitSet) IsStrictSuperSet(other *BitSet) bool {
	return b.Count() > other.Count() && b.IsSuperSet(other)
}

// DumpAsBits dumps a bit set as a string of bits. Each word is printed as 64
// binary digits followed by a dot, starting with the last word of the
// backing slice. A set with a nil backing slice is dumped as ".".
func (b *BitSet) DumpAsBits() string {
	if b.set == nil {
		return "."
	}
	buffer := bytes.NewBufferString("")
	i := len(b.set) - 1
	for ; i >= 0; i-- {
		fmt.Fprintf(buffer, "%064b.", b.set[i])
	}
	return string(buffer.Bytes())
}

// BinaryStorageSize returns the binary storage requirements in bytes: one
// uint64 for the length plus the words of the set.
func (b *BitSet) BinaryStorageSize() int {
	return binary.Size(uint64(0)) + binary.Size(b.set)
}

// WriteTo writes a BitSet to a stream: the length as a big-endian uint64
// followed by the words of the set, also big-endian.
// NOTE(review): when writing the words fails, the returned count is still the
// full storage size rather than the number of bytes actually written.
func (b *BitSet) WriteTo(stream io.Writer) (int64, error) {
	length := uint64(b.length)

	// Write length
	err := binary.Write(stream, binary.BigEndian, length)
	if err != nil {
		return 0, err
	}

	// Write set
	err = binary.Write(stream, binary.BigEndian, b.set)
	return int64(b.BinaryStorageSize()), err
}

// ReadFrom reads a BitSet from a stream written using WriteTo.
// The receiver is only overwritten once the whole set has been read.
func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) {
	var length uint64

	// Read length first
	err := binary.Read(stream, binary.BigEndian, &length)
	if err != nil {
		return 0, err
	}
	newset := New(uint(length))

	// The lengths differ if the value did not survive the conversion to
	// uint, or if New recovered from a panic and returned an empty set.
	if uint64(newset.length) != length {
		return 0, errors.New("Unmarshalling error: type mismatch")
	}

	// Read remaining bytes as set
	err = binary.Read(stream, binary.BigEndian, newset.set)
	if err != nil {
		return 0, err
	}

	*b = *newset
	return int64(b.BinaryStorageSize()), nil
}

// MarshalBinary encodes a BitSet into a binary form and returns the result.
func (b *BitSet) MarshalBinary() ([]byte, error) {
	var buf bytes.Buffer
	writer := bufio.NewWriter(&buf)

	_, err := b.WriteTo(writer)
	if err != nil {
		return []byte{}, err
	}

	// The bytes only reach buf once the buffered writer is flushed.
	err = writer.Flush()

	return buf.Bytes(), err
}

// UnmarshalBinary decodes the binary form generated by MarshalBinary.
func (b *BitSet) UnmarshalBinary(data []byte) error { buf := bytes.NewReader(data) reader := bufio.NewReader(buf) _, err := b.ReadFrom(reader) return err } // MarshalJSON marshals a BitSet as a JSON structure func (b *BitSet) MarshalJSON() ([]byte, error) { buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize())) _, err := b.WriteTo(buffer) if err != nil { return nil, err } // URLEncode all bytes return json.Marshal(base64.URLEncoding.EncodeToString(buffer.Bytes())) } // UnmarshalJSON unmarshals a BitSet from JSON created using MarshalJSON func (b *BitSet) UnmarshalJSON(data []byte) error { // Unmarshal as string var s string err := json.Unmarshal(data, &s) if err != nil { return err } // URLDecode string buf, err := base64.URLEncoding.DecodeString(s) if err != nil { return err } _, err = b.ReadFrom(bytes.NewReader(buf)) return err } ================================================ FILE: vendor/github.com/willf/bitset/popcnt.go ================================================ package bitset // bit population count, take from // https://code.google.com/p/go/issues/detail?id=4988#c11 // credit: https://code.google.com/u/arnehormann/ func popcount(x uint64) (n uint64) { x -= (x >> 1) & 0x5555555555555555 x = (x>>2)&0x3333333333333333 + x&0x3333333333333333 x += x >> 4 x &= 0x0f0f0f0f0f0f0f0f x *= 0x0101010101010101 return x >> 56 } func popcntSliceGo(s []uint64) uint64 { cnt := uint64(0) for _, x := range s { cnt += popcount(x) } return cnt } func popcntMaskSliceGo(s, m []uint64) uint64 { cnt := uint64(0) for i := range s { cnt += popcount(s[i] &^ m[i]) } return cnt } func popcntAndSliceGo(s, m []uint64) uint64 { cnt := uint64(0) for i := range s { cnt += popcount(s[i] & m[i]) } return cnt } func popcntOrSliceGo(s, m []uint64) uint64 { cnt := uint64(0) for i := range s { cnt += popcount(s[i] | m[i]) } return cnt } func popcntXorSliceGo(s, m []uint64) uint64 { cnt := uint64(0) for i := range s { cnt += popcount(s[i] ^ m[i]) } return cnt } 
================================================
FILE: vendor/github.com/willf/bitset/popcnt_amd64.go
================================================
// +build amd64,!appengine

package bitset

// *** the following functions are defined in popcnt_amd64.s

//go:noescape
func hasAsm() bool

// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
var useAsm = hasAsm()

//go:noescape
func popcntSliceAsm(s []uint64) uint64

//go:noescape
func popcntMaskSliceAsm(s, m []uint64) uint64

//go:noescape
func popcntAndSliceAsm(s, m []uint64) uint64

//go:noescape
func popcntOrSliceAsm(s, m []uint64) uint64

//go:noescape
func popcntXorSliceAsm(s, m []uint64) uint64

// popcntSlice counts the set bits in s, using the assembly routine when
// useAsm is true and the Go fallback otherwise.
func popcntSlice(s []uint64) uint64 {
	if useAsm {
		return popcntSliceAsm(s)
	}
	return popcntSliceGo(s)
}

// popcntMaskSlice counts the bits of s &^ m, word by word.
func popcntMaskSlice(s, m []uint64) uint64 {
	if useAsm {
		return popcntMaskSliceAsm(s, m)
	}
	return popcntMaskSliceGo(s, m)
}

// popcntAndSlice counts the bits of s & m, word by word.
func popcntAndSlice(s, m []uint64) uint64 {
	if useAsm {
		return popcntAndSliceAsm(s, m)
	}
	return popcntAndSliceGo(s, m)
}

// popcntOrSlice counts the bits of s | m, word by word.
func popcntOrSlice(s, m []uint64) uint64 {
	if useAsm {
		return popcntOrSliceAsm(s, m)
	}
	return popcntOrSliceGo(s, m)
}

// popcntXorSlice counts the bits of s ^ m, word by word.
func popcntXorSlice(s, m []uint64) uint64 {
	if useAsm {
		return popcntXorSliceAsm(s, m)
	}
	return popcntXorSliceGo(s, m)
}

================================================
FILE: vendor/github.com/willf/bitset/popcnt_amd64.s
================================================
// +build amd64,!appengine

// hasAsm runs CPUID with AX=1 and returns bit 23 of CX, which is the
// POPCNT feature flag.
TEXT ·hasAsm(SB),4,$0-1
MOVQ $1, AX
CPUID
SHRQ $23, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET

// Hand-encoded POPCNTQ DX, DX.
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2

// popcntSliceAsm sums the population count of every word of s.
// AX accumulates the total, SI walks s, CX holds the remaining word count.
TEXT ·popcntSliceAsm(SB),4,$0-32
XORQ	AX, AX
MOVQ	s+0(FP), SI
MOVQ	s_len+8(FP), CX
TESTQ	CX, CX
JZ		popcntSliceEnd // empty slice: the result is 0
popcntSliceLoop:
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
ADDQ	DX, AX
ADDQ	$8, SI
LOOP	popcntSliceLoop
popcntSliceEnd:
MOVQ	AX, ret+24(FP)
RET

// popcntMaskSliceAsm sums the population count of s[i] &^ m[i] over the
// len(s) words of s. DI walks m in step with SI.
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
XORQ	AX, AX
MOVQ	s+0(FP), SI
MOVQ	s_len+8(FP), CX
TESTQ	CX, CX
JZ		popcntMaskSliceEnd
MOVQ	m+24(FP), DI
popcntMaskSliceLoop:
MOVQ	(DI), DX
NOTQ	DX
ANDQ	(SI), DX
POPCNTQ_DX_DX
ADDQ	DX, AX
ADDQ	$8, SI
ADDQ	$8, DI
LOOP	popcntMaskSliceLoop
popcntMaskSliceEnd:
MOVQ	AX, ret+48(FP)
RET

// popcntAndSliceAsm sums the population count of s[i] & m[i] over the
// len(s) words of s.
TEXT ·popcntAndSliceAsm(SB),4,$0-56
XORQ	AX, AX
MOVQ	s+0(FP), SI
MOVQ	s_len+8(FP), CX
TESTQ	CX, CX
JZ		popcntAndSliceEnd
MOVQ	m+24(FP), DI
popcntAndSliceLoop:
MOVQ	(DI), DX
ANDQ	(SI), DX
POPCNTQ_DX_DX
ADDQ	DX, AX
ADDQ	$8, SI
ADDQ	$8, DI
LOOP	popcntAndSliceLoop
popcntAndSliceEnd:
MOVQ	AX, ret+48(FP)
RET

// popcntOrSliceAsm sums the population count of s[i] | m[i] over the
// len(s) words of s.
TEXT ·popcntOrSliceAsm(SB),4,$0-56
XORQ	AX, AX
MOVQ	s+0(FP), SI
MOVQ	s_len+8(FP), CX
TESTQ	CX, CX
JZ		popcntOrSliceEnd
MOVQ	m+24(FP), DI
popcntOrSliceLoop:
MOVQ	(DI), DX
ORQ		(SI), DX
POPCNTQ_DX_DX
ADDQ	DX, AX
ADDQ	$8, SI
ADDQ	$8, DI
LOOP	popcntOrSliceLoop
popcntOrSliceEnd:
MOVQ	AX, ret+48(FP)
RET

// popcntXorSliceAsm sums the population count of s[i] ^ m[i] over the
// len(s) words of s.
TEXT ·popcntXorSliceAsm(SB),4,$0-56
XORQ	AX, AX
MOVQ	s+0(FP), SI
MOVQ	s_len+8(FP), CX
TESTQ	CX, CX
JZ		popcntXorSliceEnd
MOVQ	m+24(FP), DI
popcntXorSliceLoop:
MOVQ	(DI), DX
XORQ	(SI), DX
POPCNTQ_DX_DX
ADDQ	DX, AX
ADDQ	$8, SI
ADDQ	$8, DI
LOOP	popcntXorSliceLoop
popcntXorSliceEnd:
MOVQ	AX, ret+48(FP)
RET

================================================
FILE: vendor/github.com/willf/bitset/popcnt_generic.go
================================================
// +build !amd64 appengine

package bitset

// popcntSlice counts the set bits in s using the pure Go implementation.
func popcntSlice(s []uint64) uint64 {
	return popcntSliceGo(s)
}

// popcntMaskSlice counts the bits of s &^ m using the pure Go implementation.
func popcntMaskSlice(s, m []uint64) uint64 {
	return popcntMaskSliceGo(s, m)
}

// popcntAndSlice counts the bits of s & m using the pure Go implementation.
func popcntAndSlice(s, m []uint64) uint64 {
	return popcntAndSliceGo(s, m)
}

// popcntOrSlice counts the bits of s | m using the pure Go implementation.
func popcntOrSlice(s, m []uint64) uint64 {
	return popcntOrSliceGo(s, m)
}

// popcntXorSlice counts the bits of s ^ m using the pure Go implementation.
func popcntXorSlice(s, m []uint64) uint64 {
	return popcntXorSliceGo(s, m)
}

================================================
FILE: vendor/manifest
================================================
{
	"version": 0,
	"dependencies": [
		{
			"importpath": "github.com/edsrzf/mmap-go",
			"repository": "https://github.com/edsrzf/mmap-go",
			"vcs": "git",
			"revision":
"935e0e8a636ca4ba70b713f3e38a19e1b77739e8", "branch": "master", "notests": true }, { "importpath": "github.com/inconshreveable/mousetrap", "repository": "https://github.com/inconshreveable/mousetrap", "vcs": "git", "revision": "76626ae9c91c4f2a10f34cad8ce83ea42c93bb75", "branch": "master", "notests": true }, { "importpath": "github.com/spf13/cobra", "repository": "https://github.com/spf13/cobra", "vcs": "git", "revision": "16c014f1a19d865b765b420e74508f80eb831ada", "branch": "master", "notests": true }, { "importpath": "github.com/spf13/pflag", "repository": "https://github.com/spf13/pflag", "vcs": "git", "revision": "9ff6c6923cfffbcd502984b8e0c80539a94968b7", "branch": "master", "notests": true }, { "importpath": "github.com/willf/bitset", "repository": "https://github.com/willf/bitset", "vcs": "git", "revision": "5c3c0fce48842b2c0bbaa99b4e61b0175d84b47c", "branch": "master", "notests": true } ] } ================================================ FILE: writer.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vellum import ( "bufio" "io" ) // A writer is a buffered writer used by vellum. It counts how many bytes have // been written and has some convenience methods used for encoding the data. 
type writer struct { w *bufio.Writer counter int } func newWriter(w io.Writer) *writer { return &writer{ w: bufio.NewWriter(w), } } func (w *writer) Reset(newWriter io.Writer) { w.w.Reset(newWriter) w.counter = 0 } func (w *writer) WriteByte(c byte) error { err := w.w.WriteByte(c) if err != nil { return err } w.counter++ return nil } func (w *writer) Write(p []byte) (int, error) { n, err := w.w.Write(p) w.counter += n return n, err } func (w *writer) Flush() error { return w.w.Flush() } func (w *writer) WritePackedUintIn(v uint64, n int) error { for shift := uint(0); shift < uint(n*8); shift += 8 { err := w.WriteByte(byte(v >> shift)) if err != nil { return err } } return nil } func (w *writer) WritePackedUint(v uint64) error { n := packedSize(v) return w.WritePackedUintIn(v, n) } func packedSize(n uint64) int { if n < 1<<8 { return 1 } else if n < 1<<16 { return 2 } else if n < 1<<24 { return 3 } else if n < 1<<32 { return 4 } else if n < 1<<40 { return 5 } else if n < 1<<48 { return 6 } else if n < 1<<56 { return 7 } return 8 } ================================================ FILE: writer_test.go ================================================ // Copyright (c) 2017 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package vellum

import (
	"bufio"
	"errors"
	"fmt"
	"testing"
)

// TestPackedSize checks packedSize on both edges of every byte-width bucket,
// from one byte up to the full eight.
func TestPackedSize(t *testing.T) {
	cases := []struct {
		input uint64
		want  int
	}{
		{input: 0, want: 1},
		{input: 1<<8 - 1, want: 1},
		{input: 1 << 8, want: 2},
		{input: 1<<16 - 1, want: 2},
		{input: 1 << 16, want: 3},
		{input: 1<<24 - 1, want: 3},
		{input: 1 << 24, want: 4},
		{input: 1<<32 - 1, want: 4},
		{input: 1 << 32, want: 5},
		{input: 1<<40 - 1, want: 5},
		{input: 1 << 40, want: 6},
		{input: 1<<48 - 1, want: 6},
		{input: 1 << 48, want: 7},
		{input: 1<<56 - 1, want: 7},
		{input: 1 << 56, want: 8},
		{input: 1<<64 - 1, want: 8},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(fmt.Sprintf("input %d", tc.input), func(t *testing.T) {
			if got := packedSize(tc.input); got != tc.want {
				t.Errorf("wanted: %d, got: %d", tc.want, got)
			}
		})
	}
}

// errStub is the sentinel error every stubWriter in these tests returns.
var errStub = errors.New("stub error")

// stubWriter is an io.Writer that accepts nothing and always reports err.
type stubWriter struct {
	err error
}

// Write reports zero bytes written together with the configured error.
func (s *stubWriter) Write(p []byte) (n int, err error) {
	return 0, s.err
}

// TestWriteByteErr verifies that WriteByte surfaces the error of the
// underlying writer.
func TestWriteByteErr(t *testing.T) {
	// A one-byte buffer means the second byte forces a flush to the
	// failing stub.
	failing := &stubWriter{err: errStub}
	w := &writer{w: bufio.NewWriterSize(failing, 1)}

	// The first byte only fills the buffer; the second one must fail.
	_ = w.WriteByte('a')
	if err := w.WriteByte('a'); err != errStub {
		t.Errorf("expected %v, got %v", errStub, err)
	}
}

// TestWritePackedUintErr verifies that WritePackedUint surfaces the error of
// the underlying writer.
func TestWritePackedUintErr(t *testing.T) {
	// A one-byte buffer means a multi-byte value forces a flush to the
	// failing stub.
	failing := &stubWriter{err: errStub}
	w := &writer{w: bufio.NewWriterSize(failing, 1)}

	if err := w.WritePackedUint(36592); err != errStub {
		t.Errorf("expected %v, got %v", errStub, err)
	}
}