Repository: antgroup/hugescm Branch: master Commit: a41d2d951159 Files: 1006 Total size: 4.6 MB Directory structure: gitextract_x8awn04j/ ├── .github/ │ ├── dependabot.yml │ └── workflows/ │ └── ci.yml ├── .gitignore ├── .golangci.yml ├── CHANGELOG.md ├── LEGAL.md ├── LICENSE ├── Makefile ├── README.md ├── README.zh-CN.md ├── VERSION ├── bali.toml ├── cmd/ │ ├── README.md │ ├── hot/ │ │ ├── command/ │ │ │ ├── command.go │ │ │ ├── command_az.go │ │ │ ├── command_cat.go │ │ │ ├── command_co.go │ │ │ ├── command_diff.go │ │ │ ├── command_expire_refs.go │ │ │ ├── command_graft.go │ │ │ ├── command_mc.go │ │ │ ├── command_prune_refs.go │ │ │ ├── command_remove.go │ │ │ ├── command_scan_refs.go │ │ │ ├── command_show.go │ │ │ ├── command_size.go │ │ │ ├── command_smart.go │ │ │ ├── command_snapshot.go │ │ │ ├── command_stat.go │ │ │ ├── command_unbranch.go │ │ │ ├── misc.go │ │ │ └── pager.go │ │ ├── crate.toml │ │ ├── hot.go │ │ ├── pkg/ │ │ │ ├── README.md │ │ │ ├── co/ │ │ │ │ ├── co.go │ │ │ │ ├── misc.go │ │ │ │ └── misc_test.go │ │ │ ├── diff/ │ │ │ │ ├── diff.go │ │ │ │ └── parser_test.go │ │ │ ├── hud/ │ │ │ │ ├── bar.go │ │ │ │ └── display.go │ │ │ ├── mc/ │ │ │ │ └── migrate.go │ │ │ ├── refs/ │ │ │ │ └── refs.go │ │ │ ├── replay/ │ │ │ │ ├── cache.go │ │ │ │ ├── cleanup.go │ │ │ │ ├── drop.go │ │ │ │ ├── graft.go │ │ │ │ ├── misc.go │ │ │ │ ├── replay.go │ │ │ │ ├── unbranch.go │ │ │ │ └── update.go │ │ │ ├── stat/ │ │ │ │ ├── az.go │ │ │ │ ├── color.go │ │ │ │ ├── draw.go │ │ │ │ ├── size.go │ │ │ │ ├── stat.go │ │ │ │ ├── stat_test.go │ │ │ │ └── table.go │ │ │ └── tr/ │ │ │ ├── README.md │ │ │ ├── languages/ │ │ │ │ └── zh-CN.toml │ │ │ ├── tr.go │ │ │ └── tr_test.go │ │ └── winres.toml │ ├── zeta/ │ │ ├── crate.toml │ │ ├── main.go │ │ └── winres.toml │ ├── zeta-mc/ │ │ ├── crate.toml │ │ ├── main.go │ │ ├── migrate.go │ │ ├── msic.go │ │ └── winres.toml │ └── zeta-serve/ │ ├── command_encrypt.go │ ├── command_httpd.go │ ├── command_keygen.go │ ├── command_sshd.go │ ├── global.go │ ├── main.go │ ├── shutdown.go │ ├── shutdown_other.go │ └── shutdown_windows.go ├── docs/ │ ├── README.md │ ├── cdc.md │ ├── config.md │ ├── design.md │ ├── hot.md │ ├── object-format.md │ ├── pack-format.md │ ├── protocol.md │ ├── pull-strategy.md │ ├── sparse-checkout.md │ ├── stash.md │ ├── switch.md │ ├── version-negotiation.md │ ├── zeta.toml │ └── zeta.toml.example ├── go.mod ├── go.sum ├── modules/ │ ├── README.md │ ├── base58/ │ │ ├── LICENSE │ │ ├── README.md │ │ ├── alphabet.go │ │ ├── base58.go │ │ ├── base58_test.go │ │ ├── base58bench_test.go │ │ ├── base58check.go │ │ ├── base58check_test.go │ │ ├── cov_report.sh │ │ ├── doc.go │ │ ├── example_test.go │ │ └── genalphabet.go │ ├── binary/ │ │ ├── read.go │ │ └── write.go │ ├── bitmap/ │ │ ├── LICENSE │ │ ├── bitmap.go │ │ └── bitmap_test.go │ ├── chardet/ │ │ ├── 2022.go │ │ ├── LICENSE │ │ ├── VERSION │ │ ├── detector.go │ │ ├── encoding.go │ │ ├── icu-license.html │ │ ├── multi_byte.go │ │ ├── recognizer.go │ │ ├── single_byte.go │ │ ├── unicode.go │ │ └── utf8.go │ ├── command/ │ │ ├── command.go │ │ ├── shepherd.go │ │ ├── shepherd_linux.go │ │ ├── shepherd_test.go │ │ ├── shepherd_unix.go │ │ ├── shepherd_win.go │ │ └── util.go │ ├── crc/ │ │ └── reader.go │ ├── deflect/ │ │ ├── az.go │ │ ├── deflect.go │ │ ├── deflect_test.go │ │ ├── du.go │ │ ├── pack.go │ │ └── struct.go │ ├── diferenco/ │ │ ├── MERGE_PARALLEL.md │ │ ├── README.md │ │ ├── algorithms_bench_test.go │ │ ├── color/ │ │ │ └── color.go │ │ ├── diferenco.go │ │ ├── diferenco_test.go │ │ ├── gen_unicode.go │ │ ├── histogram.go │ │ ├── histogram_test.go │ │ ├── lcs/ │ │ │ ├── LICENSE │ │ │ ├── common.go │ │ │ ├── common_test.go │ │ │ ├── doc.go │ │ │ ├── git.sh │ │ │ ├── labels.go │ │ │ ├── old.go │ │ │ ├── old_test.go │ │ │ └── sequence.go │ │ ├── merge.go │ │ ├── merge_parallel.go │ │ ├── merge_parallel_bench_test.go │ │ ├── merge_parallel_test.go │ │ ├── merge_test.go │ │ ├── minimal.go │ │ ├── minimal_test.go │ │ ├── myers.go │ │ ├── myers_bench_test.go │ │ ├── myers_test.go │ │ ├── onp.go │ │ ├── onp_test.go │ │ ├── patience.go │ │ ├── patience_bench_test.go │ │ ├── patience_test.go │ │ ├── regression_test.go │ │ ├── sink.go │ │ ├── sink_test.go │ │ ├── suffixarray.go │ │ ├── suffixarray_test.go │ │ ├── testdata/ │ │ │ ├── a.txt │ │ │ ├── b.txt │ │ │ ├── css_1.css │ │ │ ├── css_2.css │ │ │ ├── simple_1.scss │ │ │ └── simple_2.scss │ │ ├── text.go │ │ ├── unicode.go │ │ ├── unicode_data.go │ │ ├── unicode_test.go │ │ ├── unified.go │ │ └── unified_encoder.go │ ├── env/ │ │ ├── broker.go │ │ ├── builder.go │ │ ├── constant.go │ │ ├── env.go │ │ ├── env_test.go │ │ ├── env_unix.go │ │ ├── env_windows.go │ │ └── env_windows_test.go │ ├── fnmatch/ │ │ ├── LICENSE │ │ ├── VERSION │ │ ├── fnmatch.go │ │ └── fnmatch_test.go │ ├── gcfg/ │ │ ├── LICENSE │ │ ├── Makefile │ │ ├── VERSION │ │ ├── doc.go │ │ ├── errors.go │ │ ├── errors_test.go │ │ ├── example_test.go │ │ ├── issues_test.go │ │ ├── read.go │ │ ├── read_test.go │ │ ├── scanner/ │ │ │ ├── errors.go │ │ │ ├── example_test.go │ │ │ ├── scanner.go │ │ │ └── scanner_test.go │ │ ├── set.go │ │ ├── token/ │ │ │ ├── position.go │ │ │ ├── position_test.go │ │ │ ├── serialize.go │ │ │ ├── serialize_test.go │ │ │ └── token.go │ │ └── types/ │ │ ├── bool.go │ │ ├── doc.go │ │ ├── enum.go │ │ ├── enum_test.go │ │ ├── int.go │ │ ├── int_test.go │ │ ├── scan.go │ │ └── scan_test.go │ ├── git/ │ │ ├── branch.go │ │ ├── command.go │ │ ├── commit.go │ │ ├── commit_test.go │ │ ├── config/ │ │ │ ├── config.go │ │ │ ├── decoder.go │ │ │ ├── option.go │ │ │ └── section.go │ │ ├── constant.go │ │ ├── decode.go │ │ ├── error.go │ │ ├── filemode.go │ │ ├── gitobj/ │ │ │ ├── LICENSE.md │ │ │ ├── README.md │ │ │ ├── SECURITY.md │ │ │ ├── VERSION │ │ │ ├── backend.go │ │ │ ├── backend_nix.go │ │ │ ├── backend_test.go │ │ │ ├── backend_windows.go │ │ │ ├── blob.go │ │ │ ├── blob_test.go │ │ │ ├── commit.go │ │ │ ├── commit_test.go │ │ │ ├── errors/ │ │ │ │ ├── errors.go │ │ │ │ └── errors_test.go │ │ │ ├── errors.go │ │ │ ├── errors_test.go │ │ │ ├── file_storer.go │ │ │ ├── memory_storer.go │ │ │ ├── memory_storer_test.go │ │ │ ├── object.go │ │ │ ├── object_db.go │ │ │ ├── object_db_test.go │ │ │ ├── object_reader.go │ │ │ ├── object_reader_test.go │ │ │ ├── object_type.go │ │ │ ├── object_type_test.go │ │ │ ├── object_writer.go │ │ │ ├── object_writer_test.go │ │ │ ├── pack/ │ │ │ │ ├── bounds.go │ │ │ │ ├── bounds_test.go │ │ │ │ ├── chain.go │ │ │ │ ├── chain_base.go │ │ │ │ ├── chain_base_test.go │ │ │ │ ├── chain_delta.go │ │ │ │ ├── chain_delta_test.go │ │ │ │ ├── chain_test.go │ │ │ │ ├── delayed_object.go │ │ │ │ ├── errors.go │ │ │ │ ├── errors_test.go │ │ │ │ ├── index.go │ │ │ │ ├── index_decode.go │ │ │ │ ├── index_decode_test.go │ │ │ │ ├── index_entry.go │ │ │ │ ├── index_test.go │ │ │ │ ├── index_v1.go │ │ │ │ ├── index_v1_test.go │ │ │ │ ├── index_v2.go │ │ │ │ ├── index_v2_test.go │ │ │ │ ├── index_version.go │ │ │ │ ├── io.go │ │ │ │ ├── io_test.go │ │ │ │ ├── object.go │ │ │ │ ├── object_test.go │ │ │ │ ├── packfile.go │ │ │ │ ├── packfile_decode.go │ │ │ │ ├── packfile_decode_test.go │ │ │ │ ├── packfile_test.go │ │ │ │ ├── set.go │ │ │ │ ├── set_test.go │ │ │ │ ├── storage.go │ │ │ │ ├── type.go │ │ │ │ └── type_test.go │ │ │ ├── storage/ │ │ │ │ ├── backend.go │ │ │ │ ├── decompressing_readcloser.go │ │ │ │ ├── multi_storage.go │ │ │ │ └── storage.go │ │ │ ├── storer.go │ │ │ ├── tag.go │ │ │ ├── tag_test.go │ │ │ ├── tree.go │ │ │ └── tree_test.go │ │ ├── hash.go │ │ ├── hash_test.go │ │ ├── object.go │ │ ├── odb.go │ │ ├── reference.go │ │ ├── reftable/ │ │ │ └── reftable.go │ │ ├── remote.go │ │ ├── repo.go │ │ ├── repo_test.go │ │ ├── signature.go │ │ ├── stats/ │ │ │ ├── commit-graph.go │ │ │ └── status.go │ │ ├── tag.go │ │ ├── tree.go │ │ ├── updateref.go │ │ ├── util.go │ │ ├── version.go │ │ └── version_test.go │ ├── hexview/ │ │ ├── format.go │ │ └── format_test.go │ ├── keyring/ │ │ ├── LICENSE │ │ ├── README.md │ │ ├── VERSION │ │ ├── keyring.go │ │ ├── keyring_darwin.go │ │ ├── keyring_darwin_security.go │ │ ├── keyring_darwin_security_test.go │ │ ├── keyring_darwin_test.go │ │ ├── keyring_file.go │ │ ├── keyring_file_test.go │ │ ├── keyring_test.go │ │ ├── keyring_unix.go │ │ ├── keyring_windows.go │ │ └── secret_service/ │ │ └── secret_service.go │ ├── lfs/ │ │ ├── LICENSE │ │ ├── error.go │ │ ├── pointer.go │ │ └── pointer_test.go │ ├── locale/ │ │ ├── LICENSE │ │ ├── README.md │ │ ├── error.go │ │ ├── locale.go │ │ ├── locale_darwin.go │ │ ├── locale_js.go │ │ ├── locale_posix.go │ │ ├── locale_shared.go │ │ └── locale_windows.go │ ├── merkletrie/ │ │ ├── LICENSE │ │ ├── change.go │ │ ├── difftree.go │ │ ├── doc.go │ │ ├── doubleiter.go │ │ ├── filesystem/ │ │ │ ├── node.go │ │ │ └── node_test.go │ │ ├── index/ │ │ │ └── node.go │ │ ├── internal/ │ │ │ ├── frame/ │ │ │ │ └── frame.go │ │ │ └── fsnoder/ │ │ │ ├── dir.go │ │ │ ├── doc.go │ │ │ ├── file.go │ │ │ └── new.go │ │ ├── iter.go │ │ └── noder/ │ │ ├── noder.go │ │ ├── path.go │ │ ├── sparse.go │ │ └── sparse_test.go │ ├── mime/ │ │ ├── LICENSE │ │ ├── README.md │ │ ├── VERSION │ │ ├── internal/ │ │ │ ├── charset/ │ │ │ │ ├── charset.go │ │ │ │ └── charset_test.go │ │ │ ├── csv/ │ │ │ │ ├── parser.go │ │ │ │ └── parser_test.go │ │ │ ├── json/ │ │ │ │ ├── parser.go │ │ │ │ └── parser_test.go │ │ │ ├── magic/ │ │ │ │ ├── archive.go │ │ │ │ ├── archive_test.go │ │ │ │ ├── audio.go │ │ │ │ ├── binary.go │ │ │ │ ├── database.go │ │ │ │ ├── document.go │ │ │ │ ├── font.go │ │ │ │ ├── ftyp.go │ │ │ │ ├── geo.go │ │ │ │ ├── image.go │ │ │ │ ├── magic.go │ │ │ │ ├── magic_test.go │ │ │ │ ├── meteo.go │ │ │ │ ├── ms_office.go │ │ │ │ ├── netpbm.go │ │ │ │ ├── ogg.go │ │ │ │ ├── text.go │ │ │ │ ├── text_csv.go │ │ │ │ ├── text_test.go │ │ │ │ ├── video.go │ │ │ │ ├── zip.go │ │ │ │ └── zip_test.go │ │ │ ├── markup/ │ │ │ │ ├── markup.go │ │ │ │ └── markup_test.go │ │ │ └── scan/ │ │ │ ├── bytes.go │ │ │ └── bytes_test.go │ │ ├── mime.go │ │ ├── mime_test.go │ │ ├── mimetsx │ │ ├── mimetype.go │ │ ├── sanitize.go │ │ └── tree.go │ ├── oss/ │ │ ├── bucket.go │ │ ├── delete.go │ │ ├── error.go │ │ ├── gcs.example │ │ ├── list.go │ │ ├── misc.go │ │ ├── misc_test.go │ │ ├── multipart.go │ │ ├── oss.go │ │ ├── s3.example │ │ ├── signature.go │ │ └── upload.go │ ├── patchview/ │ │ ├── highlight.go │ │ ├── highlight_test.go │ │ ├── renderer.go │ │ ├── status_bar.go │ │ ├── styles.go │ │ └── view.go │ ├── plumbing/ │ │ ├── LICENSE │ │ ├── error.go │ │ ├── filemode/ │ │ │ ├── filemode.go │ │ │ └── filemode_test.go │ │ ├── format/ │ │ │ ├── ignore/ │ │ │ │ ├── dir.go │ │ │ │ ├── doc.go │ │ │ │ ├── ignore_test.go │ │ │ │ ├── matcher.go │ │ │ │ └── pattern.go │ │ │ ├── index/ │ │ │ │ ├── decoder.go │ │ │ │ ├── decoder_test.go │ │ │ │ ├── doc.go │ │ │ │ ├── encoder.go │ │ │ │ ├── encoder_test.go │ │ │ │ ├── index.go │ │ │ │ └── match.go │ │ │ ├── pktline/ │ │ │ │ ├── encoder.go │ │ │ │ ├── encoder_test.go │ │ │ │ ├── scanner.go │ │ │ │ └── scanner_test.go │ │ │ └── readme.md │ │ ├── hash.go │ │ ├── reference.go │ │ └── validate.go │ ├── progressbar/ │ │ ├── LICENSE │ │ ├── VERSION │ │ ├── colorstring/ │ │ │ ├── LICENSE │ │ │ └── colorstring.go │ │ ├── progressbar.go │ │ └── spinners.go │ ├── securejoin/ │ │ ├── LICENSE │ │ ├── README.md │ │ ├── join.go │ │ └── vfs.go │ ├── shlex/ │ │ ├── LICENSE │ │ └── shlex.go │ ├── streamio/ │ │ ├── bytes.go │ │ ├── io.go │ │ ├── io_test.go │ │ ├── sync.go │ │ ├── zlib.go │ │ ├── zlib_test.go │ │ ├── zstd.go │ │ └── zstd_test.go │ ├── strengthen/ │ │ ├── du.go │ │ ├── du_test.go │ │ ├── du_windows.go │ │ ├── duration.go │ │ ├── duration_test.go │ │ ├── formatsize.go │ │ ├── fs_unix.go │ │ ├── fs_windows.go │ │ ├── limitwriter.go │ │ ├── measure.go │ │ ├── net.go │ │ ├── os_unix.go │ │ ├── os_windows.go │ │ ├── path.go │ │ ├── path_test.go │ │ ├── rid.go │ │ ├── rid_test.go │ │ ├── statfs.go │ │ ├── statfs_linux.go │ │ ├── statfs_openbsd.go │ │ ├── statfs_test.go │ │ ├── statfs_unix.go │ │ ├── statfs_windows.go │ │ └── strings.go │ ├── symlink/ │ │ ├── LICENSE │ │ ├── LICENSE.APACHE │ │ ├── LICENSE.BSD │ │ ├── doc.go │ │ ├── fs.go │ │ ├── fs_unix.go │ │ └── fs_windows.go │ ├── systemproxy/ │ │ ├── dialer.go │ │ ├── env.go │ │ ├── http.go │ │ ├── http_test.go │ │ ├── internal/ │ │ │ ├── readme.md │ │ │ └── socks/ │ │ │ ├── client.go │ │ │ └── socks.go │ │ ├── pre_host.go │ │ ├── pre_host_test.go │ │ ├── proxy.go │ │ ├── proxy_darwin.go │ │ ├── proxy_darwin_test.go │ │ ├── proxy_others.go │ │ ├── proxy_test.go │ │ ├── proxy_windows.go │ │ ├── socks5.go │ │ ├── url.go │ │ └── url_test.go │ ├── term/ │ │ ├── color.go │ │ ├── fmt.go │ │ ├── fmt_test.go │ │ ├── sanitized.go │ │ ├── terminal.go │ │ ├── terminal_others.go │ │ └── terminal_windows.go │ ├── trace/ │ │ ├── error.go │ │ ├── trace.go │ │ └── trace_test.go │ ├── tui/ │ │ ├── color.go │ │ ├── confirm.go │ │ ├── input.go │ │ └── pager.go │ ├── vfs/ │ │ ├── LICENSE │ │ ├── bound.go │ │ ├── bound_test.go │ │ ├── glob.go │ │ └── vfs.go │ ├── viewport/ │ │ ├── LICENSE │ │ ├── README.md │ │ ├── configuration.go │ │ ├── content_manager.go │ │ ├── display_manager.go │ │ ├── filterableviewport/ │ │ │ ├── filterableviewport.go │ │ │ ├── filterableviewport_filterlineposition_test.go │ │ │ ├── filterableviewport_filterlineprefix_test.go │ │ │ ├── filterableviewport_saving_test.go │ │ │ ├── filterableviewport_searchhistory_test.go │ │ │ ├── filterableviewport_test.go │ │ │ ├── filtermode.go │ │ │ ├── keymap.go │ │ │ └── styles.go │ │ ├── highlight.go │ │ ├── internal/ │ │ │ ├── fuzzy/ │ │ │ │ ├── fuzzy.go │ │ │ │ └── fuzzy_test.go │ │ │ └── test_util.go │ │ ├── item/ │ │ │ ├── ansi.go │ │ │ ├── ansi_test.go │ │ │ ├── concat.go │ │ │ ├── concat_test.go │ │ │ ├── item.go │ │ │ ├── item_bench_test.go │ │ │ ├── model.go │ │ │ ├── multiline.go │ │ │ ├── multiline_test.go │ │ │ ├── safecast.go │ │ │ ├── safecast_test.go │ │ │ ├── single.go │ │ │ ├── single_test.go │ │ │ ├── string.go │ │ │ └── string_test.go │ │ ├── keymap.go │ │ ├── navigation_manager.go │ │ ├── object.go │ │ ├── styles.go │ │ ├── viewport.go │ │ ├── viewport_multiline_test.go │ │ ├── viewport_no_selection_no_wrap_test.go │ │ ├── viewport_no_selection_wrap_test.go │ │ ├── viewport_postheader_test.go │ │ ├── viewport_prefooter_test.go │ │ ├── viewport_progressbar_test.go │ │ ├── viewport_saving_test.go │ │ ├── viewport_selection_no_wrap_test.go │ │ ├── viewport_selection_wrap_test.go │ │ └── viewport_test_util_test.go │ ├── wildmatch/ │ │ ├── LICENSE.md │ │ ├── package.go │ │ ├── wildmatch.go │ │ ├── wildmatch_casefold.go │ │ ├── wildmatch_nocasefold.go │ │ └── wildmatch_test.go │ └── zeta/ │ ├── backend/ │ │ ├── decode.go │ │ ├── encode.go │ │ ├── errors.go │ │ ├── file_storer.go │ │ ├── odb.go │ │ ├── odb_test.go │ │ ├── pack/ │ │ │ ├── bounds.go │ │ │ ├── encode.go │ │ │ ├── errors.go │ │ │ ├── index.go │ │ │ ├── index_version.go │ │ │ ├── pack_test.go │ │ │ ├── packfile.go │ │ │ ├── reader.go │ │ │ ├── set.go │ │ │ └── storage.go │ │ ├── pack-objects.go │ │ ├── pack-objects_test.go │ │ ├── prune.go │ │ ├── storage/ │ │ │ └── storage.go │ │ └── unpack.go │ ├── config/ │ │ ├── boolean_test.go │ │ ├── codec_toml.go │ │ ├── codec_toml_test.go │ │ ├── compat_test.go │ │ ├── config.go │ │ ├── config_test.toml │ │ ├── config_test_bad.toml │ │ ├── decode.go │ │ ├── decode_test.go │ │ ├── display.go │ │ ├── document.go │ │ ├── document_test.go │ │ ├── encode.go │ │ ├── encode_test.go │ │ ├── type.go │ │ ├── type_test.go │ │ ├── validate.go │ │ ├── value.go │ │ └── value_test.go │ ├── error.go │ ├── object/ │ │ ├── blob.go │ │ ├── change.go │ │ ├── change_adaptor.go │ │ ├── commit.go │ │ ├── commit_test.go │ │ ├── commit_walker.go │ │ ├── commit_walker_atime.go │ │ ├── commit_walker_bfs.go │ │ ├── commit_walker_bfs_filtered.go │ │ ├── commit_walker_ctime.go │ │ ├── commit_walker_limit.go │ │ ├── commit_walker_path.go │ │ ├── commit_walker_test.go │ │ ├── commit_walker_topo_order.go │ │ ├── difftree.go │ │ ├── file.go │ │ ├── fragments.go │ │ ├── merge_base.go │ │ ├── object.go │ │ ├── patch.go │ │ ├── patch_test.go │ │ ├── rename.go │ │ ├── storage.go │ │ ├── tag.go │ │ ├── tree.go │ │ ├── tree_test.go │ │ └── treenode.go │ ├── reflog/ │ │ ├── reflog.go │ │ └── reflog_test.go │ └── refs/ │ ├── backend.go │ ├── error.go │ ├── filesystem.go │ ├── filesystem_test.go │ ├── references.go │ ├── rules.go │ └── rules_test.go ├── pkg/ │ ├── command/ │ │ ├── README.md │ │ ├── command.go │ │ ├── command_add.go │ │ ├── command_branch.go │ │ ├── command_cat.go │ │ ├── command_check_ignore.go │ │ ├── command_checkout.go │ │ ├── command_cherry_pick.go │ │ ├── command_clean.go │ │ ├── command_commit.go │ │ ├── command_config.go │ │ ├── command_diff.go │ │ ├── command_fetch.go │ │ ├── command_for_each_ref.go │ │ ├── command_gc.go │ │ ├── command_hash_object.go │ │ ├── command_init.go │ │ ├── command_log.go │ │ ├── command_ls_files.go │ │ ├── command_ls_tree.go │ │ ├── command_merge.go │ │ ├── command_merge_base.go │ │ ├── command_merge_file.go │ │ ├── command_merge_tree.go │ │ ├── command_pull.go │ │ ├── command_push.go │ │ ├── command_rebase.go │ │ ├── command_remote.go │ │ ├── command_rename.go │ │ ├── command_reset.go │ │ ├── command_restore.go │ │ ├── command_rev_parse.go │ │ ├── command_revert.go │ │ ├── command_rm.go │ │ ├── command_show.go │ │ ├── command_stash.go │ │ ├── command_status.go │ │ ├── command_switch.go │ │ ├── command_tag.go │ │ ├── command_version.go │ │ └── msic.go │ ├── kong/ │ │ ├── COPYING │ │ ├── FORK.md │ │ ├── README.md │ │ ├── build.go │ │ ├── callbacks.go │ │ ├── camelcase.go │ │ ├── context.go │ │ ├── defaults.go │ │ ├── doc.go │ │ ├── error.go │ │ ├── exit.go │ │ ├── global.go │ │ ├── guesswidth.go │ │ ├── help.go │ │ ├── hooks.go │ │ ├── interpolate.go │ │ ├── kong.go │ │ ├── levenshtein.go │ │ ├── mapper.go │ │ ├── model.go │ │ ├── negatable.go │ │ ├── options.go │ │ ├── resolver.go │ │ ├── scanner.go │ │ ├── tag.go │ │ ├── util.go │ │ └── visit.go │ ├── migrate/ │ │ ├── migrate.go │ │ └── progressbar.go │ ├── progress/ │ │ ├── indicators.go │ │ ├── multibar.go │ │ ├── progressbar.go │ │ └── progressbar_test.go │ ├── serve/ │ │ ├── argon2id/ │ │ │ ├── LICENSE │ │ │ ├── VERSION │ │ │ ├── argon2id.go │ │ │ └── argon2id_test.go │ │ ├── config.go │ │ ├── database/ │ │ │ ├── access_level.go │ │ │ ├── branches.go │ │ │ ├── database.go │ │ │ ├── error.go │ │ │ ├── keys.go │ │ │ ├── member.go │ │ │ ├── namespaces.go │ │ │ ├── reference.go │ │ │ ├── repositories.go │ │ │ ├── tags.go │ │ │ ├── types.go │ │ │ ├── update.go │ │ │ ├── user.go │ │ │ ├── user_test.go │ │ │ └── zeta.sql │ │ ├── encrypt.go │ │ ├── encrypt_test.go │ │ ├── httpserver/ │ │ │ ├── auth.go │ │ │ ├── bearer.go │ │ │ ├── config.go │ │ │ ├── management.go │ │ │ ├── metadata.go │ │ │ ├── request.go │ │ │ ├── response.go │ │ │ ├── server.go │ │ │ └── transfer.go │ │ ├── languages/ │ │ │ └── zh-CN.toml │ │ ├── languages.go │ │ ├── languages_test.go │ │ ├── odb/ │ │ │ ├── cache.go │ │ │ ├── database.go │ │ │ ├── decode.go │ │ │ ├── encode.go │ │ │ ├── odb.go │ │ │ ├── oss.go │ │ │ ├── quarantine.go │ │ │ └── unpack.go │ │ ├── protocol/ │ │ │ ├── input.go │ │ │ ├── pack.go │ │ │ ├── protocol.go │ │ │ └── range.go │ │ ├── repo/ │ │ │ ├── push.go │ │ │ ├── repositories.go │ │ │ ├── resources/ │ │ │ │ ├── README.md │ │ │ │ └── zetaignore │ │ │ └── revision.go │ │ └── sshserver/ │ │ ├── auth.go │ │ ├── command.go │ │ ├── command_ls-remote.go │ │ ├── command_metadata.go │ │ ├── command_objects.go │ │ ├── command_push.go │ │ ├── command_test.go │ │ ├── config.go │ │ ├── parseargv.go │ │ ├── rainbow/ │ │ │ ├── art.go │ │ │ ├── art_test.go │ │ │ └── rainbow.go │ │ ├── server.go │ │ └── session.go │ ├── tr/ │ │ ├── README.md │ │ ├── languages/ │ │ │ └── zh-CN.toml │ │ ├── translate.go │ │ └── translate_test.go │ ├── transport/ │ │ ├── client/ │ │ │ └── client.go │ │ ├── endpoint.go │ │ ├── http/ │ │ │ ├── auth.go │ │ │ ├── base.go │ │ │ ├── base_test.go │ │ │ ├── blob.go │ │ │ ├── external.go │ │ │ ├── external_test.go │ │ │ ├── metadata.go │ │ │ ├── netrc.go │ │ │ ├── netrc_test.go │ │ │ ├── push.go │ │ │ ├── reference.go │ │ │ └── trace.go │ │ ├── ssh/ │ │ │ ├── auth.go │ │ │ ├── base.go │ │ │ ├── command.go │ │ │ ├── command_test.go │ │ │ ├── config/ │ │ │ │ ├── AUTHORS.txt │ │ │ │ ├── CHANGELOG.md │ │ │ │ ├── LICENSE │ │ │ │ ├── Makefile │ │ │ │ ├── README.md │ │ │ │ ├── config.go │ │ │ │ ├── config_test.go │ │ │ │ ├── config_unix.go │ │ │ │ ├── config_windows.go │ │ │ │ ├── lexer.go │ │ │ │ ├── parser.go │ │ │ │ ├── parser_test.go │ │ │ │ ├── position.go │ │ │ │ ├── testdata/ │ │ │ │ │ ├── anotherfile │ │ │ │ │ ├── config-no-ending-newline │ │ │ │ │ ├── config1 │ │ │ │ │ ├── config1-with-match-directive │ │ │ │ │ ├── config2 │ │ │ │ │ ├── config3 │ │ │ │ │ ├── config4 │ │ │ │ │ ├── dos-lines │ │ │ │ │ ├── eol-comments │ │ │ │ │ ├── eqsign │ │ │ │ │ ├── extraspace │ │ │ │ │ ├── fuzz/ │ │ │ │ │ │ └── FuzzDecode/ │ │ │ │ │ │ ├── 3cfc035ae4867ca13fa7bfaf2793731f05fd4d59c3af8761ea365c7485c752fd │ │ │ │ │ │ └── 4f8b378d89916e9b4fd796f74f5b12efb5cd85faaba9fea8fbe419d6af63add8 │ │ │ │ │ ├── identities │ │ │ │ │ ├── include │ │ │ │ │ ├── include-recursive │ │ │ │ │ ├── invalid-port │ │ │ │ │ ├── match-directive │ │ │ │ │ ├── negated │ │ │ │ │ └── system-include │ │ │ │ ├── token.go │ │ │ │ ├── validators.go │ │ │ │ └── validators_test.go │ │ │ ├── knownhosts/ │ │ │ │ ├── LICENSE │ │ │ │ ├── knownhosts.go │ │ │ │ └── knownhosts_test.go │ │ │ ├── metadata.go │ │ │ ├── objects.go │ │ │ └── push.go │ │ ├── struct.go │ │ ├── struct_test.go │ │ ├── transport.go │ │ └── util.go │ ├── version/ │ │ ├── uname.go │ │ ├── uname_linux.go │ │ ├── uname_test.go │ │ ├── uname_unix.go │ │ ├── uname_windows.go │ │ ├── verison.go │ │ └── version_test.go │ └── zeta/ │ ├── aria2.go │ ├── blame.go │ ├── blame_test.go │ ├── branch.go │ ├── cat.go │ ├── cdc.go │ ├── config.go │ ├── display.go │ ├── dragonfly.go │ ├── editor.go │ ├── fetch.go │ ├── gc.go │ ├── log.go │ ├── log_test.go │ ├── lstree.go │ ├── merge_file.go │ ├── merge_tree.go │ ├── misc.go │ ├── misc_test.go │ ├── objects.go │ ├── odb/ │ │ ├── commit.go │ │ ├── counting-objects.go │ │ ├── decode.go │ │ ├── index.go │ │ ├── merge.go │ │ ├── merge_driver.go │ │ ├── merge_test.go │ │ ├── merge_text.go │ │ ├── odb.go │ │ ├── pack.go │ │ ├── references.go │ │ ├── transfer.go │ │ ├── tree.go │ │ ├── unpack.go │ │ ├── unpack_test.go │ │ └── util.go │ ├── options.go │ ├── pager.go │ ├── promisor.go │ ├── push.go │ ├── references.go │ ├── references_test.go │ ├── repository.go │ ├── revision.go │ ├── revision_test.go │ ├── safetensors.go │ ├── safetensors_test.go │ ├── show.go │ ├── showdiff.go │ ├── status.go │ ├── switch.go │ ├── switch_test.go │ ├── tag.go │ ├── transfer.go │ ├── tree.go │ ├── update.go │ ├── worktree.go │ ├── worktree_bsd.go │ ├── worktree_checkout.go │ ├── worktree_co-extra.go │ ├── worktree_commit.go │ ├── worktree_diff.go │ ├── worktree_drawin.go │ ├── worktree_linux.go │ ├── worktree_ls_files.go │ ├── worktree_merge.go │ ├── worktree_pull.go │ ├── worktree_rebase.go │ ├── worktree_rename.go │ ├── worktree_replay.go │ ├── worktree_restore.go │ ├── worktree_stash.go │ ├── worktree_status.go │ ├── worktree_test.go │ ├── worktree_tree.go │ ├── worktree_unix_other.go │ └── worktree_windows.go ├── script/ │ ├── inno.sh │ ├── release.bat │ ├── release.ps1 │ ├── release.sh │ └── zeta.iss ├── share/ │ ├── zeta-serve-httpd.toml │ └── zeta-serve-sshd.toml └── utils/ ├── auth/ │ └── auth.go ├── bar2/ │ └── main.go ├── cli/ │ └── command_test.go ├── darwinproxy/ │ └── darwinproxy_test.go ├── diffbug/ │ ├── a.txt │ ├── b.txt │ ├── c.txt │ └── difffix_test.go ├── fs_warning/ │ └── main.go ├── keyring/ │ └── main.go ├── lcs/ │ └── lcs.go ├── match/ │ └── match_test.go ├── mimex/ │ ├── a.txt │ ├── a16.txt │ ├── a8.txt │ ├── b.txt │ ├── b16.txt │ ├── conflict-16-8-16.txt │ ├── conflict-16.txt │ ├── conflict.txt │ ├── mime_test.go │ ├── o16.txt │ └── origin.txt ├── rename/ │ ├── rename_test.go │ └── rename_windows.go ├── setv/ │ └── main.go └── term/ └── detect.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" commit-message: prefix: "build" - package-ecosystem: "gomod" directory: "/" schedule: interval: "daily" commit-message: prefix: "build" groups: golang.org: patterns: - "golang.org/*" ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: paths-ignore: # https://help.github.com/en/actions/reference/workflow-syntax-for-github-actions - "docs/**" - "**.md" - "**.txt" - "LICENSE" pull_request: paths-ignore: - "docs/**" - "**.md" - "**.txt" - "LICENSE" jobs: build: strategy: matrix: platform: [ubuntu-latest, windows-latest] include: - platform: ubuntu-latest release_command: ./script/release.sh - platform: windows-latest release_command: pwsh -NoProfile -NoLogo -ExecutionPolicy unrestricted -File "./script/release.ps1" runs-on: ${{ matrix.platform }} steps: - uses: actions/checkout@v6 with: fetch-depth: 1 - name: Setup go uses: actions/setup-go@v6 with: go-version: "stable" - name: Build run: ${{ matrix.release_command }} - name: Upload release uses: svenstaro/upload-release-action@v2 if: startsWith(github.ref, 'refs/tags/') with: file_glob: true file: out/* tag: ${{ github.ref }} repo_token: ${{ secrets.GITHUB_TOKEN }} overwrite: true ================================================ FILE: .gitignore ================================================ bin/* /build/ /dest/ /etc/zeta.toml /cmd/zeta/zeta *.exe *.dll *.so *.a *.tar.gz *.zip /*.sh local/* .DS_Store *.gop1 *.tomlp1 *.modp1 *.sump1 *.rej *.mdp1 /out/ /.vscode/ .idea/* vendor/* Makefilep1 VERSIONp1 ================================================ FILE: .golangci.yml ================================================ version: 2 run: timeout: 5m issues-exit-code: 1 tests: true output: format: colored-line-number print-issued-lines: true print-linter-name: true linters: enable: # 复杂度检查(核心) # - nestif # 检查嵌套的 if 语句 # 静态分析 - staticcheck # 静态分析工具 # 代码简化 - predeclared # 检查是否使用了 Go 预定义的标识符 - unconvert # 检查不必要的类型转换 - wastedassign # 检查浪费的赋值语句 # 错误处理 - errcheck # 检查未处理的错误 - errorlint # 检查错误处理中的常见问题 # 其他有用的 linter - ineffassign # 检查无效赋值 linters-settings: nestif: min-complexity: 5 issues: exclude-rules: # 测试文件可以放宽一些限制 - path: _test\.go linters: - errcheck # 生成的代码可以跳过检查 - path: '.*\.pb\.go' linters: - all max-issues-per-linter: 0 max-same-issues: 0 new: false ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ## [0.23.0] - 2026-04-22 ### Added - **Hot Diff/Show Commands**: Add `hot diff` and `hot show` commands for viewing differences in git repositories - **Interactive Diff Navigation**: Add `--nav` flag to `zeta diff` and `zeta show` commands for built-in interactive diff viewer with syntax highlighting - **Advanced Viewport Module**: Import feature-rich viewport component with text wrapping, selection, and filtering capabilities - **MultiBar Progress**: Rewrite progress bar component using `bubbles/progress` with concurrent multi-bar rendering and EWMA speed tracking - **LOONG64 Support**: Enable builds for LoongArch64 architecture ### Changed - **Patch View Improvements**: - Refactor patchview module with improved navigation mode - Add LRU cache for syntax highlighting (up to 1000 entries) - Remove standalone word-diff in favor of integrated nav mode - Enhance diff theme and rendering - **TUI Enhancements**: - Switch to custom viewport implementation for better control - Optimize pager rendering performance - Improve word diff performance - **Code Cleanup**: - Remove legacy `diffformat.go` module (287 lines removed) - Code tidy and refactoring across multiple modules ### Fixed - Fix double close issue in `writeCredentials` for keyring file storage - Harden keyring file storage with atomic writes and lock handling - Fix `truncatePath` in hot commands - Fix pager status bar space display - Fix multi `-m` flag handling in commit command - Fix small bug in diferenco module ### Dependencies - **Updated**: - `charm.land/bubbletea/v2` from v2.0.2 to v2.0.6 - `charm.land/lipgloss/v2` from v2.0.2 to v2.0.3 - `golang.org/x/crypto` from v0.49.0 to v0.50.0 - `golang.org/x/net` from v0.52.0 to v0.53.0 - `golang.org/x/sys` from v0.42.0 to v0.43.0 - `golang.org/x/term` from v0.41.0 to v0.42.0 - `golang.org/x/text` from v0.35.0 to v0.36.0 - **Added**: `github.com/zeebo/xxh3` v1.1.0 for fast hashing - **Removed**: `github.com/vbauerster/mpb/v8` (replaced by custom MultiBar implementation) ## [0.22.0] - 2026-03-27 ### Added - **FastCDC Chunking**: Implement FastCDC (Content-Defined Chunking) algorithm for AI model storage optimization, supporting Safetensors format (`#7`) - **Word Diff**: Support simple word-level diff in `zeta diff` and `zeta show` commands - **Secure Keyring Storage**: Add keyring support for secure credential storage - macOS: Keychain integration - Windows: Windows Credential Manager integration - Linux: File-based storage backend - **Network Filesystem Warning**: Automatically detect and warn about network filesystems (NFS, Ceph, SMB) with highlighted filesystem names ### Changed - **TUI Framework Migration**: Switch from custom survey module to `charmbracelet/huh` for better terminal UI experience (removed 10,000+ lines of legacy code) - **Improved Table Rendering**: Replace `go-pretty` with `bubbletea table` for better TUI rendering in `zeta hot` commands - **Enhanced Pager**: Add space key support for page navigation in TUI pager - **Diferenco Improvements**: - Add `name` field to `FileStat` - Add `Format()` method to `Patch` - Optimize `MergeParallel` implementation - Improve `SplitWords` algorithm - Enhance Myers diff algorithm - **Performance Optimizations**: - Optimize worktree operations - Improve commit decoding efficiency - Enhance system proxy detection accuracy ### Fixed - Fix multiple keyring issues on Windows and Unix platforms - Fix panic in `wildmatch` pattern matching - Fix tree cache corruption issues - Fix missing context in commit walker - Fix zlib handling edge cases - Fix split words boundary issues - Fix trace color display ### Dependencies - **Go 1.26**: Upgrade to Go 1.26.0 - **Removed**: `testify` testing dependency - **Updated**: - `charm.land` ecosystem modules (bubbles, bubbletea, glamour, huh, lipgloss) - `github.com/ProtonMail/go-crypto` v1.4.1 - `github.com/klauspost/compress` v1.18.5 - `github.com/dgraph-io/ristretto/v2` v2.4.0 - Multiple `golang.org/x` modules ### Documentation - Add CDC (Content-Defined Chunking) documentation (`docs/cdc.md`) - Update README with latest features - Improve documentation organization ### Internationalization - Complete Chinese (zh-CN) translations - Add missing i18n entries ## [0.21.0] - 2025-12-16 ### Added - Initial stable release with core version control features - Metadata and file data separation architecture - Distributed database for metadata storage - Object storage for file content - Efficient transfer protocol - Fragment object support for large files - Support for AI model development, game development, and monorepo scenarios [Unreleased]: https://github.com/antgroup/hugescm/compare/v0.22.0...HEAD [0.22.0]: https://github.com/antgroup/hugescm/compare/v0.21.0...v0.22.0 [0.21.0]: https://github.com/antgroup/hugescm/releases/tag/v0.21.0 ================================================ FILE: LEGAL.md ================================================ Legal Disclaimer Within this source code, the comments in Chinese shall be the original, governing version. Any comment in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail. 法律免责声明 关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。 ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: Makefile ================================================ SHELL = /usr/bin/env bash -eo pipefail PKG := github.com/antgroup/hugescm SOURCE_DIR := $(abspath $(dir $(lastword ${MAKEFILE_LIST}))) BUILD_DIR := ${SOURCE_DIR}/_build BUILD_TIME := $(shell date +'%Y-%m-%dT%H:%M:%S%z') BUILD_COMMIT := $(shell git rev-parse --short HEAD 2>/dev/null || echo 'none') BUILD_VERSION := $(shell cat VERSION || echo '0.23.0') GO_PACKAGES := $(shell go list ./... | grep -v '^${PKG}/mock/' | grep -v '^${PKG}/proto/') GO_LDFLAGS := -ldflags '-X ${PKG}/pkg/version.version=${BUILD_VERSION} -X ${PKG}/pkg/version.buildTime=${BUILD_TIME} -X ${PKG}/pkg/version.buildCommit=${BUILD_COMMIT}' .PHONY: all all: zeta zeta-mc hot .PHONY: build build: zeta zeta-mc hot .PHONY: zeta zeta: GOOS=${BUILD_TARGET} GOARCH=${BUILD_ARCH} go build -C cmd/zeta ${GO_LDFLAGS} -o ${CURDIR}/bin/zeta .PHONY: zeta-mc zeta-mc: GOOS=${BUILD_TARGET} GOARCH=${BUILD_ARCH} go build -C cmd/zeta-mc ${GO_LDFLAGS} -o ${CURDIR}/bin/zeta-mc .PHONY: hot hot: GOOS=${BUILD_TARGET} GOARCH=${BUILD_ARCH} go build -C cmd/hot ${GO_LDFLAGS} -o ${CURDIR}/bin/hot .PHONY: zeta-serve zeta-serve: GOOS=${BUILD_TARGET} GOARCH=${BUILD_ARCH} go build -C cmd/zeta-serve ${GO_LDFLAGS} -o ${CURDIR}/bin/zeta-serve ================================================ FILE: README.md ================================================ # HugeSCM - A next generation cloud-based version control system [![license badge](https://img.shields.io/github/license/antgroup/hugescm.svg)](LICENSE) [![Master Branch Status](https://github.com/antgroup/hugescm/workflows/CI/badge.svg)](https://github.com/antgroup/hugescm/actions) [![Latest Release Downloads](https://img.shields.io/github/downloads/antgroup/hugescm/latest/total.svg)](https://github.com/antgroup/hugescm/releases/latest) [![Total Downloads](https://img.shields.io/github/downloads/antgroup/hugescm/total.svg)](https://github.com/antgroup/hugescm/releases) [![Version](https://img.shields.io/github/v/release/antgroup/hugescm)](https://github.com/antgroup/hugescm/releases/latest) [简体中文](./README.zh-CN.md) ## Overview HugeSCM (codename zeta) is a cloud-native version control system designed for large-scale repositories. By separating metadata from file data, it overcomes storage and transmission limitations of traditional VCS like Git and SVN. Ideal for AI model development, game development, and monorepo scenarios. Key features: + **Data separation**: Stores metadata in distributed database, file content in object storage + **Efficient protocol**: Optimized transmission reduces bandwidth and time costs + **Fragment objects**: Handles large binary files (AI models, dependencies) efficiently Built on Git's principles without its historical constraints. ## Use Cases ### AI Model Development - Store checkpoint files (tens to hundreds of GB) - Model version management and incremental updates - Multi-team collaboration ### Game Development - Large binary resource management - Art asset version control ### Dataset Storage - Large-scale dataset version management - Data annotation collaboration ## Documentation ### Design & Architecture | Document | Description | |----------|-------------| | [design.md](./docs/design.md) | Design Philosophy - Core design concepts, architecture overview, differences from Git | | [object-format.md](./docs/object-format.md) | Object Format - Binary formats for Blob, Tree, Commit, Fragments objects | | [pack-format.md](./docs/pack-format.md) | Pack File Format - Object packaging mechanism and index format | | [protocol.md](./docs/protocol.md) | Transport Protocol - HTTP/SSH protocols, authorization, metadata and file transfer | | [version-negotiation.md](./docs/version-negotiation.md) | Version Negotiation - Baseline management, checkout, pull, push workflows | ### Configuration Reference | Document | Description | |----------|-------------| | [config.md](./docs/config.md) | Configuration File - Supported configuration options and environment variables | ### Feature Guides | Document | Description | |----------|-------------| | [switch.md](./docs/switch.md) | Branch Switching - switch command details for switching branches and commits | | [stash.md](./docs/stash.md) | Stash Feature - stash command for temporarily saving work progress | | [sparse-checkout.md](./docs/sparse-checkout.md) | Sparse Checkout - On-demand checkout of specified directories | | [pull-strategy.md](./docs/pull-strategy.md) | Pull Strategy - merge, rebase, fast-forward strategy details | ### Advanced Features | Document | Description | |----------|-------------| | [cdc.md](./docs/cdc.md) | CDC Chunking - Content-Defined Chunking implementation and configuration | | [hot.md](./docs/hot.md) | hot command - Git repository maintenance tool for cleanup, migration, and optimization | ## Build After installing the latest version of Golang, developers can build HugeSCM client using [bali](https://github.com/balibuild/bali) (build packaging tool). ```sh bali -T windows # create rpm,deb,tar,sh pack bali -T linux -A amd64 --pack='rpm,deb,tar,sh' ``` The bali build tool can create `zip`, `deb`, `tar`, `rpm`, `sh (STGZ)` compression/installation packages. ### Windows Installation Package We provide an Inno Setup script. You can use Docker + wine to generate an installation package without Windows: ```shell docker run --rm -i -v "$TOPLEVEL:/work" amake/innosetup xxxxx.iss ``` Before running this, build the Windows binary first: `bali --target=windows --arch=amd64`. > Note: On macOS with Apple Silicon, you can use OrbStack with Rosetta to run this image. ## Usage Users can run `zeta -h` to view all zeta commands, and run `zeta ${command} -h` to view detailed command help. We try to make it easy for git users to get started with zeta, and we will also enhance some commands. For example, many zeta commands support `--json` to format the output as json, which is convenient for integration with various tools. ### Config ```shell zeta config --global user.email 'zeta@example.io' zeta config --global user.name 'Example User' ``` ### Checkout The process to obtain a remote repository in git is called `clone` (or `fetch`). In zeta, we use `checkout`, abbreviated as `co`. Below is how to `checkout` a repository: ```shell zeta co http://zeta.example.io/group/repo xh1 zeta co http://zeta.example.io/group/repo xh1 -s dir1 ``` ### Track and Commit We have implemented git-like `status`, `add`, and `commit` commands, usable except in interactive mode. Use `-h` for help. On properly configured systems, zeta displays the corresponding language version. ```shell echo "hello world" > helloworld.txt zeta add helloworld.txt zeta commit -m "Hello world" ``` ### Push and Pull ```shell zeta push zeta pull ``` ## Features ### Download Acceleration Supports `direct`, `dragonfly`, and `aria2` accelerators via `core.accelerator` or `ZETA_CORE_ACCELERATOR` env var. | Accelerator | Description | | :---: | --- | | `direct` | Download directly from OSS via signed URLs (recommended for AI scenarios) | | `dragonfly` | Use dragonfly cluster for P2P acceleration | | `aria2` | Use aria2c for multi-threaded downloads | ```shell zeta config --global core.accelerator direct zeta config --global core.concurrenttransfers 8 # parallel downloads (1-50) ``` ### One-by-One Checkout Checkout files one at a time and immediately release blob objects, saving **60%+** disk space for large repositories. ```shell zeta co http://zeta.example.io/zeta-poc-test/zeta-poc-test --one ``` ![](./docs/images/one-by-one.png) ### On-demand Access Automatically downloads missing objects when needed (e.g., `zeta cat`, merge). Disable with `ZETA_CORE_PROMISOR=0`. ### Sparse Checkout Sparse checkout allows users to check out only specific directories instead of the entire repository. This is especially useful for large repositories: ```shell # Check out specific directories zeta co http://zeta.example.io/group/repo myrepo -s src/core -s src/utils ``` ### Checkout Single File In zeta, you can checkout a single file by adding `--limit=0` during the checkout process, which excludes all files except empty ones. Then, use `zeta checkout -- path` to check out the specific file. ```shell zeta co http://zeta.example.io/zeta-poc-test/zeta-poc-test --limit=0 z2 zeta checkout -- dev6.bin ``` ### Update Partial Files Some users may only want to modify specific files, which can be done by using `checkout single file` to checkout the desired file and then making the modifications. ```shell zeta add test1/2.txt zeta commit -m "XXX" zeta push ``` ### Pull Strategies HugeSCM supports three pull strategies: - **merge** - Create a merge commit (default) - **rebase** - Rebase local commits on top of remote - **fast-forward only** - Only allow fast-forward merges ```shell zeta pull # merge strategy (default) zeta pull --rebase # rebase strategy zeta pull --ff-only # fast-forward only ``` ### Stash Stash allows temporarily saving work progress: ```shell zeta stash # stash all changes zeta stash save "WIP: feature" # stash with message zeta stash list # list all stashes zeta stash pop # apply and remove latest stash ``` ### Switch Branches Switch between branches or commits: ```shell zeta switch feature # switch to branch zeta switch -c new-feature # create and switch to new branch zeta switch abc123 # switch to specific commit ``` ### Migrate Repository from Git to HugeSCM ```shell zeta-mc https://github.com/antgroup/hugescm.git hugescm-dev ``` ## CDC (Content-Defined Chunking) HugeSCM introduces CDC for efficient handling of large files. Unlike traditional fixed-size chunking, CDC determines chunk boundaries based on content, achieving better deduplication: | Scenario | Fixed Chunking | CDC Chunking | |----------|---------------|--------------| | Local modification | All subsequent chunks change | Only 1-2 chunks change | | Incremental sync | Transfer complete file | Transfer only changed chunks | | Deduplication | Low | High | Enable CDC in configuration: ```toml [fragment] threshold = "1GB" # File size threshold size = "1GB" # Target chunk size (fixed chunking) enable_cdc = true # Enable CDC chunking ``` ## Comparison with Git | Feature | Git | HugeSCM | |---------|-----|---------| | Architecture | Distributed | Centralized | | Clone method | Full clone | On-demand checkout | | Hash algorithm | SHA-1/SHA-256 | BLAKE3 | | Large file support | Git LFS | Built-in Fragments | | Data storage | Local filesystem | DB + OSS | ### Command Comparison | Git Command | HugeSCM Command | Description | |-------------|-----------------|-------------| | `git clone` | `zeta checkout` (co) | Checkout repository, not full clone | | `git fetch` | `zeta pull --fetch` | Fetch data only | | `git pull` | `zeta pull` | Pull and merge | | `git switch` | `zeta switch` | Switch branches | ## Additional Tools - hot command `hot` is a Git repository maintenance tool for cleaning up, migrating, and optimizing Git repositories. ### Common Use Cases | Task | Command | |------|---------| | Find large files | `hot size` / `hot smart -L20m` | | Remove sensitive data | `hot remove path/to/secret.txt --prune` | | Migrate SHA1 → SHA256 | `hot mc https://github.com/user/repo.git` | | Clean stale refs | `hot prune-refs "feature/deprecated-"` | | Linearize history | `hot unbranch --confirm` | | Inspect objects | `hot cat HEAD --json` | See [docs/hot.md](./docs/hot.md) for full documentation. ## License Apache License Version 2.0, see [LICENSE](LICENSE) ================================================ FILE: README.zh-CN.md ================================================ # HugeSCM - 基于云的下一代版本控制系统 [![license badge](https://img.shields.io/github/license/antgroup/hugescm.svg)](LICENSE) [![Master Branch Status](https://github.com/antgroup/hugescm/workflows/CI/badge.svg)](https://github.com/antgroup/hugescm/actions) [![Latest Release Downloads](https://img.shields.io/github/downloads/antgroup/hugescm/latest/total.svg)](https://github.com/antgroup/hugescm/releases/latest) [![Total Downloads](https://img.shields.io/github/downloads/antgroup/hugescm/total.svg)](https://github.com/antgroup/hugescm/releases) [![Version](https://img.shields.io/github/v/release/antgroup/hugescm)](https://github.com/antgroup/hugescm/releases/latest) [English](./README.md) ## 概述 HugeSCM(代号 zeta)是云原生版本控制系统,专为大规模存储库设计。通过元数据与文件数据分离,突破了 Git/SVN 等传统版本控制系统在存储和传输上的限制。适用于 AI 大模型研发、游戏研发、单一大库等场景。 核心特性: + **数据分离**:元数据存储于分布式数据库,文件内容存储于对象存储 + **高效传输**:优化传输协议,降低带宽和时间成本 + **分片对象**:高效处理大文件(AI 模型、二进制依赖等) 吸取 Git 经验,摆脱历史包袱。 ## 适用场景 ### AI 大模型研发 - 存储 checkpoint 文件(数十 GB 到数百 GB) - 模型版本管理和增量更新 - 多团队协作 ### 游戏研发 - 大型二进制资源管理 - 美术资产版本控制 ### 数据集存储 - 大规模数据集版本管理 - 数据标注协作 ## 文档 ### 设计与架构 | 文档 | 描述 | |------|------| | [design.md](./docs/design.md) | 设计哲学 - 核心设计理念、架构概述、与 Git 的差异 | | [object-format.md](./docs/object-format.md) | 对象格式详解 - Blob、Tree、Commit、Fragments 等对象的二进制格式 | | [pack-format.md](./docs/pack-format.md) | Pack 文件格式 - 对象打包机制和索引格式 | | [protocol.md](./docs/protocol.md) | 传输协议规范 - HTTP/SSH 协议、授权、元数据和文件传输 | | [version-negotiation.md](./docs/version-negotiation.md) | 版本协商机制 - 基线管理、检出、拉取、推送流程 | ### 配置参考 | 文档 | 描述 | |------|------| | [config.md](./docs/config.md) | 配置文件说明 - 支持的配置项和环境变量 | ### 功能使用 | 文档 | 描述 | |------|------| | [switch.md](./docs/switch.md) | 分支切换 - switch 命令详解,切换分支和提交 | | [stash.md](./docs/stash.md) | 暂存功能 - stash 命令详解,临时保存工作进度 | | [sparse-checkout.md](./docs/sparse-checkout.md) | 稀疏检出 - 按需检出指定目录 | | [pull-strategy.md](./docs/pull-strategy.md) | 拉取策略 - merge、rebase、fast-forward 策略详解 | ### 高级特性 | 文档 | 描述 | |------|------| | [cdc.md](./docs/cdc.md) | CDC 分片 - Content-Defined Chunking 实现原理和配置 | | [hot.md](./docs/hot.md) | hot 命令 - Git 存储库维护工具,清理大文件、删除敏感数据、迁移对象格式 | ## 构建 开发者安装好最新版本的 Golang 后,可以使用 [bali](https://github.com/balibuild/bali)(构建打包工具)构建 HugeSCM 客户端。 ```sh bali -T windows # create rpm,deb,tar,sh pack bali -T linux -A amd64 --pack='rpm,deb,tar,sh' ``` bali 构建工具可以制作 `zip`, `deb`, `tar`, `rpm`, `sh (STGZ)` 压缩/安装包。 ### Windows 安装包 我们提供了 Inno Setup 脚本,可以使用 Docker + wine 在非 Windows 环境下生成安装包: ```shell docker run --rm -i -v "$TOPLEVEL:/work" amake/innosetup xxxxx.iss ``` 运行前请先构建 Windows 二进制:`bali --target=windows --arch=amd64`。 > 注意:在搭载 Apple Silicon 芯片的 macOS 上,可以使用 OrbStack 开启 Rosetta 运行该镜像。 ## 使用 用户可以运行 `zeta -h` 查看 zeta 所有命令,并运行 `zeta ${command} -h` 查看命令详细帮助,我们尽量让使用 git 的用户容易上手 zeta,同时也会对一些命令进行增强,比如很多 zeta 命令支持 `--json` 将输出格式化为 json,方便各种工具集成。 ### 配置 ```shell zeta config --global user.email 'zeta@example.io' zeta config --global user.name 'Example User' ``` ### 检出存储库 使用 git 获取远程存储库的操作叫 `clone`(当然也可以用 `fetch`),在 zeta 中,我们限制其操作为 `checkout`,你也可以缩写为 `co`,以下是检出一个存储库: ```shell zeta co http://zeta.example.io/group/repo xh1 zeta co http://zeta.example.io/group/repo xh1 -s dir1 ``` ### 修改、跟踪、提交 我们实现了类似 git 一样的 `status`、`add`、`commit` 命令,除了交互模式外,大体上是可用的,可以使用 `-h` 查看详细帮助,在正确设置了语言环境的系统中,zeta 会显示对应的语言版本。 ```shell echo "hello world" > helloworld.txt zeta add helloworld.txt zeta commit -m "Hello world" ``` ### 推送和拉取 ```shell zeta push zeta pull ``` ## 特点 ### 下载加速 支持 `direct`、`dragonfly`、`aria2` 三种加速器,通过 `core.accelerator` 或环境变量 `ZETA_CORE_ACCELERATOR` 配置。 | 加速器 | 说明 | | :---: | --- | | `direct` | 直接从 OSS 签名 URL 下载(AI 场景推荐) | | `dragonfly` | 使用 dragonfly 集群 P2P 加速 | | `aria2` | 使用 aria2c 多线程下载 | ```shell zeta config --global core.accelerator direct zeta config --global core.concurrenttransfers 8 # 并发下载数 (1-50) ``` ### 逐一检出 逐个检出文件并立即释放 blob 对象,大仓库可节省 **60%+** 磁盘空间。 ```shell zeta co http://zeta.example.io/zeta-poc-test/zeta-poc-test --one ``` ![](./docs/images/one-by-one.png) ### 按需获取 按需自动下载缺失对象(如 `zeta cat`、merge 场景)。禁用请设置 `ZETA_CORE_PROMISOR=0`。 ### 稀疏检出 稀疏检出允许用户只检出存储库中的部分目录,而非完整的工作区。这对于巨型存储库特别有用: ```shell # 检出指定目录 zeta co http://zeta.example.io/group/repo myrepo -s src/core -s src/utils ``` ### 检出单个文件 我们在 zeta 中可以检出单个文件,只需要在 co 的过程中添加 `--limit=0` 意味着除了空文件其他文件均不检出,然后使用 zeta checkout -- path 检出相应的文件即可: ```shell zeta co http://zeta.example.io/zeta-poc-test/zeta-poc-test --limit=0 z2 zeta checkout -- dev6.bin ``` ### 更新部分文件 有些用户仅想修改部分文件,同样可以做到,使用**检出单个文件**检出特定的文件后,修改后执行: ```shell zeta add test1/2.txt zeta commit -m "XXX" zeta push ``` ### 拉取策略 HugeSCM 支持三种拉取策略: - **merge** - 创建合并提交(默认) - **rebase** - 将本地提交变基到远程分支之上 - **fast-forward only** - 仅允许快进合并 ```shell zeta pull # merge 策略(默认) zeta pull --rebase # rebase 策略 zeta pull --ff-only # 仅快进合并 ``` ### 暂存功能 暂存功能允许临时保存工作进度: ```shell zeta stash # 暂存所有修改 zeta stash save "WIP: 功能开发中" # 带描述信息暂存 zeta stash list # 列出所有暂存 zeta stash pop # 应用并删除最近的暂存 ``` ### 分支切换 在不同分支或提交之间切换: ```shell zeta switch feature # 切换到分支 zeta switch -c new-feature # 创建并切换到新分支 zeta switch abc123 # 切换到特定提交 ``` ### 将存储库从 Git 迁移到 HugeSCM ```shell zeta-mc https://github.com/antgroup/hugescm.git hugescm-dev ``` ## CDC(内容定义分片) HugeSCM 引入了 CDC 用于高效处理大文件。与传统的固定大小分片不同,CDC 根据内容确定分片边界,实现更好的去重效果: | 场景 | 固定分片 | CDC 分片 | |------|---------|---------| | 局部修改 | 所有后续分片改变 | 仅 1-2 个分片改变 | | 增量同步 | 传输完整文件 | 仅传输变化分片 | | 去重效果 | 低 | 高 | 启用 CDC 配置: ```toml [fragment] threshold = "1GB" # 文件大小阈值 size = "1GB" # 目标分片大小(固定分片) enable_cdc = true # 启用 CDC 分片 ``` ## 与 Git 的主要差异 | 特性 | Git | HugeSCM | |-----|-----|---------| | 架构模式 | 分布式 | 集中式 | | 克隆方式 | 全量克隆 | 按需检出 | | 哈希算法 | SHA-1/SHA-256 | BLAKE3 | | 大文件支持 | Git LFS | 内置 Fragments | | 数据存储 | 本地文件系统 | DB + OSS | ### 命令对照 | Git 命令 | HugeSCM 命令 | 说明 | |---------|-------------|------| | `git clone` | `zeta checkout` (co) | 检出存储库,非全量克隆 | | `git fetch` | `zeta pull --fetch` | 仅获取数据 | | `git pull` | `zeta pull` | 拉取并合并 | | `git switch` | `zeta switch` | 切换分支 | ## 额外的工具 - hot 命令 `hot` 是 Git 存储库维护工具,用于清理、迁移和优化 Git 存储库。 ### 常见使用场景 | 任务 | 命令 | |------|------| | 查找大文件 | `hot size` / `hot smart -L20m` | | 删除敏感数据 | `hot remove path/to/secret.txt --prune` | | 迁移 SHA1 → SHA256 | `hot mc https://github.com/user/repo.git` | | 清理过期引用 | `hot prune-refs "feature/deprecated-"` | | 线性化历史 | `hot unbranch --confirm` | | 查看对象 | `hot cat HEAD --json` | 完整文档见 [docs/hot.md](./docs/hot.md)。 ## 许可证 Apache License Version 2.0, 请查看 [LICENSE](LICENSE) ================================================ FILE: VERSION ================================================ 0.23.0 ================================================ FILE: bali.toml ================================================ # https://toml.io/en/ name = "zeta" summary = "HugeSCM - A next generation cloud-based version control system" description = "HugeSCM - A next generation cloud-based version control system" package-name = "alipay-linkc-zeta" version = "0.23.0" license = "MIT" prefix = "/usr/local" packager = "江二" vendor = "蚂蚁集团代码平台团队" group = "alipay/application" authors = [""] crates = [ "cmd/zeta", # zeta client "cmd/zeta-mc", # zeta migrate tool "cmd/hot", ] [[include]] path = "LEGAL.md" destination = "share/zeta" ================================================ FILE: cmd/README.md ================================================ # command ================================================ FILE: cmd/hot/command/command.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "fmt" "net/url" "os" "time" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/kong" "github.com/antgroup/hugescm/pkg/version" ) var ( ErrLocalEndpoint = errors.New("local endpoint") ErrWorktreeNotEmpty = errors.New("worktree not empty") ) type Globals struct { Verbose bool `short:"V" help:"Make the operation more talkative"` Version VersionFlag `short:"v" name:"version" help:"Show version number and quit"` } type VersionFlag bool func (v VersionFlag) Decode(ctx *kong.DecodeContext) error { return nil } func (v VersionFlag) IsBool() bool { return true } func (v VersionFlag) BeforeApply(app *kong.Kong, vars kong.Vars) error { fmt.Println(version.GetVersionString()) app.Exit(0) return nil } func pickURI(rawURL string) (string, error) { if git.MatchesScpLike(rawURL) { _, _, _, p := git.FindScpLikeComponents(rawURL) return p, nil } if git.MatchesScheme(rawURL) { u, err := url.Parse(rawURL) if err != nil { return "", err } return u.Path, nil } return "", ErrLocalEndpoint } func (g *Globals) RunEx(ctx context.Context, repoPath string, cmdArg0 string, args ...string) error { now := time.Now() cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: repoPath, Environ: os.Environ(), Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, NoSetpgid: true, }, cmdArg0, args...) if err := cmd.Run(); err != nil { return err } trace.DbgPrint("exec: %s spent: %v", cmd.String(), time.Since(now)) return nil } ================================================ FILE: cmd/hot/command/command_az.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "github.com/antgroup/hugescm/cmd/hot/pkg/stat" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/trace" ) type Az struct { Paths []string `arg:"" name:"path" help:"Path to repositories" default:"." type:"path"` Limit int64 `short:"L" name:"limit" optional:"" help:"Large file limit size, supported units: KB, MB, GB, K, M, G" default:"10m" type:"size"` FullPath bool `short:"F" name:"full-path" help:"Show full path"` } func (c *Az) Run(g *Globals) error { for _, p := range c.Paths { if err := c.azOnce(p); err != nil { return err } } return nil } // git cat-file --batch-check --batch-all-objects func (c *Az) azOnce(p string) error { repoPath := git.RevParseRepoPath(context.Background(), p) trace.DbgPrint("begin analysis repository: %v large file: %v", repoPath, strengthen.FormatSize(c.Limit)) return stat.Az(context.Background(), repoPath, c.Limit, c.FullPath) } ================================================ FILE: cmd/hot/command/command_cat.go ================================================ package command import ( "bytes" "context" "encoding/json" "fmt" "io" "os" "path/filepath" "strings" "charm.land/glamour/v2" "charm.land/lipgloss/v2" "github.com/alecthomas/chroma/v2" "github.com/alecthomas/chroma/v2/formatters" "github.com/alecthomas/chroma/v2/lexers" "github.com/alecthomas/chroma/v2/styles" "github.com/antgroup/hugescm/cmd/hot/pkg/hud" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/hexview" "github.com/antgroup/hugescm/modules/term" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/modules/tui" ) const ( MAX_SHOW_BINARY_BLOB = 10<<20 - 8 ) type Cat struct { Object string `arg:"" name:"object" help:"The name of the object to show"` CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Type bool `name:"type" short:"t" help:"Show object type"` Size bool `name:"size" short:"s" help:"Show object size"` Textconv bool `name:"textconv" help:"Converting text to Unicode"` JSON bool `name:"json" short:"j" help:"Returns data as JSON; limited to commits, trees, and tags"` Limit int64 `name:"limit" short:"L" help:"Omits blobs larger than n bytes or units. n may be zero. Supported units: KB, MB, GB, K, M, G" default:"-1" type:"size"` Output string `name:"output" help:"Output to a specific file instead of stdout" placeholder:""` NoAltScreen bool `name:"no-alt-screen" help:"Disable alternate screen buffer for pager"` } func (c *Cat) Run(g *Globals) error { repoPath := git.RevParseRepoPath(context.Background(), c.CWD) trace.DbgPrint("repository location: %v", repoPath) d, err := git.NewDecoder(context.Background(), repoPath) if err != nil { die("new git decoder error: %v", err) return err } defer d.Close() // nolint o, err := d.Object(c.Object) if err != nil { die("open '%s' error: %v\n", c.Object, err) return err } if oo, ok := o.(*git.Object); ok { return c.formatObject(oo) } return c.showObject(o) } func (c *Cat) Println(a ...any) error { fd, _, err := c.NewFD() if err != nil { return err } defer fd.Close() // nolint _, err = fmt.Fprintln(fd, a...) return err } func (c *Cat) NewFD() (io.WriteCloser, term.Level, error) { if len(c.Output) == 0 { return &NopWriteCloser{Writer: os.Stdout}, term.StdoutLevel, nil } fd, err := os.Create(c.Output) return fd, term.LevelNone, err } const ( binaryTruncated = "*** Binary truncated ***" ) type sizer interface { Size() int64 } func (c *Cat) showObject(a any) error { if c.Size { if s, ok := a.(sizer); ok { return c.Println(s.Size()) } return nil } if c.Type { switch a.(type) { case *git.Commit: return c.Println("commit") case *git.Tag: return c.Println("tag") case *git.Tree: return c.Println("tree") } return nil } if c.JSON { fd, _, err := c.NewFD() if err != nil { return err } defer fd.Close() // nolint return json.NewEncoder(fd).Encode(a) } fd, termLevel, err := c.NewFD() if err != nil { return err } defer fd.Close() // nolint return hud.Display(fd, a, termLevel) } var markdownFiles = map[string]bool{ "README": true, "CHANGELOG": true, "CONTRIBUTING": true, "CHANGES": true, "AUTHORS": true, "HISTORY": true, } func (c *Cat) isMarkdown() bool { if _, filename, ok := strings.Cut(c.Object, ":"); ok { // Get base filename without extension base := strings.TrimSuffix(filename, filepath.Ext(filename)) ext := strings.ToLower(filepath.Ext(filename)) // Check for common markdown files by name (case-insensitive) if markdownFiles[strings.ToUpper(base)] { return true } // Check for markdown extensions return ext == ".md" || ext == ".markdown" || ext == ".mdown" || ext == ".mkd" } return false } func (c *Cat) getLexer() chroma.Lexer { _, filename, ok := strings.Cut(c.Object, ":") if !ok { return nil } lexer := lexers.Match(filename) return lexer } var termWidth = func() (width int, err error) { width, _, err = term.GetSize(int(os.Stdout.Fd())) if err == nil { return width, nil } return 0, err } func (c *Cat) markdownOut(w io.Writer, input io.Reader) error { width, _ := termWidth() if width == 0 || width > 120 { width = 80 } // Detect background color to pick appropriate style style := "light" if lipgloss.HasDarkBackground(os.Stdin, os.Stdout) { style = "dark" } r, err := glamour.NewTermRenderer( glamour.WithStylePath(style), glamour.WithWordWrap(width), ) if err != nil { return err } defer func() { _ = r.Close() }() // Write input to renderer if _, err = io.Copy(r, input); err != nil { return err } // Close to trigger rendering if err = r.Close(); err != nil { return err } // Write the rendered output to the destination if _, err = io.Copy(w, r); err != nil { return err } return nil } func (c *Cat) syntaxHighlightOut(w io.Writer, input io.Reader, termLevel term.Level, lexer chroma.Lexer) error { // Read the input into a buffer var buf bytes.Buffer if _, err := io.Copy(&buf, input); err != nil { return err } content := buf.String() // Coalesce the lexer lexer = chroma.Coalesce(lexer) // Detect background color to pick appropriate style styleName := "github" if lipgloss.HasDarkBackground(os.Stdin, os.Stdout) { styleName = "dracula" } // Get the style style := styles.Get(styleName) if style == nil { style = styles.Fallback } // Tokenize the content iterator, err := lexer.Tokenise(nil, content) if err != nil { return err } // Choose formatter based on terminal color support level var formatter chroma.Formatter switch termLevel { case term.Level16M: formatter = formatters.TTY16m case term.Level256: formatter = formatters.TTY256 case term.LevelNone: formatter = formatters.NoOp default: formatter = formatters.TTY } if err := formatter.Format(w, style, iterator); err != nil { return err } return nil } func (c *Cat) formatObject(o *git.Object) error { if c.Size { return c.Println(o.Size) } if c.Type { return c.Println("blob") } reader, charset, err := diferenco.NewUnifiedReaderEx(o, c.Textconv) if err != nil { return err } if c.Limit < 0 { c.Limit = o.Size } // Check if we should use pager (small files, no output file, color support) usePager := len(c.Output) == 0 && term.StdoutLevel != term.LevelNone && o.Size <= MAX_SHOW_BINARY_BLOB useAltScreen := !c.NoAltScreen // Binary content: always use hexview, with or without pager if charset == diferenco.BINARY { if c.Limit > MAX_SHOW_BINARY_BLOB { reader = io.MultiReader(io.LimitReader(reader, MAX_SHOW_BINARY_BLOB), strings.NewReader(binaryTruncated)) c.Limit = int64(MAX_SHOW_BINARY_BLOB + len(binaryTruncated)) } if usePager { p := tui.NewPager(term.StdoutLevel, useAltScreen) defer p.Close() // nolint return hexview.Format(reader, p, c.Limit, p.ColorMode()) } fd, _, err := c.NewFD() if err != nil { return err } defer fd.Close() // nolint return hexview.Format(reader, fd, c.Limit, term.StdoutLevel) } // Markdown and source code: only with pager if usePager { // Markdown handling if c.isMarkdown() { p := tui.NewPager(term.StdoutLevel, useAltScreen) defer p.Close() // nolint return c.markdownOut(p, io.LimitReader(reader, c.Limit)) } // Source code handling (only if not markdown) if lexer := c.getLexer(); lexer != nil { p := tui.NewPager(term.StdoutLevel, useAltScreen) defer p.Close() // nolint return c.syntaxHighlightOut(p, io.LimitReader(reader, c.Limit), p.ColorMode(), lexer) } } // Default: output directly (large files or output to file) fd, _, err := c.NewFD() if err != nil { return err } defer fd.Close() // nolint if _, err = io.Copy(fd, io.LimitReader(reader, c.Limit)); err != nil { return err } return nil } ================================================ FILE: cmd/hot/command/command_co.go ================================================ package command import ( "context" "fmt" "net/url" "os" "path" "path/filepath" "strings" "github.com/antgroup/hugescm/cmd/hot/pkg/co" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type Co struct { From string `arg:"" name:"from" help:"Original repository remote URL" type:"string"` Destination string `arg:"" optional:"" name:"destination" help:"Destination for the new repository" type:"path"` Branch string `name:"branch" short:"b" help:"Instead of pointing the newly created HEAD to the branch pointed to by the cloned repository’s HEAD, point to branch instead"` Commit string `name:"commit" short:"c" help:"Instead of pointing the newly created HEAD to the branch pointed to by the cloned repository’s HEAD, point to commit instead"` Sparse []string `name:"sparse" short:"s" help:"A subset of repository files, all files are checked out by default" type:"string"` Depth int `name:"depth" short:"d" help:"Create a shallow clone with a history truncated to the specified number of commits" default:"5"` Limit int64 `name:"limit" short:"L" help:"Omits blobs larger than n bytes or units. n may be zero. Supported units: KB, MB, GB, K, M, G" default:"-1" type:"size"` Recursive bool `name:"recursive" short:"r" help:"After the clone is created, initialize and clone submodules within based on the provided pathspec"` Values []string `short:"X" shortonly:"" help:"Override default clone/fetch configuration, format: ="` } func (c *Co) concatDestination(baseName string) (string, error) { destination := c.Destination if len(destination) == 0 { destination = strings.TrimSuffix(baseName, ".git") } if !filepath.IsAbs(destination) { cwd, err := os.Getwd() if err != nil { fmt.Fprintf(os.Stderr, "Get current workdir error: %v\n", err) return "", err } destination = filepath.Join(cwd, destination) } dirs, err := os.ReadDir(destination) if err != nil { if os.IsNotExist(err) { return destination, nil } fmt.Fprintf(os.Stderr, "readdir %s error: %v\n", destination, err) return "", err } if len(dirs) != 0 { fmt.Fprintf(os.Stderr, "fatal: destination path '%s' already exists and is not an empty directory.\n", filepath.Base(destination)) return "", ErrWorktreeNotEmpty } return destination, nil } func (c *Co) decodeRemote() (remote string, uri string, err error) { remote = c.From if git.MatchesScpLike(remote) { _, _, _, uri = git.FindScpLikeComponents(remote) return } if git.MatchesScheme(remote) { u, err := url.Parse(remote) if err != nil { return "", "", err } return remote, u.Path, nil } return remote, remote, nil } func (c *Co) Run(g *Globals) error { remote, uri, err := c.decodeRemote() if err != nil { fmt.Fprintf(os.Stderr, "bad remote '%s' error: '%v'\n", c.From, err) return err } destination, err := c.concatDestination(path.Base(uri)) if err != nil { return err } trace.DbgPrint("%s --> %s", remote, destination) return co.Co(context.Background(), &co.CoOptions{ Remote: remote, Destination: destination, Branch: c.Branch, Commit: c.Commit, Sparse: c.Sparse, Depth: c.Depth, Limit: c.Limit, Recursive: c.Recursive, Values: c.Values, }) } ================================================ FILE: cmd/hot/command/command_diff.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "encoding/json" "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/diff" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/patchview" "github.com/antgroup/hugescm/modules/term" "github.com/antgroup/hugescm/modules/trace" ) type Diff struct { CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Cached bool `name:"cached" help:"Show staged changes"` Staged bool `name:"staged" help:"Same as --cached"` JSON bool `name:"json" short:"j" help:"Output patches in JSON format"` Args []string `arg:"" optional:"" name:"args" help:"Commit range or paths"` } func (c *Diff) Run(g *Globals) error { ctx := context.Background() repoPath := git.RevParseRepoPath(ctx, c.CWD) trace.DbgPrint("repository location: %v", repoPath) // Get hash format from repository formatName, err := git.RevParseHashFormat(ctx, repoPath) if err != nil { die("detect hash format: %v", err) return err } hashFormat := git.HashFormatFromName(formatName) trace.DbgPrint("hash format: %s, abbrev: %d", formatName, hashFormat.HexSize()) // Build git diff arguments args := []string{ "diff", "--patch", "--raw", fmt.Sprintf("--abbrev=%d", hashFormat.HexSize()), "--full-index", "--find-renames=50%", } if c.Cached || c.Staged { args = append(args, "--cached") } // Append user-provided arguments (commit range, paths, etc.) if len(c.Args) > 0 { args = append(args, c.Args...) } // Create and start command cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: os.Environ(), }, "git", args...) stdout, err := cmd.StdoutPipe() if err != nil { die("create stdout pipe: %v", err) return err } defer stdout.Close() // nolint: errcheck if err := cmd.Start(); err != nil { die("start git diff: %v", err) return err } // Parse diff output parser := diff.NewParser(hashFormat, stdout, diff.Limits{}) var patches []*diferenco.Patch for parser.Parse() { p := parser.Patch() if p.Patch != nil { patches = append(patches, p.Patch) } } if err := cmd.Wait(); err != nil { die("git diff: %v", command.FromError(err)) return err } if perr := parser.Err(); perr != nil { die("parse diff: %v", perr) return perr } trace.DbgPrint("parsed %d patches", len(patches)) // Display using patchview if len(patches) == 0 { fmt.Println("No changes") return nil } // JSON output if c.JSON { encoder := json.NewEncoder(os.Stdout) encoder.SetIndent("", " ") return encoder.Encode(patches) } // Terminal not supported: fallback to plain text output if !term.IsTerminal(os.Stdout.Fd()) { encoder := diferenco.NewUnifiedEncoder(os.Stdout, diferenco.WithVCS("git")) return encoder.Encode(patches) } return patchview.Run(patches) } ================================================ FILE: cmd/hot/command/command_expire_refs.go ================================================ package command import ( "context" "fmt" "os" "path/filepath" "strings" "time" "github.com/antgroup/hugescm/cmd/hot/pkg/refs" "github.com/antgroup/hugescm/modules/fnmatch" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type ExpireRefs struct { Pattern []string `arg:"" optional:"" name:"pattern" help:"Matching pattern, all references are displayed by default"` CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Merged bool `short:"M" name:"merged" help:"Only clean up merged branches, ignoring expiration times"` Tag bool `short:"T" name:"tag" help:"Clean up expired Tags, off by default"` Expires time.Duration `short:"E" name:"expires" help:"Reference expiration time, support: m, h, d, w" type:"expire" default:"90d"` } func (c *ExpireRefs) fixup() { for i, pattern := range c.Pattern { if strings.HasSuffix(pattern, "/") { c.Pattern[i] = pattern + "*" } } } func (c *ExpireRefs) Match(name string) bool { if len(c.Pattern) == 0 { return true } for _, pattern := range c.Pattern { if fnmatch.Match(pattern, name, 0) { return true } } return false } func (c *ExpireRefs) Expire(ref *refs.Reference) bool { if strings.HasPrefix(ref.Name, "refs/tmp/") { return true } if c.Merged { return ref.Merged() } // check ref is tag and cleanup tag if ref.IsTag() && !c.Tag { return false } return time.Since(ref.Committer.When) > c.Expires } func (c *ExpireRefs) Run(g *Globals) error { c.fixup() repoPath := git.RevParseRepoPath(context.Background(), c.CWD) trace.DbgPrint("repository location: %v expires: %v", repoPath, c.Expires) references, err := refs.ScanReferences(context.Background(), repoPath, c, git.OrderNone) if err != nil { fmt.Fprintf(os.Stderr, "find repo references error: %v\n", err) return err } if len(references.Items) == 0 { return nil } target := filepath.Join(repoPath, "logs/expire-refs.log") _ = os.MkdirAll(filepath.Dir(target), 0755) fd, err := os.OpenFile(target, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0644) if err != nil { fmt.Fprintf(os.Stderr, "open logs error: %v\n", err) return err } defer fd.Close() // nolint _, _ = fmt.Fprintf(fd, "CLEANUP START TIME: %v\n", time.Now().Format(time.RFC3339)) u, err := git.NewRefUpdater(context.Background(), repoPath, os.Environ(), false) if err != nil { fmt.Fprintf(os.Stderr, "RefUpdater: new ref updater error: %v\n", err) return err } defer u.Close() // nolint if err := u.Start(); err != nil { fmt.Fprintf(os.Stderr, "RefUpdater: Start ref updater error: %v\n", err) return err } var total int for _, ref := range references.Items { if ref.Name == references.Current { continue } if ref.Broken { _ = refs.RemoveBrokenRef(repoPath, ref.Name) continue } if !c.Expire(ref) { continue } if err := u.Delete(git.ReferenceName(ref.Name)); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Delete %s error: %v\n", ref.Name, err) return err } total++ date := ref.Committer.When.Format(time.RFC3339) _, _ = fmt.Fprintf(fd, "%s %s %s removed\n", ref.Hash, date, ref.Name) fmt.Fprintf(os.Stderr, "\x1b[2K\rDELETE '%s' (OID: %s)", ref.ShortName, ref.Hash) } if err := u.Prepare(); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Prepare error: %v\n", err) return err } if err := u.Commit(); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Commit error: %v\n", err) return err } if total != 0 { fmt.Fprintf(os.Stderr, "\nExpire refs success, total: %d\n", total) } return nil } ================================================ FILE: cmd/hot/command/command_graft.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/replay" "github.com/antgroup/hugescm/cmd/hot/pkg/stat" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type Graft struct { Paths []string `arg:"" name:"path" help:"Path to repositories" default:"." type:"path"` Limit int64 `short:"L" name:"limit" optional:"" help:"Large file limit size, supported units: KB, MB, GB, K, M, G" default:"20m" type:"size"` Confirm bool `short:"Y" name:"confirm" help:"Confirm rewriting local branches and tags"` Prune bool `short:"P" name:"prune" help:"Prune repository when commits are rewritten"` HeadOnly bool `short:"H" name:"head-only" help:"Graft only the default branch"` FullPath bool `short:"F" name:"full-path" help:"Show full path"` ALL bool `short:"A" name:"all" help:"Remove all large blobs"` } func (c *Graft) Run(g *Globals) error { for _, p := range c.Paths { if err := c.doOnce(g, p); err != nil { return err } } return nil } func (c *Graft) doOnce(g *Globals, p string) error { repoPath := git.RevParseRepoPath(context.Background(), p) trace.DbgPrint("check %s size ...", repoPath) e := stat.NewSizeExecutor(c.Limit, c.FullPath) if err := e.Run(context.Background(), repoPath, false); err != nil { fmt.Fprintf(os.Stderr, "check repo size error: %v\n", err) return err } if len(e.Paths()) == 0 { return nil } if len(e.Paths()) > 300 { fmt.Fprintf(os.Stderr, "%s %d\n", tr.W("You can increase the file size limit, the number of large files: "), len(e.Paths())) return nil } matcher := newMatcher(e, c.ALL) if matcher == nil { return nil } r, err := replay.NewReplayer(context.Background(), repoPath, 4, g.Verbose) if err != nil { fmt.Fprintf(os.Stderr, "new replayer error: %v\n", err) return err } defer r.Close() // nolint if err := r.Graft(matcher, c.Confirm, c.Prune, c.HeadOnly); err != nil { fmt.Fprintf(os.Stderr, "replay repo error: %v\n", err) return err } return nil } ================================================ FILE: cmd/hot/command/command_mc.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "fmt" "os" "path" "path/filepath" "strings" "time" "github.com/antgroup/hugescm/cmd/hot/pkg/mc" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/command" ) type Mc struct { From string `arg:"" name:"from" help:"Original repository remote URL (or filesystem path)" type:"string"` Destination string `arg:"" optional:"" name:"destination" help:"Destination where the repository is migrated" type:"path"` Format string `name:"format" default:"sha256" help:"Specifying the object format, support only: sha1 or sha256"` Bare bool `short:"b" name:"bare" optional:"" help:"Save as a bare git repository"` } // Migrator func (c *Mc) concatDestination(baseName string) (string, error) { destination := c.Destination if len(destination) == 0 { destination = strings.TrimSuffix(baseName, ".git") } if !filepath.IsAbs(destination) { cwd, err := os.Getwd() if err != nil { fmt.Fprintf(os.Stderr, "Get current workdir error: %v\n", err) return "", err } destination = filepath.Join(cwd, destination) } if c.Bare { destination += ".git" } dirs, err := os.ReadDir(destination) if err != nil { if os.IsNotExist(err) { return destination, nil } fmt.Fprintf(os.Stderr, "readdir %s error: %v\n", destination, err) return "", err } if len(dirs) != 0 { fmt.Fprintf(os.Stderr, "fatal: destination path '%s' already exists and is not an empty directory.\n", filepath.Base(destination)) return "", ErrWorktreeNotEmpty } return destination, nil } func (c *Mc) cloneAndMigrate(g *Globals, uri string) error { destination, err := c.concatDestination(path.Base(uri)) if err != nil { return err } tempDir, err := os.MkdirTemp(os.TempDir(), "clone") if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return err } defer os.RemoveAll(tempDir) // nolint if err := g.RunEx(context.Background(), command.NoDir, "git", "clone", "--bare", c.From, tempDir); err != nil { fmt.Fprintf(os.Stderr, "clone error: %v", err) return err } return c.migrateFrom(g, tempDir, destination) } func (c *Mc) Run(g *Globals) error { uri, err := pickURI(c.From) if err == nil { return c.cloneAndMigrate(g, uri) } if !errors.Is(err, ErrLocalEndpoint) { fmt.Fprintf(os.Stderr, "bad remote '%s' %v\n", c.From, err) return err } absFrom, err := filepath.Abs(c.From) if err != nil { fmt.Fprintf(os.Stderr, "bad remote '%s' %v\n", c.From, err) return err } if _, err = os.Stat(c.From); err != nil { fmt.Fprintf(os.Stderr, "bad remote '%s' %v\n", c.From, err) return err } destination, err := c.concatDestination(filepath.Base(c.From) + "-sha256") if err != nil { return err } return c.migrateFrom(g, absFrom, destination) } func (c *Mc) migrateFrom(g *Globals, from, to string) error { now := time.Now() r, err := mc.NewMigrator(context.Background(), &mc.MigrateOptions{ From: from, To: to, // os.Environ(), from, to, c.Bare, 4, g.Verbose Format: c.Format, Bare: c.Bare, Verbose: g.Verbose, StepEnd: 4, }) if err != nil { fmt.Fprintf(os.Stderr, "mc %s to %s error: %v\n", from, to, err) return err } defer r.Close() // nolint if err := r.Execute(context.Background()); err != nil { fmt.Fprintf(os.Stderr, "Execute error: %v\n", err) return err } _, _ = tr.Fprintf(os.Stderr, "migrate repository to %s success, spent: %v\n", c.Format, time.Since(now)) return nil } ================================================ FILE: cmd/hot/command/command_prune_refs.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "bufio" "context" "fmt" "os" "path/filepath" "slices" "strings" "time" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/strengthen" ) const ( // pruneTargetPrefix // refs/pull/${ID}/merge // refs/pull/cloudide/turbodev pruneTargetPrefix = "refs/pull/" ) var ( // remove expired prefixToPrune = []string{ "refs/heads/FASTQ", "refs/heads/conflict_fix_", "refs/heads/cooperate/cloudideantservice-", "refs/heads/cooperate/reading-FASTQ1", "refs/tags/cstone_stc_scan_", } extremePrefixToPrune = []string{ "refs/heads/FASTQ", "refs/heads/conflict_fix_", "refs/heads/cooperate/cloudideantservice-", "refs/heads/cooperate/linkc-", "refs/heads/cooperate/reading-FASTQ1", "refs/heads/cooperate/sop_", "refs/heads/eval_ai_ide_", "refs/heads/eval_codefuse_augment_", "refs/heads/eval_idea_plugin_", "refs/heads/next_", "refs/heads/next_master_dev_", "refs/heads/unit_test_temp", "refs/heads/unit_test_temp_xdev", "refs/heads/xdev/", "refs/tags/cstone_stc_scan_", } // always remove dirtyRefPrefixes = []string{ "refs/merge-requests/", "refs/tmp/", } ) var statReferencesFormatFields = []string{ "%(refname)", "%(refname:short)", "%(objectname)", "%(committername)", "%(creatordate:iso-strict)", } type Reference struct { Name string `json:"name"` ShortName string `json:"short_name"` Hash string `json:"hash"` Committer string `json:"committer"` LastUpdate time.Time `json:"last_update"` } func parseReferenceLine(referenceLine string) (*Reference, error) { elements := strings.SplitN(referenceLine, "\x00", len(statReferencesFormatFields)) if len(elements) != len(statReferencesFormatFields) { return nil, fmt.Errorf("invalid output from git for-each-ref command: %v", referenceLine) } return &Reference{ Name: elements[0], ShortName: elements[1], Hash: elements[2], Committer: elements[3], LastUpdate: git.PareTimeFallback(elements[4]), }, nil } func GetReferences(ctx context.Context, repoPath string, m func(*Reference) bool) ([]*Reference, error) { stderr := command.NewStderr() reader, err := git.NewReader(ctx, &command.RunOpts{RepoPath: repoPath, Stderr: stderr}, "for-each-ref", "--format", strings.Join(statReferencesFormatFields, "%00")) if err != nil { return nil, fmt.Errorf("run git for-each-ref error: %w", err) } defer reader.Close() // nolint references := make([]*Reference, 0, 100) scanner := bufio.NewScanner(reader) for scanner.Scan() { r, err := parseReferenceLine(scanner.Text()) if err != nil { continue } if m(r) { references = append(references, r) } } return references, nil } func isDirtyReference(name string) bool { return slices.ContainsFunc(dirtyRefPrefixes, func(prefix string) bool { return strings.HasPrefix(name, prefix) }) } func prefixesMatch(name string, prefixes []string) bool { return slices.ContainsFunc(prefixes, func(prefix string) bool { return strings.HasPrefix(name, prefix) }) } type PruneRefs struct { Prefixes []string `arg:"" optional:"" name:"prefixes" help:"Reference prefixes that need to be cleaned up"` // to targets CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Expires time.Duration `short:"e" name:"expires" help:"Reference expiration time, support: m, h, d, w" type:"expire" default:"90d"` DryRun bool `name:"dry-run" short:"n" help:"Dry run"` Default bool `short:"D" name:"default" help:"Cleanup references using default prefix"` Extreme bool `short:"E" name:"extreme" help:"Remove more dirty references"` } func (c *PruneRefs) preparePrefixes() (prefixes []string) { switch { case len(c.Prefixes) != 0: prefixes = append(prefixes, c.Prefixes...) prefixes = append(prefixes, pruneTargetPrefix) // List all references case c.Extreme: prefixes = append(prefixes, extremePrefixToPrune...) prefixes = append(prefixes, pruneTargetPrefix) case c.Default: prefixes = append(prefixes, prefixToPrune...) prefixes = append(prefixes, pruneTargetPrefix) default: prefixes = append(prefixes, pruneTargetPrefix) } return } func (c *PruneRefs) record(repoPath string, refs []*Reference) error { tempDir := filepath.Join(repoPath, "temp") if err := os.Mkdir(tempDir, 0755); err != nil && !os.IsExist(err) { fmt.Fprintf(os.Stderr, "new extraCross error: %v", err) return err } saveTo := filepath.Join(tempDir, strengthen.NewSessionID()+".refs") fd, err := os.Create(saveTo) if err != nil { fmt.Fprintf(os.Stderr, "create record json error: %v", err) return err } defer fd.Close() // nolint for _, ref := range refs { _, _ = fmt.Fprintf(fd, "%s %s\n", ref.Hash, ref.Name) } return nil } func (c *PruneRefs) pruneRefs(ctx context.Context, repoPath string, references []*Reference) error { u, err := git.NewRefUpdater(ctx, repoPath, os.Environ(), false) if err != nil { fmt.Fprintf(os.Stderr, "RefUpdater: new ref updater error: %v\n", err) return err } defer u.Close() // nolint if err := u.Start(); err != nil { fmt.Fprintf(os.Stderr, "RefUpdater: Start ref updater error: %v\n", err) return err } for _, ref := range references { if !c.DryRun { if err := u.Delete(git.ReferenceName(ref.Name)); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Delete %s error: %v\n", ref.Name, err) return err } } fmt.Fprintf(os.Stderr, "\x1b[2K\rDELETE '%s' (OID: %s Date: %s Committer: %s)", ref.ShortName, ref.Hash, ref.LastUpdate.Format(time.RFC3339), ref.Committer) } if c.DryRun { return nil } if err := u.Prepare(); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Prepare error: %v\n", err) return err } if err := u.Commit(); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Commit error: %v\n", err) return err } return nil } func (c *PruneRefs) Run(g *Globals) error { repoPath := git.RevParseRepoPath(context.Background(), c.CWD) prefixes := c.preparePrefixes() fmt.Fprintf(os.Stderr, "\x1b[38;2;254;225;64m%s\x1b[0m\n", W("* The following ref prefixes will be deleted:\n")) for _, p := range prefixes { fmt.Fprintf(os.Stderr, "\x1b[38;2;254;225;64m* %s\x1b[0m\n", p) } expiredAt := time.Now().Add(-c.Expires) references, err := GetReferences(context.Background(), repoPath, func(r *Reference) bool { return isDirtyReference(r.Name) || (prefixesMatch(r.Name, prefixes) && expiredAt.After(r.LastUpdate)) }) if err != nil { fmt.Fprintf(os.Stderr, "parse references error: %v\n", err) return err } if len(references) == 0 { fmt.Fprintf(os.Stderr, "%s", W("No references to be deleted\n")) return nil } if err := c.record(repoPath, references); err != nil { return err } if err := c.pruneRefs(context.Background(), repoPath, references); err != nil { return err } fmt.Fprintf(os.Stderr, "\nPrune refs success, total: %d\n", len(references)) return nil } ================================================ FILE: cmd/hot/command/command_remove.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/replay" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type Remove struct { CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Paths []string `arg:"" name:"Paths" help:"Path to remove in repository, support wildcards" type:"string"` Confirm bool `short:"Y" name:"confirm" help:"Confirm rewriting local branches and tags"` Prune bool `short:"P" name:"prune" help:"Prune repository when commits are rewritten"` Graft bool `short:"G" name:"graft" help:"Grafting mode"` HeadOnly bool `short:"H" name:"head-only" help:"Graft only the default branch"` } func (c *Remove) Run(g *Globals) error { repoPath := git.RevParseRepoPath(context.Background(), c.CWD) trace.DbgPrint("repository location: %v", repoPath) matcher := replay.NewMatcher(c.Paths) if c.Graft { r, err := replay.NewReplayer(context.Background(), repoPath, 4, g.Verbose) if err != nil { fmt.Fprintf(os.Stderr, "new replayer error: %v\n", err) return err } defer r.Close() // nolint if err := r.Graft(matcher, c.Confirm, c.Prune, c.HeadOnly); err != nil { fmt.Fprintf(os.Stderr, "graft repo error: %v\n", err) return err } return nil } r, err := replay.NewReplayer(context.Background(), repoPath, 3, g.Verbose) if err != nil { fmt.Fprintf(os.Stderr, "new replayer error: %v\n", err) return err } defer r.Close() // nolint if err := r.Drop(matcher, c.Confirm, c.Prune); err != nil { fmt.Fprintf(os.Stderr, "replay repo error: %v\n", err) return err } return nil } ================================================ FILE: cmd/hot/command/command_scan_refs.go ================================================ package command import ( "context" "fmt" "os" "strconv" "strings" "time" "charm.land/bubbles/v2/paginator" "charm.land/bubbles/v2/table" tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" "charm.land/lipgloss/v2/compat" "github.com/antgroup/hugescm/cmd/hot/pkg/refs" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/fnmatch" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/term" "github.com/antgroup/hugescm/modules/trace" ) func newModel(references *refs.References) model { p := paginator.New() p.Type = paginator.Dots p.PerPage = 20 p.ActiveDot = lipgloss.NewStyle().Foreground(compat.AdaptiveColor{Light: lipgloss.Color("235"), Dark: lipgloss.Color("252")}).Render("•") p.InactiveDot = lipgloss.NewStyle().Foreground(compat.AdaptiveColor{Light: lipgloss.Color("250"), Dark: lipgloss.Color("238")}).Render("•") p.SetTotalPages(len(references.Items)) return model{ paginator: p, references: references, } } type model struct { references *refs.References paginator paginator.Model table table.Model ready bool } func (m model) Init() tea.Cmd { return nil } func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { var cmd tea.Cmd switch msg := msg.(type) { case tea.KeyPressMsg: switch msg.String() { case "q", "esc", "ctrl+c": return m, tea.Quit case "h", "left": // Previous page if m.paginator.Page > 0 { m.paginator.PrevPage() m.ready = false } case "l", "right": // Next page if m.paginator.Page < m.paginator.TotalPages-1 { m.paginator.NextPage() m.ready = false } } } // Update table if m.ready { m.table, cmd = m.table.Update(msg) } // Build table on first render or page change if !m.ready { m.table = m.buildTable() m.ready = true } return m, cmd } func (m model) buildTable() table.Model { start, end := m.paginator.GetSliceBounds(len(m.references.Items)) // Build table columns with proper widths termWidth := getTerminalWidth() colWidths := struct { hash int date int name int leading int lagging int }{ hash: 40, // Full commit hash date: 25, leading: 8, lagging: 8, } // Width calculation: // terminal = table + lipgloss borders(2) // table = sum(colWidths) + padding + separators // For 5 columns with padding=1: sum(col) + 2*5 + 4 = sum(col) + 14 fixedWidth := colWidths.hash + colWidths.date + colWidths.leading + colWidths.lagging + 16 // 16 = padding + separators + lipgloss borders colWidths.name = max(30, min(80, termWidth-fixedWidth)) columns := []table.Column{ {Title: tr.W("Hash"), Width: colWidths.hash}, {Title: tr.W("Date"), Width: colWidths.date}, {Title: tr.W("Reference Name"), Width: colWidths.name}, {Title: tr.W("Leading"), Width: colWidths.leading}, {Title: tr.W("Lagging"), Width: colWidths.lagging}, } // Build table rows rows := make([]table.Row, 0, end-start) for _, item := range m.references.Items[start:end] { if item.Broken { rows = append(rows, table.Row{item.Hash, "", item.Name, tr.W("reference is broken"), ""}) continue } date := item.Committer.When.Local().Format(time.RFC3339) if item.Name == m.references.Current || !item.IsBranch() { rows = append(rows, table.Row{item.Hash, date, item.ShortName, "", ""}) continue } if item.Leading == 0 { rows = append(rows, table.Row{item.Hash, date, item.ShortName, "*merged", strconv.Itoa(item.Lagging)}) continue } rows = append(rows, table.Row{item.Hash, date, item.ShortName, strconv.Itoa(item.Leading), strconv.Itoa(item.Lagging)}) } // Create table // Total width must not exceed terminal width - lipgloss borders (2) totalWidth := termWidth - 2 t := table.New( table.WithColumns(columns), table.WithRows(rows), table.WithFocused(true), table.WithHeight(min(20, len(rows)+2)), table.WithWidth(totalWidth), ) // Apply styles s := table.DefaultStyles() s.Header = s.Header. BorderStyle(lipgloss.NormalBorder()). BorderForeground(lipgloss.Color("243")). BorderBottom(true). Bold(true). Foreground(lipgloss.Color("173")). Padding(0, 1) s.Cell = s.Cell.Padding(0, 1) s.Selected = s.Selected. Foreground(lipgloss.Color("230")). Background(lipgloss.Color("57")). Bold(false) t.SetStyles(s) return t } func (m model) View() tea.View { var b strings.Builder fmt.Fprintf(&b, "\n %s\x1b[38;2;32;225;215m%d\x1b[0m\n\n", tr.W("Matched references: "), len(m.references.Items)) if m.ready { // Wrap table with lipgloss to add complete borders tableStyle := lipgloss.NewStyle(). BorderStyle(lipgloss.NormalBorder()). BorderForeground(lipgloss.Color("243")) b.WriteString(tableStyle.Render(m.table.View())) b.WriteString("\n\n") b.WriteString(" " + m.paginator.View()) b.WriteString("\n\n ↑/k ↓/j: navigate • h/l ←/→: page • q: quit\n") } return tea.NewView(b.String()) } // getTerminalWidth returns the terminal width with a default fallback func getTerminalWidth() int { if width, _, err := term.GetSize(int(os.Stdout.Fd())); err == nil && width > 0 { return width } return 80 } type ScanRefs struct { Pattern []string `arg:"" optional:"" name:"pattern" help:"Matching pattern, all references are displayed by default"` CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Oldest bool `short:"O" name:"oldest" help:"Sort by time from oldest to newest"` } func (c *ScanRefs) fixup() { for i, pattern := range c.Pattern { if strings.HasSuffix(pattern, "/") { c.Pattern[i] = pattern + "*" } } } func (c *ScanRefs) Match(name string) bool { if len(c.Pattern) == 0 { return true } for _, pattern := range c.Pattern { if fnmatch.Match(pattern, name, 0) { return true } } return false } func (c *ScanRefs) Run(g *Globals) error { c.fixup() repoPath := git.RevParseRepoPath(context.Background(), c.CWD) trace.DbgPrint("repository location: %v", repoPath) order := git.OrderNewest if c.Oldest { order = git.OrderOldest } references, err := refs.ScanReferences(context.Background(), repoPath, c, order) if err != nil { fmt.Fprintf(os.Stderr, "scan references error: %v\n", err) return err } if len(references.Items) == 0 { return nil } p := tea.NewProgram(newModel(references)) if _, err := p.Run(); err != nil { fmt.Fprintf(os.Stderr, "show references error: %v\n", err) return err } return nil } ================================================ FILE: cmd/hot/command/command_show.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "encoding/json" "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/diff" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/patchview" "github.com/antgroup/hugescm/modules/term" "github.com/antgroup/hugescm/modules/trace" ) type Show struct { CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Commit string `arg:"" name:"commit" help:"Commit to show" optional:"" default:"HEAD"` JSON bool `name:"json" short:"j" help:"Output patches in JSON format"` } func (c *Show) Run(g *Globals) error { ctx := context.Background() repoPath := git.RevParseRepoPath(ctx, c.CWD) trace.DbgPrint("repository location: %v", repoPath) // Get hash format from repository formatName, err := git.RevParseHashFormat(ctx, repoPath) if err != nil { die("detect hash format: %v", err) return err } hashFormat := git.HashFormatFromName(formatName) trace.DbgPrint("hash format: %s, abbrev: %d", formatName, hashFormat.HexSize()) // Build git show arguments args := []string{ "show", "--patch", "--raw", fmt.Sprintf("--abbrev=%d", hashFormat.HexSize()), "--full-index", "--find-renames=50%", "--format=", c.Commit, } // Create and start command cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: os.Environ(), }, "git", args...) stdout, err := cmd.StdoutPipe() if err != nil { die("create stdout pipe: %v", err) return err } defer stdout.Close() // nolint: errcheck if err := cmd.Start(); err != nil { die("start git show: %v", err) return err } // Parse diff output parser := diff.NewParser(hashFormat, stdout, diff.Limits{}) var patches []*diferenco.Patch for parser.Parse() { p := parser.Patch() if p.Patch != nil { patches = append(patches, p.Patch) } } if err := cmd.Wait(); err != nil { die("git show: %v", command.FromError(err)) return err } if perr := parser.Err(); perr != nil { die("parse diff: %v", perr) return perr } trace.DbgPrint("parsed %d patches", len(patches)) // Display using patchview if len(patches) == 0 { fmt.Println("No changes") return nil } // JSON output if c.JSON { encoder := json.NewEncoder(os.Stdout) encoder.SetIndent("", " ") return encoder.Encode(patches) } // Terminal not supported: fallback to plain text output if !term.IsTerminal(os.Stdout.Fd()) { encoder := diferenco.NewUnifiedEncoder(os.Stdout, diferenco.WithVCS("git")) return encoder.Encode(patches) } return patchview.Run(patches) } ================================================ FILE: cmd/hot/command/command_size.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/stat" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type Size struct { Paths []string `arg:"" name:"path" help:"Path to repositories" default:"." type:"path"` Limit int64 `short:"L" name:"limit" optional:"" help:"Large file limit size, supported units: KB, MB, GB, K, M, G" default:"20m" type:"size"` Extract bool `short:"E" name:"extract" optional:"" help:"Whether large files exist in the default branch"` FullPath bool `short:"F" name:"full-path" help:"Show full path"` } func (c *Size) Run(g *Globals) error { for _, p := range c.Paths { if err := c.sizeOnce(p); err != nil { fmt.Fprintf(os.Stderr, "show repo '%s' size error: %v\n", p, err) return err } } return nil } func (c *Size) sizeOnce(p string) error { repoPath := git.RevParseRepoPath(context.Background(), p) trace.DbgPrint("check %s size ...", repoPath) e := stat.NewSizeExecutor(c.Limit, c.FullPath) if err := e.Run(context.Background(), repoPath, c.Extract); err != nil { return err } return nil } ================================================ FILE: cmd/hot/command/command_smart.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "os" "charm.land/huh/v2" "github.com/antgroup/hugescm/cmd/hot/pkg/replay" "github.com/antgroup/hugescm/cmd/hot/pkg/stat" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type Smart struct { Paths []string `arg:"" name:"path" help:"Path to repositories" default:"." type:"path"` Limit int64 `short:"L" name:"limit" optional:"" help:"Large file limit size, supported units: KB, MB, GB, K, M, G" default:"20m" type:"size"` Confirm bool `short:"Y" name:"confirm" help:"Confirm rewriting local branches and tags"` Prune bool `short:"P" name:"prune" help:"Prune repository when commits are rewritten"` FullPath bool `short:"F" name:"full-path" help:"Show full path"` ALL bool `short:"A" name:"all" help:"Remove all large blobs"` } func (c *Smart) Run(g *Globals) error { for _, p := range c.Paths { if err := c.doOnce(g, p); err != nil { return err } } return nil } func multiSelect(i int, totalBatches int, input []string) ([]string, error) { var paths []string form := huh.NewForm( huh.NewGroup( huh.NewMultiSelect[string](). Title(fmt.Sprintf("%s [%s - %d/%d]:", tr.W("Which files need to be deleted"), tr.W("Batch"), i+1, totalBatches)). Options(huh.NewOptions(input...)...). Value(&paths))) if err := form.Run(); err != nil { return nil, err } return paths, nil } func newMatcher(sz *stat.SizeExecutor, matchAll bool) replay.Matcher { if matchAll { return sz } larges := sz.Paths() selected := make([]string, 0, len(larges)) totalBatches := (len(larges) + 19) / 20 for i := range totalBatches { pathsLen := len(larges) if pathsLen == 0 { break } minGroup := min(20, pathsLen) var paths []string var err error paths, err = multiSelect(i, totalBatches, larges[0:minGroup]) if err != nil { fmt.Fprintf(os.Stderr, "multi select error: %v\n", err) return nil } larges = larges[minGroup:] selected = append(selected, paths...) } if len(selected) == 0 { return nil } fmt.Fprintf(os.Stderr, "%s %d\n", tr.W("The total number of files that will be deleted is:"), len(selected)) return replay.NewEqualer(selected) } func (c *Smart) doOnce(g *Globals, p string) error { repoPath := git.RevParseRepoPath(context.Background(), p) trace.DbgPrint("check %s size ...", p) e := stat.NewSizeExecutor(c.Limit, c.FullPath) if err := e.Run(context.Background(), repoPath, false); err != nil { fmt.Fprintf(os.Stderr, "analyze repo size error: %v\n", err) return err } if len(e.Paths()) == 0 { return nil } if len(e.Paths()) > 300 { fmt.Fprintf(os.Stderr, "%s %d\n", tr.W("You can increase the file size limit, the number of large files: "), len(e.Paths())) return nil } matcher := newMatcher(e, c.ALL) if matcher == nil { return nil } r, err := replay.NewReplayer(context.Background(), repoPath, 3, g.Verbose) if err != nil { fmt.Fprintf(os.Stderr, "new rewriter error: %v\n", err) return err } defer r.Close() // nolint if err := r.Drop(matcher, c.Confirm, c.Prune); err != nil { fmt.Fprintf(os.Stderr, "rewrite repo error: %v\n", err) return err } return nil } ================================================ FILE: cmd/hot/command/command_snapshot.go ================================================ package command import ( "bufio" "context" "errors" "fmt" "io" "os" "path/filepath" "strings" "unicode" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/env" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) const ( snapshotSummaryFormat = `%shot snapshot [] %shot snapshot [] --push [reference] %shot snapshot [] --push [] [] ` ) type Snapshot struct { Message []string `name:"message" short:"m" help:"Use the given message as the commit message. Concatenate multiple -m options as separate paragraphs"` File string `name:"file" short:"F" help:"Take the commit message from the given file. Use - to read the message from the standard input"` Parents []string `name:"parents" short:"p" help:"ID of a parent commit object"` CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Orphan bool `name:"orphan" help:"Create an orphan commit"` Push bool `name:"push" short:"P" help:"Push the worktree snapshot commit to the remote"` Force bool `name:"force" short:"f" help:"Force updates"` UnresolvedArgs []string `arg:"" optional:"" hidden:""` repoPath string `kong:"-"` worktree string `kong:"-"` } func (c *Snapshot) Summary() string { or := W(" or: ") return fmt.Sprintf(snapshotSummaryFormat, W("Usage: "), or, or) } func (c *Snapshot) Passthrough(paths []string) { c.UnresolvedArgs = append(c.UnresolvedArgs, paths...) } func messageReadFrom(r io.Reader) (string, error) { br := bufio.NewScanner(r) lines := make([]string, 0, 10) for br.Scan() { line := strings.TrimRightFunc(br.Text(), unicode.IsSpace) if strings.HasPrefix(line, "#") { break } lines = append(lines, line) } if br.Err() != nil { return "", br.Err() } var pos int for i, n := range lines { if len(n) != 0 { pos = i break } } lines = lines[pos:] if len(lines) == 0 { return "", nil } lines[0] = strings.TrimSpace(lines[0]) if lines[len(lines)-1] != "" { lines = append(lines, "") } return strings.Join(lines, "\n"), nil } func messageReadFromPath(p string) (string, error) { fd, err := os.Open(p) if err != nil { return "", err } defer fd.Close() // nolint return messageReadFrom(fd) } func genMessage(message []string) string { if len(message) == 0 { return "" } lines := make([]string, 0, 10) lines = append(lines, strings.Split(message[0], "\n")...) if len(message) > 1 { lines = append(lines, message[1:]...) } var pos int for i, n := range lines { if len(n) != 0 { pos = i break } } lines = lines[pos:] if len(lines) == 0 { return "" } lines[0] = strings.TrimSpace(lines[0]) if lines[len(lines)-1] != "" { lines = append(lines, "") } return strings.Join(lines, "\n") } func (c *Snapshot) genMessage() (message string, err error) { switch { case c.File == "-": if message, err = messageReadFrom(os.Stdin); err != nil { die("read messsage from stdin: %v", err) return } case len(c.File) != 0: if message, err = messageReadFromPath(c.File); err != nil { die("read messsage from %s: %v", c.File, err) return } default: message = genMessage(c.Message) } if len(message) == 0 { fmt.Fprintln(os.Stderr, W("Aborting commit due to empty commit message.")) return "", errors.New("not allow empty message") } return } func (c *Snapshot) snapshotWriteIndex(ctx context.Context, snapshotEnv []string, treeish string) error { psArgs := []string{"read-tree"} if len(treeish) != 0 && !git.IsHashZero(treeish) { if !git.ValidateReferenceName([]byte(treeish)) { return fmt.Errorf("bad revision name '%s'", treeish) } psArgs = append(psArgs, "--", treeish) } else { psArgs = append(psArgs, "--empty") } cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: c.repoPath, Environ: snapshotEnv, Stderr: os.Stderr, NoSetpgid: true, }, "git", psArgs...) return cmd.RunEx() } func (c *Snapshot) addALL(ctx context.Context, snapshotEnv []string) error { cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: c.worktree, Environ: snapshotEnv, Stderr: os.Stderr, NoSetpgid: true, }, "git", "add", "-A") return cmd.RunEx() } func (c *Snapshot) writeTree(ctx context.Context, snapshotEnv []string) (string, error) { cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: c.repoPath, Stderr: os.Stderr, Environ: snapshotEnv, NoSetpgid: true, }, "git", "write-tree") treeID, err := cmd.OneLine() if err != nil { return "", err } return treeID, nil } func (c *Snapshot) doSnapshot(ctx context.Context, basePoint string) (string, error) { snapshotIndex := filepath.Join(c.repoPath, "snapshot.index") // INDEX file snapshotEnv := env.SanitizeEnv("GIT_INDEX_VERSION", "GIT_INDEX_FILE") snapshotEnv = append(snapshotEnv, "GIT_INDEX_VERSION=4", "GIT_INDEX_FILE="+snapshotIndex, ) if err := c.snapshotWriteIndex(ctx, snapshotEnv, basePoint); err != nil { die("git read-tree error: %v", err) return "", err } if err := c.addALL(ctx, snapshotEnv); err != nil { die("git add error: %v", err) return "", err } treeOID, err := c.writeTree(ctx, snapshotEnv) if err != nil { die("git write-tree: %v", err) return "", err } trace.DbgPrint("new tree: %s", treeOID) message, err := c.genMessage() if err != nil { return "", err } psArgs := []string{ "commit-tree", "-F", "-", } parents := c.Parents if len(parents) == 0 && !c.Orphan { parents = append(parents, basePoint) } for _, parent := range parents { if parent == "" || git.IsHashZero(parent) { continue } psArgs = append(psArgs, "-p", parent) } psArgs = append(psArgs, treeOID) stdin := strings.NewReader(message) cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: c.repoPath, Stdin: stdin, Stderr: os.Stderr, Environ: snapshotEnv, NoSetpgid: true, }, "git", psArgs...) commitID, err := cmd.OneLine() if err != nil { die("git commit-tree error: %v", err) return "", err } return commitID, nil } func (c *Snapshot) Run(g *Globals) error { var remote, refname string if c.Push { switch len(c.UnresolvedArgs) { case 0: die("hot snapshot --push require remote refname") return errors.New("missing args") case 1: remote = "origin" refname = c.UnresolvedArgs[0] default: remote = c.UnresolvedArgs[0] refname = c.UnresolvedArgs[1] } } var err error if c.worktree, err = git.RevParseWorktree(context.Background(), c.CWD); err != nil { die("can only be run on non-bare repositories, error: %v", err) return err } c.repoPath = git.RevParseRepoPath(context.Background(), c.CWD) trace.DbgPrint("repository location: %v", c.repoPath) current, basePoint, err := git.RevParseCurrent(context.Background(), os.Environ(), c.repoPath) if err != nil { die("rev-parse HEAD: %v", err) return err } trace.DbgPrint("current '%s' commit: %s", current, basePoint) commit, err := c.doSnapshot(context.Background(), basePoint) if err != nil { return err } fmt.Fprintln(os.Stderr, W("new snapshot commit:")) _, _ = fmt.Fprintln(os.Stdout, commit) if !c.Push { return nil } trace.DbgPrint("remote %s reference: %s", remote, refname) psArgs := []string{"push"} if c.Force { psArgs = append(psArgs, "-f") } psArgs = append(psArgs, remote, fmt.Sprintf("%s:%s", commit, refname)) cmd := command.NewFromOptions(context.Background(), &command.RunOpts{ RepoPath: c.repoPath, Environ: os.Environ(), Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, NoSetpgid: true, }, "git", psArgs...) if err := cmd.RunEx(); err != nil { return err } return nil } ================================================ FILE: cmd/hot/command/command_stat.go ================================================ package command import ( "context" "github.com/antgroup/hugescm/cmd/hot/pkg/stat" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type Stat struct { CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Limit int64 `short:"L" name:"limit" optional:"" help:"Large file limit size, supported units: KB, MB, GB, K, M, G" default:"20m" type:"size"` } func (c *Stat) Run(g *Globals) error { repoPath := git.RevParseRepoPath(context.Background(), c.CWD) trace.DbgPrint("repository location: %v", repoPath) return stat.Stat(context.Background(), &stat.StatOptions{ RepoPath: repoPath, Limit: c.Limit, }) } ================================================ FILE: cmd/hot/command/command_unbranch.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/replay" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type Unbranch struct { Revision string `arg:"" optional:"" name:"revision" help:"Linearize the specified revision history"` CWD string `short:"C" name:"cwd" help:"Specify repository location" default:"." type:"path"` Confirm bool `short:"Y" name:"confirm" help:"Confirm rewriting local branches and tags"` Prune bool `short:"P" name:"prune" help:"Prune repository when commits are rewritten"` Target string `short:"T" name:"target" help:"Save linearized branches to new target"` Keep int `short:"K" name:"keep" help:"Keep the number of commits, 0 keeps all commits"` } func (c *Unbranch) Run(g *Globals) error { if len(c.Revision) == 0 && c.Keep != 0 { fmt.Fprintf(os.Stderr, "%s\n", tr.W("unbranch unspecified branch mode is incompatible with --keep")) return errors.New("unbranch unspecified branch mode is incompatible with --keep") } if len(c.Target) != 0 { if !git.ValidateBranchName([]byte(c.Target)) { fmt.Fprintf(os.Stderr, "invalid branch name '%s'\n", c.Target) return errors.New("bad branch name") } } repoPath := git.RevParseRepoPath(context.Background(), c.CWD) trace.DbgPrint("repository location: %v", repoPath) r, err := replay.NewReplayer(context.Background(), repoPath, 2, g.Verbose) if err != nil { fmt.Fprintf(os.Stderr, "new replayer error: %v\n", err) return err } defer r.Close() // nolint if err := r.Unbranch(&replay.UnbranchOptions{ Branch: c.Revision, Target: c.Target, Confirm: c.Confirm, Prune: c.Prune, Keep: c.Keep, }); err != nil { fmt.Fprintf(os.Stderr, "Linearize repo history error: %v\n", err) return err } return nil } ================================================ FILE: cmd/hot/command/misc.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "bytes" "errors" "fmt" "io" "math" "os" "reflect" "strconv" "strings" "time" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/pkg/kong" ) var ( ErrSyntaxSize = errors.New("size synatx error") ) const ( Byte int64 = 1 << (iota * 10) KiByte MiByte GiByte TiByte PiByte EiByte ) var ( sizeRatio = map[string]int64{ "b": 1, "k": KiByte, "m": MiByte, "g": GiByte, "t": TiByte, "p": PiByte, "e": EiByte, } ) func decodeSize(text string) (int64, error) { text = strings.TrimSuffix(strings.ToLower(text), "b") for s, ratio := range sizeRatio { if strings.HasSuffix(text, s) { i, err := strconv.ParseInt(strings.TrimSpace(text[0:len(text)-len(s)]), 10, 64) if err != nil { return 0, err } return i * ratio, nil } } return strconv.ParseInt(text, 10, 64) } func SizeDecoder() kong.MapperFunc { return func(ctx *kong.DecodeContext, target reflect.Value) error { t, err := ctx.Scan.PopValue("string") if err != nil { return err } var sv string switch v := t.Value.(type) { case string: sv = v default: return fmt.Errorf("expected a string value but got %q (%T)", t, t.Value) } i, err := decodeSize(sv) if err != nil { return err } if target.Kind() != reflect.Int64 { return fmt.Errorf("internal error: type 'size' only works with fields of type int64; got %s", target.Type()) } target.SetInt(i) return nil } } var ( typeLen = map[string]int64{ "seconds": 1, "minutes": 60, "hours": 60 * 60, "days": 24 * 60 * 60, "weeks": 7 * 24 * 60 * 60, } ) func parseTime(str string) (int64, error) { if tt, err := time.Parse(time.RFC3339, str); err == nil { d := time.Until(tt) return int64(d.Seconds()), nil } if d, err := strengthen.ParseDuration(str); err == nil { return int64(d.Seconds()), nil } vv := strings.FieldsFunc(str, func(r rune) bool { return r == '.' || r == ' ' }) if len(vv) != 3 { return 0, fmt.Errorf("bad expire %s", str) } x, err := strconv.ParseInt(vv[0], 10, 64) if err != nil { return 0, err } l := typeLen[vv[1]] if l == 0 { return 0, fmt.Errorf("bad expire %s", vv[1]) } return x * l, nil } // expire func ExpireDecoder() kong.MapperFunc { return func(ctx *kong.DecodeContext, target reflect.Value) error { t, err := ctx.Scan.PopValue("string") if err != nil { return err } var sv string switch v := t.Value.(type) { case string: sv = v default: return fmt.Errorf("expected a string value but got %q (%T)", t, t.Value) } switch sv { case "never", "false": target.SetInt(math.MaxInt64) case "all", "now": target.SetInt(0) default: t, err := parseTime(sv) if err != nil { return err } target.SetInt(t * int64(time.Second)) } return nil } } type NopWriteCloser struct { io.Writer } func (NopWriteCloser) Close() error { return nil } func W(a string) string { return tr.W(a) } func die(format string, a ...any) { var b bytes.Buffer _, _ = b.WriteString(W("fatal: ")) fmt.Fprintf(&b, W(format), a...) _ = b.WriteByte('\n') _, _ = os.Stderr.Write(b.Bytes()) } ================================================ FILE: cmd/hot/command/pager.go ================================================ package command import ( "context" "io" "os" "os/exec" "github.com/antgroup/hugescm/modules/env" "github.com/antgroup/hugescm/modules/shlex" "github.com/antgroup/hugescm/modules/term" ) type Printer interface { io.WriteCloser ColorMode() term.Level EnableColor() bool } type WrapPrinter struct { io.WriteCloser } func (WrapPrinter) ColorMode() term.Level { return term.LevelNone } func (WrapPrinter) EnableColor() bool { return false } // https://github.com/sharkdp/bat/blob/master/src/less.rs func lookupPager() (string, bool) { pager, ok := os.LookupEnv("GIT_PAGER") if ok { return pager, ok } return os.LookupEnv("PAGER") } type printer struct { w io.Writer colorMode term.Level closeFn func() error } func (p *printer) EnableColor() bool { return p.colorMode != term.LevelNone } func (p *printer) ColorMode() term.Level { return p.colorMode } func (p *printer) Write(b []byte) (n int, err error) { return p.w.Write(b) } func (p *printer) Close() error { if p.closeFn == nil { return nil } return p.closeFn() } func NewPrinter(ctx context.Context) *printer { if term.StdoutLevel == term.LevelNone { return &printer{w: os.Stdout, colorMode: term.StdoutLevel} } pager, ok := lookupPager() if ok && len(pager) == 0 { // PAGER disabled return &printer{w: os.Stdout, colorMode: term.StdoutLevel} } if len(pager) == 0 { pager = "less" // search pager } pagerArgs := make([]string, 0, 4) if cmdArgs, _ := shlex.Split(pager, true); len(cmdArgs) > 0 { pager = cmdArgs[0] pagerArgs = append(pagerArgs, cmdArgs[1:]...) } pagerExe, err := env.LookupPager(pager) if err != nil { return &printer{w: os.Stdout, colorMode: term.StdoutLevel} } cmd := exec.CommandContext(ctx, pagerExe, pagerArgs...) cmd.Env = env.SanitizeEnv("PAGER", "LESS", "LV") // AVOID PAGER ENV // PAGER_ENV: LESS=FRX LV=-c cmd.Env = append(cmd.Env, "LESS=FRX", "LV=-c") stdin, err := cmd.StdinPipe() if err != nil { return &printer{w: os.Stdout, colorMode: term.StdoutLevel} } cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Start(); err != nil { _ = stdin.Close() return &printer{w: os.Stdout, colorMode: term.StdoutLevel} } return &printer{w: stdin, colorMode: term.StdoutLevel, closeFn: func() error { _ = stdin.Close() if err := cmd.Wait(); err != nil { return err } return nil }} } ================================================ FILE: cmd/hot/crate.toml ================================================ name = "hot" description = "HugeSCM - A next generation cloud-based version control system" destination = "bin" version = "0.23.0" goflags = [ "-ldflags", "-X github.com/antgroup/hugescm/pkg/version.version=$BUILD_VERSION -X github.com/antgroup/hugescm/pkg/version.buildTime=$BUILD_TIME -X github.com/antgroup/hugescm/pkg/version.buildCommit=$BUILD_COMMIT -X github.com/antgroup/hugescm/pkg/version.telemetry=true", ] ================================================ FILE: cmd/hot/hot.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "os" "github.com/antgroup/hugescm/cmd/hot/command" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/env" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/kong" "github.com/antgroup/hugescm/pkg/version" ) type App struct { command.Globals Cat command.Cat `cmd:"cat" help:"Provide contents or details of repository objects"` Stat command.Stat `cmd:"stat" help:"View repository status"` Size command.Size `cmd:"size" help:"Show repositories size and large files"` Remove command.Remove `cmd:"remove" help:"Remove files in repository and rewrite history"` Smart command.Smart `cmd:"smart" help:"Interactive mode to clean repository large files"` Graft command.Graft `cmd:"graft" help:"Interactive mode to clean repository large files (Grafting mode)"` Mc command.Mc `cmd:"mc" help:"Migrate a repository to the specified object format"` Unbranch command.Unbranch `cmd:"unbranch " help:"Linearize repository history"` PruneRefs command.PruneRefs `cmd:"prune-refs" help:"Prune refs by prefix"` ScanRefs command.ScanRefs `cmd:"scan-refs" help:"Scan references in a local repository"` ExpireRefs command.ExpireRefs `cmd:"expire-refs" help:"Clean up expired references"` Snapshot command.Snapshot `cmd:"snapshot" help:"Create a snapshot commit for the worktree"` Az command.Az `cmd:"az" help:"Analyze repository large files"` Co command.Co `cmd:"co" help:"EXPERIMENTAL: Clones a repository into a newly created directory"` Diff command.Diff `cmd:"diff" help:"Show changes between commits, commit and working tree, etc"` Show command.Show `cmd:"show" help:"Show the changes introduced by a commit"` Debug bool `name:"debug" help:"Enable debug mode; analyze timing"` } func main() { // delay initilaize git env _ = env.DelayInitializeEnv() // initialize locale _ = tr.DelayInitializeLocale() kong.BindW(tr.W) // replace W var app App ctx := kong.Parse(&app, kong.NamedMapper("size", command.SizeDecoder()), kong.NamedMapper("expire", command.ExpireDecoder()), kong.Name("hot"), kong.Description(tr.W("hot - Git repositories maintenance tool")), kong.UsageOnError(), kong.ConfigureHelp(kong.HelpOptions{ Compact: true, NoExpandSubcommands: true, }), kong.Vars{ "version": version.GetVersionString(), }, ) if app.Verbose { trace.EnableDebugMode() } m := strengthen.NewMeasurer("hot", app.Debug) defer m.Close() err := ctx.Run(&app.Globals) if err != nil { os.Exit(1) } } ================================================ FILE: cmd/hot/pkg/README.md ================================================ # hot pkg ================================================ FILE: cmd/hot/pkg/co/co.go ================================================ package co import ( "context" "fmt" "os" "slices" "strconv" "sync" "time" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" ) type CoOptions struct { Remote, Destination string Branch, Commit string Sparse []string Depth int Limit int64 Recursive bool Values []string } var ( newEnviron = sync.OnceValue(func() []string { env := slices.Clone(os.Environ()) if ua, ok := NewUserAgent(); ok { env = append(env, "GIT_USER_AGENT="+ua) } return env }) ) func run(ctx context.Context, repoPath string, cmdArg0 string, args ...string) error { now := time.Now() cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: repoPath, Environ: newEnviron(), Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, NoSetpgid: true, }, cmdArg0, args...) if err := cmd.Run(); err != nil { return err } trace.DbgPrint("exec: %s spent: %v", cmd.String(), time.Since(now)) return nil } func fetch(ctx context.Context, o *CoOptions) error { now := time.Now() if err := git.NewRepo(ctx, o.Destination, git.ReferenceNameDefault, false, git.HashFormatFromSize(len(o.Commit))); err != nil { fmt.Fprintf(os.Stderr, "initialize repository '%s' error: %v\n", o.Destination, err) return err } if err := run(ctx, o.Destination, "git", "config", "index.version", "4"); err != nil { fmt.Fprintf(os.Stderr, "config index v4 error: %v\n", err) return err } if err := run(ctx, o.Destination, "git", "remote", "add", "origin", o.Remote); err != nil { fmt.Fprintf(os.Stderr, "add remote error: %v\n", err) return err } if len(o.Sparse) != 0 { if err := sparseCheckout(ctx, o); err != nil { return err } } fetchArgs := make([]string, 0, 10+len(o.Values)*2) for _, v := range o.Values { fetchArgs = append(fetchArgs, "-c", v) } fetchArgs = append(fetchArgs, "fetch") if o.Depth > 0 && o.Depth < 20 { fetchArgs = append(fetchArgs, "--depth="+strconv.Itoa(o.Depth)) } fetchArgs = append(fetchArgs, "origin", o.Commit) if err := run(ctx, o.Destination, "git", fetchArgs...); err != nil { fmt.Fprintf(os.Stderr, "fetch error: %v", err) return err } // git switch [] [--no-guess] // git switch [] --detach [] // git switch [] (-c|-C) [] // git switch [] --orphan switchArgs := make([]string, 0, 10) switchArgs = append(switchArgs, "switch") if len(o.Branch) == 0 { switchArgs = append(switchArgs, "--detach", o.Commit) } else { switchArgs = append(switchArgs, "-c", o.Branch, o.Commit) } if err := run(ctx, o.Destination, "git", switchArgs...); err != nil { fmt.Fprintf(os.Stderr, "switch error: %v", err) return err } if o.Recursive { submoduleArgs := make([]string, 0, 5+len(o.Values)*2) for _, v := range o.Values { submoduleArgs = append(submoduleArgs, "-c", v) } submoduleArgs = append(submoduleArgs, "submodule", "update", "--init", "--recursive", "--recommend-shallow") if err := run(ctx, o.Destination, "git", submoduleArgs...); err != nil { fmt.Fprintf(os.Stderr, "switch error: %v", err) return err } } _, _ = tr.Fprintf(os.Stderr, "Cloning to '%s' completed, spent: %v.\n", o.Destination, time.Since(now)) return nil } func Co(ctx context.Context, o *CoOptions) error { if len(o.Commit) != 0 && !git.IsGitVersionAtLeast(git.NewVersion(2, 50, 0)) { return fetch(ctx, o) } return clone(ctx, o) } func sparseCheckout(ctx context.Context, o *CoOptions) error { now := time.Now() // https://git-scm.com/docs/git-sparse-checkout#Documentation/git-sparse-checkout.txt-emsetem cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: o.Destination, Environ: newEnviron(), Stderr: os.Stderr, Stdout: os.Stdout, NoSetpgid: true, }, "git", "sparse-checkout", "set", "--cone", "--sparse-index", "--stdin") stdin, err := cmd.StdinPipe() if err != nil { fmt.Fprintf(os.Stderr, "initialize sparse checkout error: %v\n", err) return err } if err := cmd.Start(); err != nil { fmt.Fprintf(os.Stderr, "initialize sparse checkout error: %v\n", err) _ = stdin.Close() return err } // https://git-scm.com/docs/git-sparse-checkout#Documentation/git-sparse-checkout.txt-codegitsparse-checkoutsetMYDIR1SUBDIR2code for _, s := range o.Sparse { if _, err := stdin.Write([]byte(s + "\n")); err != nil { fmt.Fprintf(os.Stderr, "initialize sparse checkout error: %v\n", err) _ = stdin.Close() _ = cmd.Wait() return err } } _ = stdin.Close() if err := cmd.Wait(); err != nil { fmt.Fprintf(os.Stderr, "initialize sparse checkout error: %v\n", err) return err } trace.DbgPrint("git space-checkout spent: %v", time.Since(now)) return nil } func clone(ctx context.Context, o *CoOptions) error { now := time.Now() cloneArgs := make([]string, 0, 20+len(o.Values)*2) for _, v := range o.Values { cloneArgs = append(cloneArgs, "-c", v) } cloneArgs = append(cloneArgs, "-c", "index.version=4", "-c", "advice.detachedHead=false", "clone") switch { case len(o.Sparse) != 0 && o.Limit >= 0: cloneArgs = append(cloneArgs, "--sparse", fmt.Sprintf("--filter=blob:limit=%d", o.Limit), "--no-checkout") case len(o.Sparse) != 0: cloneArgs = append(cloneArgs, "--sparse", "--filter=blob:none", "--no-checkout") case o.Limit >= 0: cloneArgs = append(cloneArgs, fmt.Sprintf("--filter=blob:limit=%d", o.Limit)) } switch { case len(o.Commit) != 0: cloneArgs = append(cloneArgs, "--revision", o.Commit) case len(o.Branch) != 0: cloneArgs = append(cloneArgs, "--single-branch", "--branch", o.Branch) } if o.Depth > 0 && o.Depth < 20 { cloneArgs = append(cloneArgs, "--depth="+strconv.Itoa(o.Depth)) } if o.Recursive { cloneArgs = append(cloneArgs, "recursive", "--shallow-submodules") // submodule shallow } cloneArgs = append(cloneArgs, o.Remote, o.Destination) cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: newEnviron(), Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, NoSetpgid: true, }, "git", cloneArgs...) if err := cmd.Run(); err != nil { fmt.Fprintf(os.Stderr, "clone error: %v", err) return err } if len(o.Branch) != 0 && len(o.Commit) != 0 { if err := run(ctx, o.Destination, "git", "switch", "-c", o.Branch, o.Commit); err != nil { fmt.Fprintf(os.Stderr, "switch error: %v", err) return err } } trace.DbgPrint("git clone spent: %v", time.Since(now)) if len(o.Sparse) != 0 { if err := sparseCheckout(ctx, o); err != nil { return err } if err := run(ctx, o.Destination, "git", "checkout", "HEAD"); err != nil { fmt.Fprintf(os.Stderr, "checkout error: %v\n", err) return err } } _, _ = tr.Fprintf(os.Stderr, "Cloning to '%s' completed, spent: %v.\n", o.Destination, time.Since(now)) return nil } ================================================ FILE: cmd/hot/pkg/co/misc.go ================================================ package co import ( "fmt" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/pkg/version" ) func NewUserAgent() (string, bool) { if !version.TelemetryEnabled() { return "", false } u, err := version.Uname() if err != nil { return "", false } v, err := git.VersionDetect() if err != nil { return "", false } return fmt.Sprintf("git/%s (%s; %s; %s; %s)", v, u.Node, u.Name, u.Machine, u.Release), true } ================================================ FILE: cmd/hot/pkg/co/misc_test.go ================================================ package co import ( "fmt" "os" "testing" ) func TestNewUserAgent(t *testing.T) { u, ok := NewUserAgent() if ok { fmt.Fprintf(os.Stderr, "New user-agent: %s\n", u) } } ================================================ FILE: cmd/hot/pkg/diff/diff.go ================================================ // Package diff provides a parser for git diff output. // It parses the output of: git diff --raw --full-index --find-renames // Based on gitaly's implementation (MIT License). package diff import ( "bufio" "bytes" "errors" "fmt" "io" "path/filepath" "regexp" "strconv" "strings" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/git" ) // Patch represents a single parsed diff entry, extending diferenco.Patch with git metadata. type Patch struct { *diferenco.Patch Status byte // 'A', 'D', 'M', 'R', 'C', 'T' etc. Binary bool OverflowMarker bool Collapsed bool TooLarge bool CollectAllPaths bool PatchSize int32 LinesAdded int32 LinesRemoved int32 lineCount int byteCount int } // Reset clears all fields of p in a way that lets the underlying memory be reused. func (p *Patch) Reset() { *p = Patch{Patch: &diferenco.Patch{}} } // ClearPatch clears only the patch content. func (p *Patch) ClearPatch() { if p.Patch != nil { p.Hunks = nil } } // Parser holds necessary state for parsing a diff stream. type Parser struct { hashFormat git.HashFormat limits Limits patchReader *bufio.Reader rawLines [][]byte currentPatch Patch nextPatchFromPath []byte unreadLine []byte filesProcessed int scannedLines int // Total lines scanned (never decreases) scannedBytes int // Total bytes scanned (never decreases) finished bool stopPatchCollection bool err error } // Limits holds the limits at which either parsing stops or patches are collapsed. type Limits struct { // EnforceLimits causes parsing to stop if Max{Files,Lines,Bytes} is reached. EnforceLimits bool // CollapseDiffs causes patches to be emptied after SafeMax{Files,Lines,Bytes} reached. CollapseDiffs bool // CollectAllPaths parses all diffs but info outside of path may be empty. CollectAllPaths bool // MaxFiles is the maximum number of files to parse. MaxFiles int // MaxLines is the maximum number of diff lines to parse. MaxLines int // MaxBytes is the maximum number of bytes to parse. MaxBytes int // SafeMaxFiles is the number of files after which subsequent files are collapsed. SafeMaxFiles int // SafeMaxLines is the number of lines after which subsequent files are collapsed. SafeMaxLines int // SafeMaxBytes is the number of bytes after which subsequent files are collapsed. SafeMaxBytes int // MaxPatchBytes is the maximum bytes a single patch can have. MaxPatchBytes int // MaxPatchBytesForFileExtension overrides MaxPatchBytes for specific file types. MaxPatchBytesForFileExtension map[string]int // PatchLimitsOnly uses only MaxPatchBytes limits, ignoring cumulative limits. PatchLimitsOnly bool } const ( maxFilesUpperBound = 5000 maxLinesUpperBound = 250000 maxBytesUpperBound = 5000 * 5120 // 24MB safeMaxFilesUpperBound = 500 safeMaxLinesUpperBound = 25000 safeMaxBytesUpperBound = 500 * 5120 // 2.4MB maxPatchBytesUpperBound = 512000 // 500KB ) var ( rawSHA1LineRegexp = regexp.MustCompile(`(?m)^:(\d+) (\d+) ([[:xdigit:]]{40}) ([[:xdigit:]]{40}) ([ADTUXMRC]\d*)\t(.*?)(?:\t(.*?))?$`) rawSHA256LineRegexp = regexp.MustCompile(`(?m)^:(\d+) (\d+) ([[:xdigit:]]{64}) ([[:xdigit:]]{64}) ([ADTUXMRC]\d*)\t(.*?)(?:\t(.*?))?$`) ) // NewParser returns a new Parser. func NewParser(hashFormat git.HashFormat, src io.Reader, limits Limits) *Parser { limits.enforceUpperBound() parser := &Parser{ hashFormat: hashFormat, limits: limits, } reader := bufio.NewReader(src) parser.cacheRawLines(reader) parser.patchReader = reader return parser } // Parse parses a single diff. It returns true if successful, false if finished or error. func (parser *Parser) Parse() bool { if parser.finished || len(parser.rawLines) == 0 { return false } if err := parser.initializeCurrentPatch(); err != nil { return false } if parser.nextPatchFromPath == nil { path, err := parser.readDiffHeaderFromPath() if err != nil { parser.err = err return false } parser.nextPatchFromPath = path } if !bytes.Equal(parser.nextPatchFromPath, parser.currentPatchFromPath()) { // The current diff has an empty patch return true } parser.nextPatchFromPath = nil if err := readNextDiff(parser.patchReader, &parser.currentPatch, parser.stopPatchCollection); err != nil { parser.err = err return false } parser.scannedLines += parser.currentPatch.lineCount parser.scannedBytes += parser.currentPatch.byteCount // Calculate PatchSize from hunks parser.currentPatch.PatchSize = int32(parser.currentPatch.byteCount) if parser.limits.CollapseDiffs && parser.isOverSafeLimits() && parser.currentPatch.lineCount > 0 { parser.prunePatch() parser.currentPatch.Collapsed = true if parser.limits.CollectAllPaths { parser.currentPatch.CollectAllPaths = true } } if parser.limits.EnforceLimits { maxPatchBytesExceeded := parser.limits.MaxPatchBytes > 0 && parser.currentPatch.byteCount >= parser.maxPatchBytesForCurrentFile() if maxPatchBytesExceeded { parser.prunePatch() parser.currentPatch.TooLarge = true } maxFilesExceeded := exceeded(parser.filesProcessed, parser.limits.MaxFiles) maxLinesExceeded := exceeded(parser.scannedLines, parser.limits.MaxLines) maxBytesExceeded := exceeded(parser.scannedBytes, parser.limits.MaxBytes) maxLimitsExceeded := maxLinesExceeded || maxBytesExceeded || maxFilesExceeded if maxLimitsExceeded && !parser.limits.PatchLimitsOnly { if parser.limits.CollectAllPaths { parser.currentPatch.CollectAllPaths = true parser.currentPatch.ClearPatch() parser.stopPatchCollection = true } else { parser.finished = true parser.currentPatch.Reset() } parser.currentPatch.OverflowMarker = true } } return true } // Patch returns a successfully parsed patch. Valid until next Parse() call. func (parser *Parser) Patch() *Patch { return &parser.currentPatch } // Err returns the error encountered during parsing. func (parser *Parser) Err() error { return parser.err } func (parser *Parser) currentPatchFromPath() []byte { if parser.currentPatch.From != nil { return []byte(parser.currentPatch.From.Name) } if parser.currentPatch.To != nil { return []byte(parser.currentPatch.To.Name) } return nil } func (parser *Parser) cacheRawLines(reader *bufio.Reader) { for { line, err := reader.ReadBytes('\n') if err != nil { if errors.Is(err, io.EOF) { // Handle EOF with data - last line without newline if len(line) > 0 { if bytes.HasPrefix(line, []byte(":")) { parser.rawLines = append(parser.rawLines, line) } else { parser.unreadLine = line } } } else { parser.err = err parser.finished = true } return } if !bytes.HasPrefix(line, []byte(":")) { // Store the non-raw line for later use parser.unreadLine = line return } parser.rawLines = append(parser.rawLines, line) } } func (parser *Parser) nextRawLine() []byte { if len(parser.rawLines) == 0 { return nil } line := parser.rawLines[0] parser.rawLines = parser.rawLines[1:] return line } func (parser *Parser) initializeCurrentPatch() error { parser.currentPatch.Reset() line := parser.nextRawLine() if line == nil { return nil } if err := parseRawLine(parser.hashFormat, line, &parser.currentPatch); err != nil { parser.err = err return err } if parser.currentPatch.Status == 'T' { parser.handleTypeChangeDiff() } parser.filesProcessed++ return nil } func (parser *Parser) readDiffHeaderFromPath() ([]byte, error) { var line []byte var err error for { // Use unread line if available if len(parser.unreadLine) > 0 { line = parser.unreadLine parser.unreadLine = nil } else { line, err = parser.patchReader.ReadBytes('\n') if err != nil { if errors.Is(err, io.EOF) { // Handle EOF with data - last line without newline if len(line) > 0 { // Process the last line } else { return nil, nil } } else { return nil, fmt.Errorf("read diff header line: %w", err) } } } // Skip empty lines if len(bytes.TrimSpace(line)) == 0 { continue } // Skip non-diff-header lines (index, ---, +++, new file mode, deleted file mode, etc.) if bytes.HasPrefix(line, []byte("index ")) || bytes.HasPrefix(line, []byte("---")) || bytes.HasPrefix(line, []byte("+++")) || bytes.HasPrefix(line, []byte("new file mode ")) || bytes.HasPrefix(line, []byte("deleted file mode ")) || bytes.HasPrefix(line, []byte("old mode ")) || bytes.HasPrefix(line, []byte("new mode ")) || bytes.HasPrefix(line, []byte("similarity index ")) || bytes.HasPrefix(line, []byte("copy from ")) || bytes.HasPrefix(line, []byte("copy to ")) || bytes.HasPrefix(line, []byte("rename from ")) || bytes.HasPrefix(line, []byte("rename to ")) { continue } // Hand-parse diff --git header instead of regex path, err := parseDiffHeaderPath(line) if err != nil { return nil, err } return path, nil } } // parseDiffHeaderPath hand-parses "diff --git a/path b/path" to extract the from-path // This function properly handles quoted paths with escape sequences func parseDiffHeaderPath(line []byte) ([]byte, error) { // Must start with "diff --git " if !bytes.HasPrefix(line, []byte("diff --git ")) { return nil, fmt.Errorf("not a diff --git header: %q", line) } line = line[11:] // Skip "diff --git " // Parse two paths: "a/path" "b/path" or "a/path" b/path or a/path b/path paths, err := parseTwoPaths(line) if err != nil { return nil, err } if len(paths) != 2 { return nil, fmt.Errorf("expected 2 paths in diff header, got %d", len(paths)) } // Extract first path (from-path) path1 := paths[0] // Verify it starts with "a/" if !bytes.HasPrefix(path1, []byte("a/")) { return nil, fmt.Errorf("first path must start with a/: %q", path1) } // Verify second path starts with "b/" if len(paths) > 1 && !bytes.HasPrefix(paths[1], []byte("b/")) { return nil, fmt.Errorf("second path must start with b/: %q", paths[1]) } // Strip "a/" prefix and unescape path := path1[2:] return unescape(path), nil } // parseTwoPaths parses two paths from a diff header line // Handles both quoted and unquoted paths func parseTwoPaths(line []byte) ([][]byte, error) { var paths [][]byte for len(line) > 0 && len(paths) < 2 { // Skip leading whitespace line = bytes.TrimSpace(line) if len(line) == 0 { break } var path []byte var err error if line[0] == '"' { // Quoted path: find matching quote handling escape sequences path, line, err = parseQuotedPath(line) if err != nil { return nil, err } // Unquote after extracting the path path = unquoteBytes(path) } else { // Unquoted path: find next whitespace or end path, line = parseUnquotedPath(line) } if len(path) > 0 { paths = append(paths, path) } } return paths, nil } // parseQuotedPath parses a quoted path, handling escape sequences func parseQuotedPath(line []byte) ([]byte, []byte, error) { if len(line) == 0 || line[0] != '"' { return nil, line, fmt.Errorf("expected quoted path") } // Find matching quote, handling escape sequences i := 1 for i < len(line) { if line[i] == '\\' && i+1 < len(line) { // Skip escaped character (handles \", \\, and other escapes) i += 2 continue } if line[i] == '"' { // Found matching quote path := line[:i+1] remaining := line[i+1:] return path, remaining, nil } i++ } return nil, line, fmt.Errorf("unclosed quote in path: %q", line) } // parseUnquotedPath parses an unquoted path up to next whitespace func parseUnquotedPath(line []byte) ([]byte, []byte) { i := 0 for i < len(line) && line[i] != ' ' && line[i] != '\t' { i++ } return line[:i], line[i:] } func (parser *Parser) handleTypeChangeDiff() { // Type change: split into deletion + addition // Use To.Name for synthetic add path, not From.Name newRawLine := fmt.Sprintf( ":%o %o %s %s A\t%s\n", 0, parser.currentPatch.To.Mode, parser.hashFormat.ZeroOID(), parser.currentPatch.To.Hash, parser.currentPatch.To.Name, ) parser.currentPatch.From = &diferenco.File{ Name: parser.currentPatch.From.Name, Hash: parser.currentPatch.From.Hash, Mode: 0, } parser.currentPatch.To = nil parser.rawLines = append([][]byte{[]byte(newRawLine)}, parser.rawLines...) } func parseRawLine(hashFormat git.HashFormat, line []byte, patch *Patch) error { var re *regexp.Regexp switch hashFormat { case git.HashSHA1: re = rawSHA1LineRegexp case git.HashSHA256: re = rawSHA256LineRegexp default: return fmt.Errorf("cannot parse raw diff line with unknown hash format %q", hashFormat) } matches := re.FindSubmatch(line) if len(matches) == 0 { return fmt.Errorf("raw line regexp mismatch") } oldMode, err := strconv.ParseInt(string(matches[1]), 8, 32) if err != nil { return fmt.Errorf("parse old mode: %w", err) } newMode, err := strconv.ParseInt(string(matches[2]), 8, 32) if err != nil { return fmt.Errorf("parse new mode: %w", err) } oldOID := string(matches[3]) newOID := string(matches[4]) status := matches[5][0] fromPath := unescape(unquoteBytes(matches[6])) var toPath []byte if status == 'C' || status == 'R' { if len(matches) < 8 || len(matches[7]) == 0 { return fmt.Errorf("raw line missing target path for status %c", status) } toPath = unescape(unquoteBytes(matches[7])) } else { toPath = fromPath } // Build From file info if oldOID != hashFormat.ZeroOID() { patch.From = &diferenco.File{ Name: string(fromPath), Hash: oldOID, Mode: uint32(oldMode), } } // Build To file info if newOID != hashFormat.ZeroOID() { patch.To = &diferenco.File{ Name: string(toPath), Hash: newOID, Mode: uint32(newMode), } } patch.Status = status return nil } func readNextDiff(reader *bufio.Reader, patch *Patch, skipPatch bool) error { var patchLines []string for currentPatchDone := false; !currentPatchDone || reader.Buffered() > 0; { line, err := reader.Peek(10) if errors.Is(err, io.EOF) { currentPatchDone = true } else if err != nil { return fmt.Errorf("peek diff line: %w", err) } switch { case bytes.HasPrefix(line, []byte("diff --git")): // Parse hunks before returning if !skipPatch && len(patchLines) > 0 { hunks, err := parseHunks(patchLines) if err != nil { return err } patch.Hunks = hunks } return nil case bytes.HasPrefix(line, []byte("---")) || bytes.HasPrefix(line, []byte("+++")): if len(patchLines) == 0 { if err := discardLine(reader); err != nil { return err } continue } case bytes.HasPrefix(line, []byte("@@")): if err := consumeChunkLine(reader, patch, skipPatch, false, &patchLines); err != nil { return err } case bytes.HasPrefix(line, []byte("Binary")): patch.Binary = true patch.IsBinary = true fallthrough case bytes.HasPrefix(line, []byte("-")) || bytes.HasPrefix(line, []byte("+")) || bytes.HasPrefix(line, []byte(" ")) || bytes.HasPrefix(line, []byte("\\")) || bytes.HasPrefix(line, []byte("~\n")): if err := consumeChunkLine(reader, patch, skipPatch, true, &patchLines); err != nil { return err } default: if err := discardLine(reader); err != nil { return err } } } // Parse hunks for the last patch if !skipPatch && len(patchLines) > 0 { hunks, err := parseHunks(patchLines) if err != nil { return err } patch.Hunks = hunks } return nil } func consumeChunkLine(reader *bufio.Reader, patch *Patch, skipPatch, updateStats bool, patchLines *[]string) error { var byteCount int for done := false; !done; { line, err := reader.ReadSlice('\n') if updateStats && byteCount == 0 && len(line) > 0 { switch line[0] { case '+': patch.LinesAdded++ case '-': patch.LinesRemoved++ } } byteCount += len(line) switch { case errors.Is(err, bufio.ErrBufferFull): // long line: keep reading case err != nil && !errors.Is(err, io.EOF): return fmt.Errorf("read chunk line: %w", err) default: done = true } if !skipPatch { *patchLines = append(*patchLines, string(line)) } } if updateStats { patch.byteCount += byteCount patch.lineCount++ } return nil } func discardLine(reader *bufio.Reader) error { _, err := reader.ReadBytes('\n') if err != nil && !errors.Is(err, io.EOF) { return fmt.Errorf("read line: %w", err) } return nil } func (limit *Limits) enforceUpperBound() { limit.MaxFiles = min(limit.MaxFiles, maxFilesUpperBound) limit.MaxLines = min(limit.MaxLines, maxLinesUpperBound) limit.MaxBytes = min(limit.MaxBytes, maxBytesUpperBound) limit.SafeMaxFiles = min(limit.SafeMaxFiles, safeMaxFilesUpperBound) limit.SafeMaxLines = min(limit.SafeMaxLines, safeMaxLinesUpperBound) limit.SafeMaxBytes = min(limit.SafeMaxBytes, safeMaxBytesUpperBound) limit.MaxPatchBytes = min(limit.MaxPatchBytes, maxPatchBytesUpperBound) } func (parser *Parser) prunePatch() { // Only clear patch content, do NOT decrease scannedLines/scannedBytes // Cumulative limits track what was actually read, not what is kept parser.currentPatch.ClearPatch() } // exceeded returns true if current > limit and limit > 0. // A limit of 0 means "no limit", so it never triggers exceeded. func exceeded(current, limit int) bool { return limit > 0 && current > limit } func (parser *Parser) isOverSafeLimits() bool { return exceeded(parser.filesProcessed, parser.limits.SafeMaxFiles) || exceeded(parser.scannedLines, parser.limits.SafeMaxLines) || exceeded(parser.scannedBytes, parser.limits.SafeMaxBytes) } func (parser *Parser) maxPatchBytesForCurrentFile() int { if len(parser.limits.MaxPatchBytesForFileExtension) > 0 { var toPath string if parser.currentPatch.To != nil { toPath = parser.currentPatch.To.Name } else if parser.currentPatch.From != nil { toPath = parser.currentPatch.From.Name } if toPath != "" { fileName := filepath.Base(toPath) key := filepath.Ext(fileName) if key == "" { key = fileName } if limit, ok := parser.limits.MaxPatchBytesForFileExtension[key]; ok { return limit } } } return parser.limits.MaxPatchBytes } // unescape unescapes the escape codes used by 'git diff'. func unescape(s []byte) []byte { var unescaped []byte for i := 0; i < len(s); i++ { if s[i] == '\\' { if i+3 < len(s) && isOctalDigit(s[i+1]) && isOctalDigit(s[i+2]) && isOctalDigit(s[i+3]) { octalByte, err := strconv.ParseUint(string(s[i+1:i+4]), 8, 8) if err == nil { unescaped = append(unescaped, byte(octalByte)) i += 3 continue } } if i+1 < len(s) { var unescapedByte byte switch s[i+1] { case '"', '\\', '/', '\'': unescapedByte = s[i+1] case 'a': unescapedByte = '\a' case 'b': unescapedByte = '\b' case 'f': unescapedByte = '\f' case 'n': unescapedByte = '\n' case 'r': unescapedByte = '\r' case 't': unescapedByte = '\t' case 'v': unescapedByte = '\v' default: unescaped = append(unescaped, '\\') unescapedByte = s[i+1] } unescaped = append(unescaped, unescapedByte) i++ continue } } unescaped = append(unescaped, s[i]) } return unescaped } func isOctalDigit(b byte) bool { return b >= '0' && b <= '7' } // unquoteBytes removes surrounding quotes from a byte slice func unquoteBytes(s []byte) []byte { if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' { s = s[1 : len(s)-1] } return s } // parseHunks parses collected patch lines into diferenco.Hunk structures. func parseHunks(lines []string) ([]*diferenco.Hunk, error) { if len(lines) == 0 { return nil, nil } var hunks []*diferenco.Hunk var currentHunk *diferenco.Hunk for _, line := range lines { if strings.HasPrefix(line, "@@") { if currentHunk != nil { hunks = append(hunks, currentHunk) } fromLine, fromCount, toLine, toCount, section, err := parseHunkHeader(line) if err != nil { return nil, err } currentHunk = &diferenco.Hunk{ FromLine: fromLine, ToLine: toLine, Section: section, } _ = fromCount // Reserved for future validation _ = toCount // Reserved for future validation continue } if currentHunk == nil || len(line) == 0 { continue } // Skip "\ No newline at end of file" marker - it's metadata, not content if strings.HasPrefix(line, "\\ No newline at end of file") { continue } var kind diferenco.Operation switch line[0] { case '+': kind = diferenco.Insert case '-': kind = diferenco.Delete case ' ': kind = diferenco.Equal default: continue } currentHunk.Lines = append(currentHunk.Lines, diferenco.Line{ Kind: kind, Content: line[1:], }) } if currentHunk != nil { hunks = append(hunks, currentHunk) } return hunks, nil } // parseHunkHeader parses a hunk header line. // Format: @@ -start,count +start,count @@ section func parseHunkHeader(header string) (fromLine, fromCount, toLine, toCount int, section string, err error) { if !strings.HasPrefix(header, "@@ ") { return 0, 0, 0, 0, "", fmt.Errorf("malformed hunk header: %q", header) } rest := strings.TrimPrefix(header, "@@ ") before, after, ok := strings.Cut(rest, " @@") if !ok { return 0, 0, 0, 0, "", fmt.Errorf("malformed hunk header: %q", header) } body := before remain := after // skip " @@" if len(remain) > 0 && remain[0] == ' ' { section = strings.TrimRight(remain[1:], "\r\n") } fields := strings.Fields(body) if len(fields) != 2 { return 0, 0, 0, 0, "", fmt.Errorf("malformed hunk header: %q", header) } if !strings.HasPrefix(fields[0], "-") || !strings.HasPrefix(fields[1], "+") { return 0, 0, 0, 0, "", fmt.Errorf("malformed hunk header: %q", header) } fromLine, fromCount, err = parseRange(fields[0], '-') if err != nil { return 0, 0, 0, 0, "", fmt.Errorf("malformed hunk header: %q", header) } toLine, toCount, err = parseRange(fields[1], '+') if err != nil { return 0, 0, 0, 0, "", fmt.Errorf("malformed hunk header: %q", header) } return fromLine, fromCount, toLine, toCount, section, nil } // parseRange parses a line range specification. // Format: -start,count or +start,count or -start or +start func parseRange(s string, prefix byte) (start, count int, err error) { if len(s) < 2 || s[0] != prefix { return 0, 0, fmt.Errorf("invalid range: %q", s) } s = s[1:] before, after, ok := strings.Cut(s, ",") if !ok { start, err = strconv.Atoi(s) if err != nil { return 0, 0, err } return start, 1, nil } start, err = strconv.Atoi(before) if err != nil { return 0, 0, err } count, err = strconv.Atoi(after) if err != nil { return 0, 0, err } if count < 0 { return 0, 0, fmt.Errorf("invalid count: %d", count) } return start, count, nil } ================================================ FILE: cmd/hot/pkg/diff/parser_test.go ================================================ package diff import ( "bytes" "fmt" "strings" "testing" "github.com/antgroup/hugescm/modules/git" ) // rawLine generates a raw diff line with proper tab character func rawLine(oldMode, newMode int, oldOID, newOID, status, path string) string { return fmt.Sprintf(":%06o %06o %s %s %s\t%s\n", oldMode, newMode, oldOID, newOID, status, path) } // rawLineRename generates a raw diff line for rename/copy with from and to paths func rawLineRename(oldMode, newMode int, oldOID, newOID, status, fromPath, toPath string) string { return fmt.Sprintf(":%06o %06o %s %s %s\t%s\t%s\n", oldMode, newMode, oldOID, newOID, status, fromPath, toPath) } var sha1ZeroOID = "0000000000000000000000000000000000000000" func TestParserBasic(t *testing.T) { input := rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "main.go") + "diff --git a/main.go b/main.go\n" + "index abcdef12..12345678 100644\n" + "--- a/main.go\n" + "+++ b/main.go\n" + "@@ -1,3 +1,4 @@\n" + " package main\n" + "+import \"fmt\"\n" + " func main() {}\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) count := 0 for parser.Parse() { patch := parser.Patch() count++ t.Logf("Patch %d: status=%c, from=%v, to=%v, hunks=%d, binary=%v", count, patch.Status, patch.From, patch.To, len(patch.Hunks), patch.Binary) } if err := parser.Err(); err != nil { t.Fatalf("parser error: %v", err) } if count != 1 { t.Errorf("expected 1 patch, got %d", count) } } func TestParserModify(t *testing.T) { input := rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "main.go") + "diff --git a/main.go b/main.go\n" + "--- a/main.go\n" + "+++ b/main.go\n" + "@@ -1,3 +1,4 @@\n" + " package main\n" + "+import \"fmt\"\n" + " func main() {}\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if patch.Status != 'M' { t.Errorf("expected status M, got %c", patch.Status) } if patch.From == nil || patch.From.Name != "main.go" { t.Errorf("expected from file 'main.go', got %v", patch.From) } if patch.To == nil || patch.To.Name != "main.go" { t.Errorf("expected to file 'main.go', got %v", patch.To) } if len(patch.Hunks) == 0 { t.Error("expected hunks to be parsed") } } func TestParserAdd(t *testing.T) { input := rawLine(0, 0100644, sha1ZeroOID, "1234567890abcdef1234567890abcdef12345678", "A", "new.go") + "diff --git a/new.go b/new.go\n" + "--- /dev/null\n" + "+++ b/new.go\n" + "@@ -0,0 +1,2 @@\n" + "+package main\n" + "+func newFunc() {}\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if patch.Status != 'A' { t.Errorf("expected status A, got %c", patch.Status) } if patch.From != nil { t.Errorf("expected from file to be nil for new file, got %v", patch.From) } if patch.To == nil || patch.To.Name != "new.go" { t.Errorf("expected to file 'new.go', got %v", patch.To) } } func TestParserDelete(t *testing.T) { input := rawLine(0100644, 0, "abcdef1234567890abcdef1234567890abcdef12", sha1ZeroOID, "D", "old.go") + "diff --git a/old.go b/old.go\n" + "--- a/old.go\n" + "+++ /dev/null\n" + "@@ -1,2 +0,0 @@\n" + "-package main\n" + "-func oldFunc() {}\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if patch.Status != 'D' { t.Errorf("expected status D, got %c", patch.Status) } if patch.From == nil || patch.From.Name != "old.go" { t.Errorf("expected from file 'old.go', got %v", patch.From) } if patch.To != nil { t.Errorf("expected to file to be nil for deleted file, got %v", patch.To) } } func TestParserBinary(t *testing.T) { input := rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "image.png") + "diff --git a/image.png b/image.png\n" + "Binary files a/image.png and b/image.png differ\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if !patch.Binary { t.Error("expected binary flag to be true") } if !patch.IsBinary { t.Error("expected IsBinary to be true") } } func TestParserLimits(t *testing.T) { input := rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "file1.go") + "diff --git a/file1.go b/file1.go\n" + "--- a/file1.go\n" + "+++ b/file1.go\n" + "@@ -1 +1 @@\n" + "-old\n" + "+new\n" + rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "file2.go") + "diff --git a/file2.go b/file2.go\n" + "--- a/file2.go\n" + "+++ b/file2.go\n" + "@@ -1 +1 @@\n" + "-old\n" + "+new\n" limits := Limits{ EnforceLimits: true, MaxFiles: 1, } parser := NewParser(git.HashSHA1, strings.NewReader(input), limits) count := 0 for parser.Parse() { count++ } if count > 2 { t.Errorf("expected at most 2 patches with limit, got %d", count) } } func TestParseHunks(t *testing.T) { lines := []string{ "--- a/main.go\n", "+++ b/main.go\n", "@@ -1,5 +1,6 @@\n", " package main\n", "\n", "+import \"fmt\"\n", " func main() {\n", "- println(\"hello\")\n", "+ fmt.Println(\"hello world\")\n", " }\n", } hunks, err := parseHunks(lines) if err != nil { t.Fatalf("parseHunks error: %v", err) } if len(hunks) != 1 { t.Fatalf("expected 1 hunk, got %d", len(hunks)) } hunk := hunks[0] if hunk.FromLine != 1 { t.Errorf("expected FromLine=1, got %d", hunk.FromLine) } if hunk.ToLine != 1 { t.Errorf("expected ToLine=1, got %d", hunk.ToLine) } if len(hunk.Lines) == 0 { t.Fatal("expected hunk to have lines") } var added, removed int for _, line := range hunk.Lines { switch line.Kind { case 1: // Insert added++ case -1: // Delete removed++ } } if added != 2 { t.Errorf("expected 2 added lines, got %d", added) } if removed != 1 { t.Errorf("expected 1 removed line, got %d", removed) } } func TestParseHunksWithSection(t *testing.T) { lines := []string{ "@@ -1,3 +1,4 @@ function main() {\n", " package main\n", "+import \"fmt\"\n", " func main() {}\n", } hunks, err := parseHunks(lines) if err != nil { t.Fatalf("parseHunks error: %v", err) } if len(hunks) != 1 { t.Fatalf("expected 1 hunk, got %d", len(hunks)) } hunk := hunks[0] if hunk.Section != "function main() {" { t.Errorf("expected Section='function main() {', got %q", hunk.Section) } if hunk.FromLine != 1 { t.Errorf("expected FromLine=1, got %d", hunk.FromLine) } if hunk.ToLine != 1 { t.Errorf("expected ToLine=1, got %d", hunk.ToLine) } } func TestParseHunksWithEmptySection(t *testing.T) { lines := []string{ "@@ -1,3 +1,4 @@\n", " package main\n", "+import \"fmt\"\n", " func main() {}\n", } hunks, err := parseHunks(lines) if err != nil { t.Fatalf("parseHunks error: %v", err) } if len(hunks) != 1 { t.Fatalf("expected 1 hunk, got %d", len(hunks)) } hunk := hunks[0] if hunk.Section != "" { t.Errorf("expected empty Section, got %q", hunk.Section) } } func TestUnescape(t *testing.T) { tests := []struct { input string expect string }{ {"simple.txt", "simple.txt"}, {"file\\040with\\040spaces.txt", "file with spaces.txt"}, {"file\\twith\\ttabs.txt", "file\twith\ttabs.txt"}, {"file\\nwith\\nnewline.txt", "file\nwith\nnewline.txt"}, {"file\\\"quotes\\\".txt", "file\"quotes\".txt"}, {"file\\\\backslash.txt", "file\\backslash.txt"}, } for _, tt := range tests { t.Run(tt.input, func(t *testing.T) { result := unescape([]byte(tt.input)) if string(result) != tt.expect { t.Errorf("unescape(%q) = %q, want %q", tt.input, result, tt.expect) } }) } } func TestLimitsEnforceUpperBound(t *testing.T) { limits := Limits{ MaxFiles: 10000, MaxLines: 500000, MaxBytes: 100 * 1024 * 1024, SafeMaxFiles: 1000, SafeMaxLines: 50000, SafeMaxBytes: 10 * 1024 * 1024, MaxPatchBytes: 1024 * 1024, } limits.enforceUpperBound() if limits.MaxFiles > maxFilesUpperBound { t.Errorf("MaxFiles should be <= %d, got %d", maxFilesUpperBound, limits.MaxFiles) } } // TestParserConsecutiveEmptyPatches tests consecutive files with mode changes only (no diff content) func TestParserConsecutiveEmptyPatches(t *testing.T) { // Two files with only mode changes - no actual diff content input := rawLine(0100644, 0100755, "abcdef1234567890abcdef1234567890abcdef12", "abcdef1234567890abcdef1234567890abcdef12", "M", "script.sh") + rawLine(0100644, 0100755, "1234567890abcdef1234567890abcdef12345678", "1234567890abcdef1234567890abcdef12345678", "M", "tool.sh") parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) count := 0 for parser.Parse() { patch := parser.Patch() count++ path := "" if patch.From != nil { path = patch.From.Name } else if patch.To != nil { path = patch.To.Name } t.Logf("Patch %d: status=%c, path=%s, binary=%v, hunks=%d", count, patch.Status, path, patch.Binary, len(patch.Hunks)) // Mode-only changes should have no hunks if len(patch.Hunks) > 0 { t.Errorf("patch %d: expected no hunks for mode-only change, got %d", count, len(patch.Hunks)) } } if err := parser.Err(); err != nil { t.Fatalf("parser error: %v", err) } if count != 2 { t.Errorf("expected 2 patches for mode-only changes, got %d", count) } } // TestParserQuotedPaths tests handling of paths with special characters func TestParserQuotedPaths(t *testing.T) { // Paths with spaces and special characters are quoted in git diff input := rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "file with spaces.go") + "diff --git \"a/file with spaces.go\" \"b/file with spaces.go\"\n" + "index abcdef12..12345678 100644\n" + "--- \"a/file with spaces.go\"\n" + "+++ \"b/file with spaces.go\"\n" + "@@ -1 +1 @@\n" + "-old\n" + "+new\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if patch.Status != 'M' { t.Errorf("expected status M, got %c", patch.Status) } // Path should be correctly extracted (unquoted) if patch.From == nil || patch.From.Name != "file with spaces.go" { t.Errorf("expected from file 'file with spaces.go', got %v", patch.From) } if patch.To == nil || patch.To.Name != "file with spaces.go" { t.Errorf("expected to file 'file with spaces.go', got %v", patch.To) } t.Logf("Quoted path parsed: %s", patch.To.Name) } // TestParserQuotedPathsWithEscapes tests handling of quoted paths with escape sequences func TestParserQuotedPathsWithEscapes(t *testing.T) { tests := []struct { name string input string fromPath string toPath string }{ { name: "quoted path with spaces", input: rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "foo bar.go") + "diff --git \"a/foo bar.go\" \"b/foo bar.go\"\n" + "index abcdef12..12345678 100644\n" + "--- \"a/foo bar.go\"\n" + "+++ \"b/foo bar.go\"\n" + "@@ -1 +1 @@\n" + "-old\n" + "+new\n", fromPath: "foo bar.go", toPath: "foo bar.go", }, { name: "quoted path with octal escape", input: rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "foo bar.go") + "diff --git \"a/foo\\040bar.go\" \"b/foo\\040bar.go\"\n" + "index abcdef12..12345678 100644\n" + "--- \"a/foo\\040bar.go\"\n" + "+++ \"b/foo\\040bar.go\"\n" + "@@ -1 +1 @@\n" + "-old\n" + "+new\n", fromPath: "foo bar.go", toPath: "foo bar.go", }, { name: "quoted path with escaped quote", input: rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "foo\"bar.go") + "diff --git \"a/foo\\\"bar.go\" \"b/foo\\\"bar.go\"\n" + "index abcdef12..12345678 100644\n" + "--- \"a/foo\\\"bar.go\"\n" + "+++ \"b/foo\\\"bar.go\"\n" + "@@ -1 +1 @@\n" + "-old\n" + "+new\n", fromPath: "foo\"bar.go", toPath: "foo\"bar.go", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { parser := NewParser(git.HashSHA1, strings.NewReader(tt.input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if patch.From == nil || patch.From.Name != tt.fromPath { t.Errorf("expected from file %q, got %v", tt.fromPath, patch.From) } if patch.To == nil || patch.To.Name != tt.toPath { t.Errorf("expected to file %q, got %v", tt.toPath, patch.To) } t.Logf("Parsed quoted path with escapes: %s -> %s", patch.From.Name, patch.To.Name) }) } } // TestParserRenameWithPatch tests rename operations with content changes func TestParserRenameWithPatch(t *testing.T) { // Rename with content modification - R100 means 100% similarity input := rawLineRename(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "R100", "old.go", "new.go") + "diff --git a/old.go b/new.go\n" + "similarity index 100%\n" + "rename from old.go\n" + "rename to new.go\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if patch.Status != 'R' { t.Errorf("expected status R, got %c", patch.Status) } if patch.From == nil || patch.From.Name != "old.go" { t.Errorf("expected from file 'old.go', got %v", patch.From) } if patch.To == nil || patch.To.Name != "new.go" { t.Errorf("expected to file 'new.go', got %v", patch.To) } t.Logf("Rename: %s -> %s, similarity=100%%", patch.From.Name, patch.To.Name) } // TestParserCopyWithPatch tests copy operations func TestParserCopyWithPatch(t *testing.T) { input := rawLineRename(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "C100", "original.go", "copy.go") + "diff --git a/original.go b/copy.go\n" + "similarity index 100%\n" + "copy from original.go\n" + "copy to copy.go\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if patch.Status != 'C' { t.Errorf("expected status C, got %c", patch.Status) } if patch.From == nil || patch.From.Name != "original.go" { t.Errorf("expected from file 'original.go', got %v", patch.From) } if patch.To == nil || patch.To.Name != "copy.go" { t.Errorf("expected to file 'copy.go', got %v", patch.To) } t.Logf("Copy: %s -> %s, similarity=100%%", patch.From.Name, patch.To.Name) } // TestParserNoNewlineAtEOF tests handling of files without newline at end func TestParserNoNewlineAtEOF(t *testing.T) { input := rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "file.go") + "diff --git a/file.go b/file.go\n" + "--- a/file.go\n" + "+++ b/file.go\n" + "@@ -1,2 +1,2 @@\n" + " line1\n" + "-line2\n" + "\\ No newline at end of file\n" + "+line2new\n" + "\\ No newline at end of file\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { if err := parser.Err(); err != nil { t.Fatalf("expected to parse one patch, error: %v", err) } t.Fatal("expected to parse one patch") } patch := parser.Patch() if len(patch.Hunks) == 0 { t.Fatal("expected hunks to be parsed") } // The "No newline at end of file" marker should not create extra lines hunk := patch.Hunks[0] var deleteCount, insertCount int for _, line := range hunk.Lines { if line.Kind == -1 { deleteCount++ continue } if line.Kind == 1 { insertCount++ } } if deleteCount != 1 { t.Errorf("expected 1 deleted line, got %d", deleteCount) } if insertCount != 1 { t.Errorf("expected 1 inserted line, got %d", insertCount) } t.Logf("No-newline-at-EOF handled correctly: %d deletes, %d inserts", deleteCount, insertCount) } // TestParserTypeChange tests type-change (file to symlink, etc.) handling func TestParserTypeChange(t *testing.T) { // Type change: regular file to symlink input := rawLine(0100644, 0120755, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "T", "link") + "diff --git a/link b/link\n" + "deleted file mode 100644\n" + "index abcdef12..12345678\n" + "--- a/link\n" + "+++ /dev/null\n" + "@@ -1 +0,0 @@\n" + "-content\n" + "diff --git a/link b/link\n" + "new file mode 120755\n" + "index 00000000..12345678\n" + "--- /dev/null\n" + "+++ b/link\n" + "@@ -0,0 +1 @@\n" + "+content\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) patches := make([]*Patch, 0) for parser.Parse() { patches = append(patches, parser.Patch()) } if err := parser.Err(); err != nil { t.Fatalf("parser error: %v", err) } // Type change may produce multiple patches t.Logf("Type-change produced %d patches", len(patches)) for i, p := range patches { t.Logf(" Patch %d: status=%c", i+1, p.Status) } } // TestParserEnforceLimitsZeroMeansUnlimited verifies that zero-value limits mean "no limit" func TestParserEnforceLimitsZeroMeansUnlimited(t *testing.T) { input := rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "main.go") + "diff --git a/main.go b/main.go\n" + "--- a/main.go\n" + "+++ b/main.go\n" + "@@ -1 +1 @@\n" + "-old\n" + "+new\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{ EnforceLimits: true, // all max values left as zero - should mean "no limit" }) if !parser.Parse() { t.Fatalf("expected first patch to parse, err=%v", parser.Err()) } patch := parser.Patch() if patch.OverflowMarker { t.Fatalf("did not expect overflow marker with zero-value limits") } if patch.From == nil || patch.From.Name != "main.go" { t.Errorf("expected from file 'main.go', got %v", patch.From) } } // TestParserPatchObjectIsReused verifies that Patch() returns a reused object // This test documents the API behavior that callers should not retain the pointer func TestParserPatchObjectIsReused(t *testing.T) { // Note: raw lines must come BEFORE all patch content in git diff --raw --patch output input := rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "file1.go") + rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", "file2.go") + "diff --git a/file1.go b/file1.go\n" + "--- a/file1.go\n" + "+++ b/file1.go\n" + "@@ -1 +1 @@\n-old\n+new\n" + "diff --git a/file2.go b/file2.go\n" + "--- a/file2.go\n" + "+++ b/file2.go\n" + "@@ -1 +1 @@\n-old\n+new\n" parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) if !parser.Parse() { t.Fatalf("first parse failed: %v", parser.Err()) } first := parser.Patch() if first.From == nil || first.From.Name != "file1.go" { t.Fatalf("expected first patch to be file1.go, got %+v", first.From) } if !parser.Parse() { t.Fatalf("second parse failed: %v", parser.Err()) } // first has now been overwritten because Patch() is reused if first.From == nil || first.From.Name != "file2.go" { t.Fatalf("expected reused patch object to now point to file2.go, got %+v", first.From) } } func BenchmarkParser(b *testing.B) { var buf bytes.Buffer for i := range 100 { buf.WriteString(rawLine(0100644, 0100644, "abcdef1234567890abcdef1234567890abcdef12", "1234567890abcdef1234567890abcdef12345678", "M", fmt.Sprintf("file%d.go", i%10))) fmt.Fprintf(&buf, "diff --git a/file%d.go b/file%d.go\n", i%10, i%10) buf.WriteString("--- a/file.go\n+++ b/file.go\n@@ -1,3 +1,4 @@\n package main\n func main() {\n+println(\"test\")\n }\n") } input := buf.String() for b.Loop() { parser := NewParser(git.HashSHA1, strings.NewReader(input), Limits{}) for parser.Parse() { } } } ================================================ FILE: cmd/hot/pkg/hud/bar.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package hud import ( "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/progressbar" ) type ProgressBar struct { bar *progressbar.ProgressBar total int stepCurrent int stepEnd int } func NewBar(description string, total int, stepCurrent, stepEnd int, verbose bool) *ProgressBar { if verbose { return &ProgressBar{} } bar := progressbar.NewOptions(total, progressbar.OptionEnableColorCodes(true), progressbar.OptionSetDescription(fmt.Sprintf("\x1b[38;2;72;198;239m[%d/%d]\x1b[0m %s...", stepCurrent, stepEnd, description)), progressbar.OptionFullWidth(), progressbar.OptionSetTheme(progressbar.Theme{ Saucer: "\x1b[38;2;72;198;239m#\x1b[0m", SaucerHead: "\x1b[38;2;72;198;239m>\x1b[0m", SaucerPadding: " ", BarStart: "[", BarEnd: "]", })) return &ProgressBar{bar: bar, total: total, stepCurrent: stepCurrent, stepEnd: stepEnd} } func (b *ProgressBar) Add(n int) { if b.bar != nil { _ = b.bar.Add(n) } } func (b *ProgressBar) Done() { if b.bar == nil { return } _ = b.bar.Finish() if b.total <= 0 { fmt.Fprintf(os.Stderr, "\n\x1b[38;2;72;198;239m[%d/%d]\x1b[0m %s.\n", b.stepCurrent, b.stepEnd, tr.W("processing completed")) return } fmt.Fprintf(os.Stderr, "\n\x1b[38;2;72;198;239m[%d/%d]\x1b[0m %s, %s: %d\n", b.stepCurrent, b.stepEnd, tr.W("processing completed"), tr.W("total"), b.total) } ================================================ FILE: cmd/hot/pkg/hud/display.go ================================================ package hud import ( "fmt" "io" "slices" "strings" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/term" ) func typePadding(e *git.TreeEntry, padding int) string { t := e.Type() if padding > len(t) { return t + strings.Repeat(" ", padding-len(t)) } return t } func encodeEntry(w io.Writer, e *git.TreeEntry, t string, v term.Level) error { switch e.Filemode { case git.Symlink: if _, err := fmt.Fprintf(w, "%s %s %s %s\n", e.Filemode, v.Purple(t), e.Hash, v.Purple(e.Name)); err != nil { return err } case git.Executable: if _, err := fmt.Fprintf(w, "%s %s %s %s\n", e.Filemode, v.Red(t), e.Hash, v.Red(e.Name)); err != nil { return err } case git.Regular: if _, err := fmt.Fprintf(w, "%s %s %s %s\n", e.Filemode, t, e.Hash, e.Name); err != nil { return err } case git.Dir: if _, err := fmt.Fprintf(w, "%s %s %s %s\n", e.Filemode, v.Blue(t), e.Hash, v.Blue(e.Name)); err != nil { return err } case git.Submodule: if _, err := fmt.Fprintf(w, "%s %s %s %s\n", e.Filemode, v.Yellow(t), e.Hash, v.Yellow(e.Name)); err != nil { return err } default: if _, err := fmt.Fprintf(w, "%s %s %s %s\n", e.Filemode, t, e.Hash, e.Name); err != nil { return err } } return nil } const ( commitTypeName = "commit" ) func encodeTree(w io.Writer, t *git.Tree, v term.Level) error { p := 0 if v != term.LevelNone && slices.IndexFunc(t.Entries, func(e *git.TreeEntry) bool { return e.Filemode == git.Submodule }) != -1 { p = len(commitTypeName) // commit } for _, e := range t.Entries { if err := encodeEntry(w, e, typePadding(e, p), v); err != nil { return err } } return nil } func encodeTag(w io.Writer, t *git.Tag, v term.Level) error { headers := []string{ fmt.Sprintf("%s %s", v.Blue("object"), v.Green(t.Object)), fmt.Sprintf("%s %s", v.Blue("type"), v.Green(t.Type)), fmt.Sprintf("%s %s", v.Blue("tag"), v.Green(t.Name)), fmt.Sprintf("%s %s", v.Blue("tagger"), v.Green(t.Tagger.String())), } _, err := fmt.Fprintf(w, "%s\n\n%s", strings.Join(headers, "\n"), t.Content) return err } func encodeCommit(w io.Writer, c *git.Commit, v term.Level) (err error) { if _, err = fmt.Fprintf(w, "%s %s\n", v.Blue("tree"), v.Green(c.Tree)); err != nil { return err } for _, parent := range c.Parents { if _, err = fmt.Fprintf(w, "%s %s\n", v.Blue("parent"), v.Green(parent)); err != nil { return err } } if _, err = fmt.Fprintf(w, "%s %s\n%s %s\n", v.Blue("author"), v.Green(c.Author.String()), v.Blue("committer"), v.Green(c.Committer.String())); err != nil { return err } for _, hdr := range c.ExtraHeaders { if _, err = fmt.Fprintf(w, "%s %s\n", v.Blue(hdr.K), strings.ReplaceAll(hdr.V, "\n", "\n ")); err != nil { return err } } // c.Message is built from messageParts in the Decode() function. // // Since each entry in messageParts _does not_ contain its trailing LF, // append an empty string to capture the final newline. if _, err = fmt.Fprintf(w, "\n%s", c.Message); err != nil { return err } return nil } func Display(w io.Writer, a any, v term.Level) error { switch o := a.(type) { case *git.Commit: return encodeCommit(w, o, v) case *git.Tag: return encodeTag(w, o, v) case *git.Tree: return encodeTree(w, o, v) } _, err := fmt.Fprintln(w, a) return err } ================================================ FILE: cmd/hot/pkg/mc/migrate.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package mc import ( "bufio" "context" "encoding/hex" "errors" "fmt" "os" "path/filepath" "strconv" "strings" "sync" "github.com/antgroup/hugescm/cmd/hot/pkg/hud" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/gitobj" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/trace" ) type Migrator struct { from string to string // mu guards entries and commits (see below) mu *sync.Mutex // objects is a mapping of old objects SHAs (SHA1) to new ones (SHA256), where the ASCII // hex encoding of the SHA1 values are used as map keys. objects map[string][]byte odb *git.ODB newODB *git.ODB worktree string stepEnd int stepCurrent int verbose bool } func (m *Migrator) uncache(from []byte) ([]byte, bool) { m.mu.Lock() c, ok := m.objects[hex.EncodeToString(from)] m.mu.Unlock() return c, ok } func (m *Migrator) cache(from, to []byte) { m.mu.Lock() m.objects[hex.EncodeToString(from)] = to m.mu.Unlock() } type MigrateOptions struct { From string To string Format string Bare bool Verbose bool StepEnd int } func NewMigrator(ctx context.Context, opts *MigrateOptions) (*Migrator, error) { fromPath := git.RevParseRepoPath(ctx, opts.From) current, err := git.RevParseCurrentName(ctx, nil, opts.From) if err != nil { return nil, err } oldFormat := git.HashFormatOK(fromPath) newFormat := git.HashFormatFromName(opts.Format) if oldFormat == newFormat { return nil, fmt.Errorf("source repository object format is already: %s", opts.Format) } odb, err := git.NewODB(fromPath, oldFormat) if err != nil { return nil, err } if err := git.NewRepo(ctx, opts.To, current, opts.Bare, newFormat); err != nil { _ = odb.Close() return nil, err } toPath := git.RevParseRepoPath(ctx, opts.To) newODB, err := git.NewODB(toPath, newFormat) if err != nil { _ = odb.Close() return nil, err } r := &Migrator{ from: fromPath, to: toPath, mu: new(sync.Mutex), objects: make(map[string][]byte), odb: odb, newODB: newODB, stepEnd: opts.StepEnd, stepCurrent: 1, verbose: opts.Verbose, } if !opts.Bare { r.worktree = opts.To } return r, nil } func (m *Migrator) Close() error { if m.newODB != nil { _ = m.newODB.Close() } if m.odb != nil { _ = m.odb.Close() } return nil } // getAllCommits: Return all branch/tags commit reverse order func (m *Migrator) getAllCommits(ctx context.Context) ([][]byte, error) { // --topo-order is required to ensure topological order. reader, err := git.NewReader(ctx, &command.RunOpts{RepoPath: m.from}, "rev-list", "--reverse", "--topo-order", "--all") if err != nil { return nil, err } defer reader.Close() // nolint sr := bufio.NewScanner(reader) var commits [][]byte for sr.Scan() { oid, err := hex.DecodeString(strings.TrimSpace(sr.Text())) if err != nil { continue } commits = append(commits, oid) } return commits, nil } func (m *Migrator) hashObject(oid []byte) ([]byte, error) { br, err := m.odb.Blob(oid) if err != nil { return nil, err } defer br.Close() // nolint return m.newODB.WriteBlob(&gitobj.Blob{ Size: br.Size, Contents: br.Contents, }) } func countObjects(ctx context.Context, repoPath string) int { reader, err := git.NewReader(ctx, &command.RunOpts{RepoPath: repoPath}, "count-objects", "-v") if err != nil { return -1 } defer reader.Close() // nolint nums := make(map[string]int) br := bufio.NewScanner(reader) for br.Scan() { k, v, ok := strings.Cut(br.Text(), ":") if !ok { continue } n, err := strconv.Atoi(strings.TrimSpace(v)) if err != nil { return -1 } nums[k] = n } if total := nums["count"] + nums["in-pack"]; total != 0 { return total } return -1 } func (m *Migrator) hashObjects(ctx context.Context) error { if !git.IsGitVersionAtLeast(git.NewVersion(2, 35, 0)) { return errors.New("require Git 2.35.0 or later") } args := []string{"cat-file", "--batch-check", "--batch-all-objects"} if git.IsGitVersionAtLeast(git.NewVersion(2, 42, 0)) { args = append(args, "--unordered") } reader, err := git.NewReader(ctx, &command.RunOpts{RepoPath: m.from}, args...) if err != nil { return fmt.Errorf("start git cat-file error %w", err) } defer reader.Close() // nolint br := bufio.NewScanner(reader) objectsCount := countObjects(ctx, m.from) b := hud.NewBar(tr.W("fast rewrite objects"), objectsCount, m.stepCurrent, m.stepEnd, m.verbose) m.stepCurrent++ // format: 1a1db8dba9f976364fb6dab3e29deaf0f1140ed8 blob 5155 for br.Scan() { line := br.Text() sv := strings.Fields(line) if len(sv) < 3 { b.Add(1) continue } if sv[1] != "blob" { b.Add(1) continue } oid, err := hex.DecodeString(sv[0]) if err != nil { return fmt.Errorf("git cat-file decode hex error %w", err) } newOID, err := m.hashObject(oid) if err != nil { return fmt.Errorf("convert blob from sha1 to sha256 error %w", err) } m.cache(oid, newOID) b.Add(1) } b.Done() return nil } func (m *Migrator) rewriteTree(commitOID []byte, treeOID []byte) ([]byte, error) { tree, err := m.odb.Tree(treeOID) if err != nil { return nil, err } var oid []byte var ok bool entries := make([]*gitobj.TreeEntry, 0, len(tree.Entries)) for _, e := range tree.Entries { switch e.Type() { case gitobj.BlobObjectType: if oid, ok = m.uncache(e.Oid); !ok { if oid, err = m.hashObject(e.Oid); err != nil { return nil, fmt.Errorf("rewrite %s error: %w", hex.EncodeToString(e.Oid), err) } m.cache(e.Oid, oid) } entries = append(entries, &gitobj.TreeEntry{Name: e.Name, Oid: oid, Filemode: e.Filemode}) case gitobj.TreeObjectType: if oid, ok = m.uncache(e.Oid); !ok { if oid, err = m.rewriteTree(commitOID, e.Oid); err != nil { return nil, fmt.Errorf("rewrite %s error: %w", hex.EncodeToString(e.Oid), err) } m.cache(e.Oid, oid) } entries = append(entries, &gitobj.TreeEntry{Name: e.Name, Oid: oid, Filemode: e.Filemode}) default: // FIXME: git currently does not support managing sha1 submodules in sha256 repositories // if e.Type() == gitobj.CommitObjectType { // newOID := make([]byte, len(e.Oid)) // copy(newOID, e.Oid) // entries = append(entries, &gitobj.TreeEntry{Name: e.Name, Oid: newOID, Filemode: e.Filemode}) // continue // } fmt.Fprintf(os.Stderr, "\nTreeEntry type '%s' not supported for migration\n", e.Type()) } } return m.newODB.WriteTree(&gitobj.Tree{Entries: entries}) } func (m *Migrator) rewriteCommits(ctx context.Context) error { commits, err := m.getAllCommits(ctx) if err != nil { return fmt.Errorf("commits to migrate error: %w", err) } b := hud.NewBar(tr.W("rewrite commits"), len(commits), m.stepCurrent, m.stepEnd, m.verbose) m.stepCurrent++ trace.DbgPrint("commits: %v", len(commits)) for _, oid := range commits { oc, err := m.odb.Commit(oid) if err != nil { return err } var newTree []byte var ok bool if newTree, ok = m.uncache(oc.TreeID); !ok { if newTree, err = m.rewriteTree(oid, oc.TreeID); err != nil { return err } m.cache(oc.TreeID, newTree) } // Create a new list of parents from the original commit to // point at the rewritten parents in order to create a // topologically equivalent DAG. // // This operation is safe since we are visiting the commits in // reverse topological order and therefore have seen all parents // before children (in other words, r.uncacheCommit(...) will // always return a value, if the prospective parent is a part of // the migration). rewrittenParents := make([][]byte, 0, len(oc.ParentIDs)) for _, sha1Parent := range oc.ParentIDs { rewrittenParent, ok := m.uncache(sha1Parent) if !ok { // If we haven't seen the parent before, this // means that we're doing a partial migration // and the parent that we're looking for isn't // included. // // Use the original parent to properly link // history across the migration boundary. continue } rewrittenParents = append(rewrittenParents, rewrittenParent) } // Construct a new commit using the original header information, // but the rewritten set of parents as well as root tree. rewrittenCommit := &gitobj.Commit{ Author: oc.Author, Committer: oc.Committer, ExtraHeaders: oc.ExtraHeaders, Message: oc.Message, ParentIDs: rewrittenParents, TreeID: newTree, } var newSha []byte if newSha, err = m.newODB.WriteCommit(rewrittenCommit); err != nil { return err } // Cache that commit so that we can reassign children of this // commit. m.cache(oid, newSha) b.Add(1) } b.Done() return nil } // getReferences returns a list of references to migrate, or an error if loading // those references failed. func (m *Migrator) getReferences(ctx context.Context) ([]*git.Reference, error) { refs, err := git.ParseReferences(ctx, m.from, git.OrderNone) if err != nil { return nil, err } references := make([]*git.Reference, 0, len(refs)) for _, ref := range refs { if ref.Name.IsRemote() { continue } references = append(references, ref) } return references, nil } func (m *Migrator) encodeTag(tag *gitobj.Tag, newObj []byte) ([]byte, error) { newTag, err := m.newODB.WriteTag(&gitobj.Tag{ Object: newObj, ObjectType: tag.ObjectType, Name: tag.Name, Tagger: tag.Tagger, Message: tag.Message, }) if err != nil { return nil, fmt.Errorf("could not rewrite tag: %s", tag.Name) } return newTag, nil } func (m *Migrator) rewriteTag(oid []byte) ([]byte, error) { tag, err := m.odb.Tag(oid) if err != nil { return nil, err } if tag.ObjectType == gitobj.TagObjectType { newTag, err := m.rewriteTag(tag.Object) if err != nil { return nil, err } return m.encodeTag(tag, newTag) } if tag.ObjectType == gitobj.CommitObjectType { if to, ok := m.uncache(tag.Object); ok { return m.encodeTag(tag, to) } } return oid, nil } func (m *Migrator) rewriteOneRef(ref *git.Reference) ([]byte, error) { oid, err := hex.DecodeString(ref.Target) if err != nil { return nil, fmt.Errorf("could not decode: '%s'", ref.Target) } if newOID, ok := m.uncache(oid); ok { return newOID, nil } if ref.ObjectType == git.CommitObject { // BUGS: We have completed the conversion of all commits return nil, nil } return m.rewriteTag(oid) } func (m *Migrator) reconstruct(ctx context.Context) error { refs, err := m.getReferences(ctx) if err != nil { return err } if len(refs) == 0 { fmt.Fprintf(os.Stderr, "%s", tr.W("No references to be deleted\n")) return nil } b := hud.NewBar(tr.W("rewrite references"), len(refs), m.stepCurrent, m.stepEnd, m.verbose) m.stepCurrent++ var oid []byte u, err := git.NewRefUpdater(ctx, m.to, nil, false) if err != nil { return err } defer u.Close() // nolint if err := u.Start(); err != nil { fmt.Fprintf(os.Stderr, "RefUpdater: Start ref updater error: %v\n", err) return err } for _, ref := range refs { if oid, err = m.rewriteOneRef(ref); err != nil { return fmt.Errorf("rewrite one ref '%s' error: %w", ref.Name, err) } if oid == nil { continue } if err := u.Create(ref.Name, hex.EncodeToString(oid)); err != nil { return fmt.Errorf("update-ref '%s' error: %w", ref.Name, err) } b.Add(1) } if err := u.Prepare(); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Prepare error: %v\n", err) return err } if err := u.Commit(); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Commit error: %v\n", err) return err } b.Done() return nil } func (m *Migrator) Execute(ctx context.Context) error { if err := m.hashObjects(ctx); err != nil { return err } if err := m.rewriteCommits(ctx); err != nil { return err } if err := m.reconstruct(ctx); err != nil { return err } return m.cleanup(ctx) } func (m *Migrator) reset(ctx context.Context) error { cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: os.Environ(), RepoPath: m.worktree, Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, NoSetpgid: true, }, "git", "reset", "--hard") if err := cmd.Run(); err != nil { fmt.Fprintf(os.Stderr, "checkout error: %v", err) return err } return nil } func (m *Migrator) cleanup(ctx context.Context) error { cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: os.Environ(), RepoPath: m.to, Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, NoSetpgid: true, }, "git", "-c", "repack.writeBitmaps=true", "-c", "pack.packSizeLimit=16g", "gc") if err := cmd.Run(); err != nil { return fmt.Errorf("run git gc error: %w", err) } diskSize, err := strengthen.Du(filepath.Join(m.to, "objects")) if err != nil { return fmt.Errorf("du repo size error: %w", err) } fmt.Fprintf(os.Stderr, "\x1b[38;2;72;198;239m[%d/%d]\x1b[0m %s: \x1b[38;2;32;225;215m%s\x1b[0m %s: \x1b[38;2;72;198;239m%s\x1b[0m\n", m.stepCurrent, m.stepEnd, tr.W("Repository"), m.to, tr.W("size"), strengthen.FormatSize(diskSize)) if len(m.worktree) != 0 { _ = m.reset(ctx) } return nil } ================================================ FILE: cmd/hot/pkg/refs/refs.go ================================================ package refs import ( "context" "encoding/hex" "fmt" "os" "path/filepath" "strings" "github.com/antgroup/hugescm/cmd/hot/pkg/hud" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/gitobj" ) // CommitGPGSignature represents a git commit signature part. type CommitGPGSignature struct { Signature string Payload string // TODO check if can be reconstruct from the rest of commit information to not have duplicate data } type Reference struct { Name string ShortName string Hash string Peeling string Tree string Parents []string Author *git.Signature Committer *git.Signature Message string Leading int // leading > mainline Lagging int // lagging < mainline Broken bool } func (r *Reference) Merged() bool { return r.IsBranch() && r.Leading == 0 } func (r *Reference) IsBranch() bool { return strings.HasPrefix(r.Name, "refs/heads/") } func (r *Reference) IsTag() bool { return strings.HasPrefix(r.Name, "refs/tags/") } type Matcher interface { Match(string) bool } type References struct { BasePoint string Current string Items []*Reference } func (r *References) resolveRefCommit(odb *git.ODB, ref *git.Reference) ([]byte, *gitobj.Commit, error) { sha, err := hex.DecodeString(ref.Target) if err != nil { return nil, nil, fmt.Errorf("could not decode: %q", ref.Target) } for range 20 { obj, err := odb.Object(sha) if err != nil { return nil, nil, fmt.Errorf("open git object error: %w", err) } if obj.Type() == gitobj.CommitObjectType { return sha, obj.(*gitobj.Commit), nil } if obj.Type() != gitobj.TagObjectType { return nil, nil, fmt.Errorf("oid: %s unsupport object type: %s", hex.EncodeToString(sha), obj.Type()) } tag := obj.(*gitobj.Tag) sha = tag.Object } return nil, nil, fmt.Errorf("ref '%s' recursion depth is not supported", ref.Name) } func (r *References) resolve(ctx context.Context, repoPath string, odb *git.ODB, ref *git.Reference) error { sha, cc, err := r.resolveRefCommit(odb, ref) if err != nil { r.Items = append(r.Items, &Reference{Name: ref.Name.String(), Hash: ref.Target, Broken: true}) return err } reference := &Reference{ Name: ref.Name.String(), ShortName: ref.ShortName, Hash: ref.Target, Tree: hex.EncodeToString(cc.TreeID), Message: cc.Message, Author: git.SignatureFromLine(cc.Author), Committer: git.SignatureFromLine(cc.Committer), } for _, p := range cc.ParentIDs { reference.Parents = append(reference.Parents, hex.EncodeToString(p)) } if peeling := hex.EncodeToString(sha); peeling != ref.Target { reference.Peeling = peeling } if reference.Hash != r.BasePoint && ref.Name.IsBranch() { reference.Leading, reference.Lagging, _ = git.RevDivergingCount(ctx, repoPath, reference.Hash, r.BasePoint) } r.Items = append(r.Items, reference) return nil } func ScanReferences(ctx context.Context, repoPath string, m Matcher, order git.Order) (*References, error) { odb, err := git.NewODB(repoPath, git.HashFormatOK(repoPath)) if err != nil { return nil, err } defer odb.Close() // nolint refs, err := git.ParseReferences(ctx, repoPath, order) if err != nil { return nil, err } b := hud.NewBar(tr.W("scan references"), len(refs), 1, 1, false) hash, refname, _ := git.ParseReference(ctx, repoPath, "HEAD") r := &References{ BasePoint: hash, Current: refname, Items: make([]*Reference, 0, 200), } for _, ref := range refs { b.Add(1) if !m.Match(ref.Name.String()) { continue } if err := r.resolve(ctx, repoPath, odb, ref); err != nil { fmt.Fprintf(os.Stderr, "Parse ref: %s error: %v\n", ref.Name, err) } } b.Done() return r, nil } func RemoveBrokenRef(repoPath string, refName string) error { refPath := filepath.Join(repoPath, refName) return os.Remove(refPath) } ================================================ FILE: cmd/hot/pkg/replay/cache.go ================================================ // Copyright (c) 2014- GitHub, Inc. and Git LFS contributors // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package replay import ( "encoding/hex" "fmt" "github.com/antgroup/hugescm/modules/git/gitobj" ) // cacheEntry caches then given "from" entry so that it is always rewritten as // a *TreeEntry equivalent to "to". func (r *Replayer) cacheEntry(path string, from, to *gitobj.TreeEntry) *gitobj.TreeEntry { r.mu.Lock() defer r.mu.Unlock() r.entries[r.entryKey(path, from)] = to return to } // uncacheEntry returns a *TreeEntry that is cached from the given *TreeEntry // "from". That is to say, it returns the *TreeEntry that "from" should be // rewritten to, or nil if none could be found. func (r *Replayer) uncacheEntry(path string, from *gitobj.TreeEntry) *gitobj.TreeEntry { r.mu.Lock() defer r.mu.Unlock() return r.entries[r.entryKey(path, from)] } // entryKey returns a unique key for a given *TreeEntry "e". func (r *Replayer) entryKey(path string, e *gitobj.TreeEntry) string { return fmt.Sprintf("%s:%x", path, e.Oid) } // cacheEntry caches then given "from" commit so that it is always rewritten as // a *git/gitobj.Commit equivalent to "to". func (r *Replayer) cacheCommit(from, to []byte) { r.mu.Lock() defer r.mu.Unlock() r.commits[hex.EncodeToString(from)] = to } // uncacheCommit returns a *git/gitobj.Commit that is cached from the given // *git/gitobj.Commit "from". That is to say, it returns the *git/gitobj.Commit that // "from" should be rewritten to and true, or nil and false if none could be // found. func (r *Replayer) uncacheCommit(from []byte) ([]byte, bool) { r.mu.Lock() defer r.mu.Unlock() c, ok := r.commits[hex.EncodeToString(from)] return c, ok } func copyEntry(e *gitobj.TreeEntry) *gitobj.TreeEntry { if e == nil { return nil } oid := make([]byte, len(e.Oid)) copy(oid, e.Oid) return &gitobj.TreeEntry{ Filemode: e.Filemode, Name: e.Name, Oid: oid, } } func copyEntryMode(e *gitobj.TreeEntry, mode int32) *gitobj.TreeEntry { copied := copyEntry(e) copied.Filemode = mode return copied } ================================================ FILE: cmd/hot/pkg/replay/cleanup.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package replay import ( "fmt" "os" "path/filepath" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/tui" ) func (r *Replayer) cleanup(prune bool) error { if !prune { if err := tui.AskConfirm(&prune, "%s", tr.W("Do you want to prune the repository right away")); err != nil { return err } if !prune { return nil } } cmd := command.NewFromOptions(r.ctx, &command.RunOpts{ Environ: os.Environ(), RepoPath: r.repoPath, Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, NoSetpgid: true, }, "git", "-c", "repack.writeBitmaps=true", "-c", "pack.packSizeLimit=16g", "gc", "--prune=now", "--aggressive") if err := cmd.Run(); err != nil { return fmt.Errorf("run git gc error: %w", err) } diskSize, err := strengthen.Du(filepath.Join(r.repoPath, "objects")) if err != nil { return fmt.Errorf("du repo size error: %w", err) } fmt.Fprintf(os.Stderr, "\x1b[38;2;72;198;239m[%d/%d]\x1b[0m %s: \x1b[38;2;32;225;215m%s\x1b[0m %s: \x1b[38;2;72;198;239m%s\x1b[0m\n", r.stepCurrent, r.stepEnd, tr.W("Repository"), r.repoPath, tr.W("size"), strengthen.FormatSize(diskSize)) return nil } ================================================ FILE: cmd/hot/pkg/replay/drop.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package replay import ( "errors" "fmt" "path" "github.com/antgroup/hugescm/cmd/hot/pkg/hud" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/gitobj" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/modules/tui" ) func (r *Replayer) rewriteTree(m Matcher, commitOID []byte, treeOID []byte, parent string) ([]byte, error) { tree, err := r.odb.Tree(treeOID) if err != nil { return nil, err } entries := make([]*gitobj.TreeEntry, 0, len(tree.Entries)) for _, entry := range tree.Entries { name := path.Join(parent, entry.Name) // matched path if m.Match(entry, name) { continue } if entry.Type() == gitobj.BlobObjectType { entries = append(entries, copyEntry(entry)) continue } // If this is a symlink, skip it if entry.Filemode == 0120000 { entries = append(entries, copyEntry(entry)) continue } if cached := r.uncacheEntry(name, entry); cached != nil { entries = append(entries, copyEntryMode(cached, entry.Filemode)) continue } var oid []byte switch entry.Type() { case gitobj.TreeObjectType: oid, err = r.rewriteTree(m, commitOID, entry.Oid, name) default: oid = entry.Oid } if err != nil { return nil, err } entries = append(entries, r.cacheEntry(name, entry, &gitobj.TreeEntry{ Filemode: entry.Filemode, Name: entry.Name, Oid: oid, })) } rewritten := &gitobj.Tree{Entries: entries} if tree.Equal(rewritten) { return treeOID, nil } return r.odb.WriteTree(rewritten) } func (r *Replayer) rewriteCommits(m Matcher) error { commits, err := r.commitsToRewrite() if err != nil { return fmt.Errorf("commits to rewrite error: %w", err) } b := hud.NewBar(tr.W("rewrite commits"), len(commits), r.stepCurrent, r.stepEnd, r.verbose) r.stepCurrent++ trace.DbgPrint("commits: %v", len(commits)) for _, oid := range commits { original, err := r.odb.Commit(oid) if err != nil { return err } rewrittenTree, err := r.rewriteTree(m, oid, original.TreeID, "") if err != nil { return err } // Create a new list of parents from the original commit to // point at the rewritten parents in order to create a // topologically equivalent DAG. // // This operation is safe since we are visiting the commits in // reverse topological order and therefore have seen all parents // before children (in other words, r.uncacheCommit(...) will // always return a value, if the prospective parent is a part of // the migration). rewrittenParents := make([][]byte, 0, len(original.ParentIDs)) for _, originalParent := range original.ParentIDs { rewrittenParent, ok := r.uncacheCommit(originalParent) if !ok { // If we haven't seen the parent before, this // means that we're doing a partial migration // and the parent that we're looking for isn't // included. // // Use the original parent to properly link // history across the migration boundary. rewrittenParent = originalParent } rewrittenParents = append(rewrittenParents, rewrittenParent) } // Construct a new commit using the original header information, // but the rewritten set of parents as well as root tree. rewrittenCommit := &gitobj.Commit{ Author: original.Author, Committer: original.Committer, ExtraHeaders: original.ExtraHeaders, Message: original.Message, ParentIDs: rewrittenParents, TreeID: rewrittenTree, } var newSha []byte if original.Equal(rewrittenCommit) { newSha = make([]byte, len(oid)) copy(newSha, oid) } else { if newSha, err = r.odb.WriteCommit(rewrittenCommit); err != nil { return err } } // Cache that commit so that we can reassign children of this // commit. r.cacheCommit(oid, newSha) b.Add(1) } b.Done() return nil } func (r *Replayer) Drop(m Matcher, confirm bool, prune bool) error { if !confirm { if !git.IsBareRepository(r.ctx, r.repoPath) { // core.bare if err := tui.AskConfirm(&confirm, "%s", tr.W("Repository not bare repository, continue to rewrite")); err != nil { return err } if !confirm { return nil } } } if err := r.rewriteCommits(m); err != nil { return err } if !confirm { if err := tui.AskConfirm(&confirm, "%s", tr.W("Do you want to rewrite local branches and tags")); err != nil { return err } if !confirm { return nil } } refs, err := r.referencesToRewrite() if err != nil { return errors.New("could not find refs to update") } updater := &refUpdater{ CacheFn: r.uncacheCommit, References: refs, RepoPath: r.repoPath, odb: r.odb, } b := hud.NewBar(tr.W("rewrite references"), len(refs), r.stepCurrent, r.stepEnd, r.verbose) r.stepCurrent++ if err := updater.UpdateRefs(r.ctx, b); err != nil { return errors.New("could not update refs") } b.Done() return r.cleanup(prune) } ================================================ FILE: cmd/hot/pkg/replay/graft.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package replay import ( "encoding/hex" "errors" "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/hud" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/gitobj" "github.com/antgroup/hugescm/modules/tui" ) func (r *Replayer) resolveCommit(ref *git.Reference) ([]byte, *gitobj.Commit, error) { sha, err := hex.DecodeString(ref.Target) if err != nil { return nil, nil, fmt.Errorf("could not decode: %q", ref.Target) } for range 20 { obj, err := r.odb.Object(sha) if err != nil { return nil, nil, fmt.Errorf("open git object error: %w", err) } if obj.Type() == gitobj.CommitObjectType { return sha, obj.(*gitobj.Commit), nil } if obj.Type() != gitobj.TagObjectType { return nil, nil, fmt.Errorf("oid: %s unsupported object type: %s", hex.EncodeToString(sha), obj.Type()) } tag := obj.(*gitobj.Tag) sha = tag.Object } return nil, nil, fmt.Errorf("ref '%s' recursion depth is not supported", ref.Name) } // graft HEAD func (r *Replayer) graftHEAD() error { _, oldRev, err := git.RevParseCurrent(r.ctx, os.Environ(), r.repoPath) if err != nil { return err } oid, err := hex.DecodeString(oldRev) if err != nil { return err } original, err := r.odb.Commit(oid) if err != nil { return err } rewrittenParents := make([][]byte, 0, len(original.ParentIDs)) for _, originalParent := range original.ParentIDs { rewrittenParent, ok := r.uncacheCommit(originalParent) if !ok { // If we haven't seen the parent before, this // means that we're doing a partial migration // and the parent that we're looking for isn't // included. // // Use the original parent to properly link // history across the migration boundary. rewrittenParent = originalParent } rewrittenParents = append(rewrittenParents, rewrittenParent) } // Construct a new commit using the original header information, // but the rewritten set of parents as well as root tree. rewrittenCommit := &gitobj.Commit{ Author: original.Author, Committer: original.Committer, ExtraHeaders: original.ExtraHeaders, Message: original.Message, ParentIDs: rewrittenParents, TreeID: original.TreeID, } var newSha []byte if original.Equal(rewrittenCommit) { newSha = make([]byte, len(oid)) copy(newSha, oid) } else { newSha, err = r.odb.WriteCommit(rewrittenCommit) if err != nil { return err } } // Cache that commit so that we can reassign children of this // commit. r.cacheCommit(oid, newSha) return nil } func (r *Replayer) graftCommits(refs []*git.Reference, headOnly bool) error { if headOnly { b := hud.NewBar(tr.W("graft commits"), 1, r.stepCurrent, r.stepEnd, r.verbose) r.stepCurrent++ if err := r.graftHEAD(); err != nil { return err } b.Done() return nil } b := hud.NewBar(tr.W("graft commits"), len(refs), r.stepCurrent, r.stepEnd, r.verbose) r.stepCurrent++ for _, ref := range refs { oid, original, err := r.resolveCommit(ref) if err != nil { return err } rewrittenParents := make([][]byte, 0, len(original.ParentIDs)) for _, originalParent := range original.ParentIDs { rewrittenParent, ok := r.uncacheCommit(originalParent) if !ok { // If we haven't seen the parent before, this // means that we're doing a partial migration // and the parent that we're looking for isn't // included. // // Use the original parent to properly link // history across the migration boundary. rewrittenParent = originalParent } rewrittenParents = append(rewrittenParents, rewrittenParent) } // Construct a new commit using the original header information, // but the rewritten set of parents as well as root tree. rewrittenCommit := &gitobj.Commit{ Author: original.Author, Committer: original.Committer, ExtraHeaders: original.ExtraHeaders, Message: original.Message, ParentIDs: rewrittenParents, TreeID: original.TreeID, } var newSha []byte if original.Equal(rewrittenCommit) { newSha = make([]byte, len(oid)) copy(newSha, oid) } else { newSha, err = r.odb.WriteCommit(rewrittenCommit) if err != nil { return err } } // Cache that commit so that we can reassign children of this // commit. r.cacheCommit(oid, newSha) b.Add(1) } b.Done() return nil } func (r *Replayer) Graft(m Matcher, confirm bool, prune bool, headOnly bool) error { if err := r.rewriteCommits(m); err != nil { return err } if !confirm { if err := tui.AskConfirm(&confirm, "%s", tr.W("Do you want to rewrite local branches and tags")); err != nil { return err } if !confirm { return nil } } refs, err := r.referencesToRewrite() if err != nil { return errors.New("could not find refs to update") } if err := r.graftCommits(refs, headOnly); err != nil { return err } updater := &refUpdater{ CacheFn: r.uncacheCommit, References: refs, RepoPath: r.repoPath, odb: r.odb, } b := hud.NewBar(tr.W("rewrite references"), len(refs), r.stepCurrent, r.stepEnd, r.verbose) r.stepCurrent++ if err := updater.UpdateRefs(r.ctx, b); err != nil { return errors.New("could not update refs") } b.Done() return r.cleanup(prune) } ================================================ FILE: cmd/hot/pkg/replay/misc.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package replay import ( "fmt" "os" "runtime" "strings" "github.com/antgroup/hugescm/modules/git/gitobj" "github.com/antgroup/hugescm/modules/wildmatch" ) type Matcher interface { Match(entry *gitobj.TreeEntry, absPath string) bool } type equaler struct { paths map[string]any } func NewEqualer(paths []string) Matcher { e := &equaler{ paths: make(map[string]any), } for _, p := range paths { e.paths[p] = nil } return e } func (e *equaler) Match(entry *gitobj.TreeEntry, absPath string) bool { if _, ok := e.paths[absPath]; ok { return true } return false } var ( caseInsensitive = func() bool { return runtime.GOOS == "windows" || runtime.GOOS == "darwin" }() escapeChars = func() string { switch runtime.GOOS { case "windows": return "*?[]" default: } return "*?[]\\" }() ) func systemCaseEqual(a, b string) bool { if caseInsensitive { return strings.EqualFold(a, b) } return a == b } type matcher struct { prefix []string ws []*wildmatch.Wildmatch } func NewMatcher(patterns []string) Matcher { m := &matcher{} for _, pattern := range patterns { if len(pattern) == 0 { continue } if !strings.ContainsAny(pattern, escapeChars) { m.prefix = append(m.prefix, strings.TrimSuffix(pattern, "/")) continue } w, err := wildmatch.NewWildmatch(pattern, wildmatch.SystemCase, wildmatch.Contents) if err != nil { fmt.Fprintf(os.Stderr, "Ignore bad wildcard '%s' error: %v\n", pattern, err) continue } m.ws = append(m.ws, w) } return m } func (m *matcher) Match(entry *gitobj.TreeEntry, absPath string) bool { if len(m.ws) == 0 && len(m.prefix) == 0 { return true } for _, p := range m.prefix { prefixLen := len(p) if len(absPath) >= prefixLen && systemCaseEqual(absPath[0:prefixLen], p) && (len(absPath) == prefixLen || absPath[prefixLen] == '/') { return true } } for _, w := range m.ws { if w.Match(absPath) { return true } } return false } ================================================ FILE: cmd/hot/pkg/replay/replay.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package replay import ( "bufio" "context" "encoding/hex" "strings" "sync" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/gitobj" ) type Replayer struct { ctx context.Context repoPath string // mu guards entries and commits (see below) mu *sync.Mutex // entries is a mapping of old tree entries to new (rewritten) ones. // Since TreeEntry contains a []byte (and is therefore not a key-able // type), a unique TreeEntry -> string function is used for map keys. entries map[string]*gitobj.TreeEntry // commits is a mapping of old commit SHAs to new ones, where the ASCII // hex encoding of the SHA1 values are used as map keys. commits map[string][]byte // odb is the *ObjectDatabase from which blobs, commits, and trees are // loaded from. odb *git.ODB stepEnd int stepCurrent int verbose bool } func NewReplayer(ctx context.Context, repoPath string, stepEnd int, verbose bool) (*Replayer, error) { odb, err := git.NewODB(repoPath, git.HashFormatOK(repoPath)) if err != nil { return nil, err } return &Replayer{ ctx: ctx, repoPath: repoPath, mu: new(sync.Mutex), entries: make(map[string]*gitobj.TreeEntry), commits: map[string][]byte{}, odb: odb, stepEnd: stepEnd, stepCurrent: 1, verbose: verbose, }, nil } func (r *Replayer) Close() error { if r.odb != nil { return r.odb.Close() } return nil } func (r *Replayer) referencesToRewrite() ([]*git.Reference, error) { refs, err := git.ParseReferences(r.ctx, r.repoPath, git.OrderNone) if err != nil { return nil, err } references := make([]*git.Reference, 0, len(refs)) for _, ref := range refs { if ref.Name.IsRemote() { continue } references = append(references, ref) } return references, nil } // Return all branch/tags commit reverse order func (r *Replayer) commitsToRewrite() ([][]byte, error) { // --topo-order is required to ensure topological order. reader, err := git.NewReader(r.ctx, &command.RunOpts{RepoPath: r.repoPath}, "rev-list", "--reverse", "--topo-order", "--all") if err != nil { return nil, err } defer reader.Close() // nolint sr := bufio.NewScanner(reader) var commits [][]byte for sr.Scan() { oid, err := hex.DecodeString(strings.TrimSpace(sr.Text())) if err != nil { continue } commits = append(commits, oid) } return commits, nil } ================================================ FILE: cmd/hot/pkg/replay/unbranch.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package replay import ( "bufio" "encoding/hex" "errors" "fmt" "os" "slices" "strings" "github.com/antgroup/hugescm/cmd/hot/pkg/hud" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/gitobj" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/modules/tui" ) // 4MB size limit for squashed commit message const maxSizeForSquashedCommitMessage = 4 << 20 func (r *Replayer) makeSquashMessage0(commits []string, message string) (string, error) { messages := []string{message} messageSize := len(message) for idx, s := range commits { if messageSize > maxSizeForSquashedCommitMessage { oversizeNotice := fmt.Sprintf("\n\n...\n %d more commit(s) ignored to avoid oversized message\n", len(commits)-idx) message := strings.Join(messages, "\n") return message[:maxSizeForSquashedCommitMessage] + oversizeNotice, nil } oid, err := hex.DecodeString(s) if err != nil { return "", err } cc, err := r.odb.Commit(oid) if err != nil { return "", err } if len(cc.ParentIDs) > 1 { // skip commit message for merge commit continue } messages = append(messages, "* "+cc.Subject()) // 3 more chars[ *\n] will be appended for each message messageSize += 3 + len(cc.Message) } return strings.Join(messages, "\n"), nil } func (r *Replayer) makeSquashMessage(cc *gitobj.Commit) (string, error) { commits, err := git.RevUniqueList(r.ctx, r.repoPath, hex.EncodeToString(cc.ParentIDs[0]), hex.EncodeToString(cc.ParentIDs[1])) if err != nil { return "", err } // already merged if len(commits) == 0 { return cc.Message, nil } return r.makeSquashMessage0(commits, cc.Message) } // --first-parent // Return all branch/tags commit reverse order func (r *Replayer) commitsToLinear(revision string) ([][]byte, error) { psArgs := []string{"rev-list", "--reverse", "--topo-order", "--first-parent"} if len(revision) == 0 { psArgs = append(psArgs, "--all") } else { psArgs = append(psArgs, revision) } // --topo-order is required to ensure topological order. reader, err := git.NewReader(r.ctx, &command.RunOpts{RepoPath: r.repoPath}, psArgs...) if err != nil { return nil, err } defer reader.Close() // nolint sr := bufio.NewScanner(reader) var commits [][]byte for sr.Scan() { oid, err := hex.DecodeString(strings.TrimSpace(sr.Text())) if err != nil { continue } commits = append(commits, oid) } return commits, nil } func (r *Replayer) unbranch(revision string, keep int) ([]byte, error) { commits, err := r.commitsToLinear(revision) if err != nil { return nil, fmt.Errorf("commits to linear error: %w", err) } if keep > 0 && keep < len(commits) { commits = commits[len(commits)-keep:] } if len(commits) == 0 { return nil, errors.New("missing commits") } top := slices.Clone(commits[len(commits)-1]) b := hud.NewBar(tr.W("rewrite commits"), len(commits), r.stepCurrent, r.stepEnd, r.verbose) r.stepCurrent++ trace.DbgPrint("commits: %v", len(commits)) for _, oid := range commits { original, err := r.odb.Commit(oid) if err != nil { return nil, err } message := original.Message rewrittenParents := make([][]byte, 0, len(original.ParentIDs)) if len(original.ParentIDs) > 0 { if rewrittenParent, ok := r.uncacheCommit(original.ParentIDs[0]); ok { rewrittenParents = append(rewrittenParents, rewrittenParent) } } if len(original.ParentIDs) > 1 { if m, err := r.makeSquashMessage(original); err == nil { message = m } } // Construct a new commit using the original header information, // but the rewritten set of parents as well as root tree. rewrittenCommit := &gitobj.Commit{ Author: original.Author, Committer: original.Committer, ExtraHeaders: original.ExtraHeaders, Message: message, ParentIDs: rewrittenParents, TreeID: original.TreeID, } var newSha []byte if original.Equal(rewrittenCommit) { newSha = make([]byte, len(oid)) copy(newSha, oid) } else { if newSha, err = r.odb.WriteCommit(rewrittenCommit); err != nil { return nil, err } } // Cache that commit so that we can reassign children of this // commit. r.cacheCommit(oid, newSha) b.Add(1) } b.Done() return top, nil } type UnbranchOptions struct { Branch string Target string Confirm bool Prune bool Keep int } func (r *Replayer) Unbranch(o *UnbranchOptions) error { top, err := r.unbranch(o.Branch, o.Keep) if err != nil { return err } if len(o.Branch) != 0 { return r.unbranchOne(o, top) } if !o.Confirm { var confirm bool if err := tui.AskConfirm(&confirm, "%s", tr.W("Do you want to rewrite local branches and tags")); err != nil { return err } if !confirm { return nil } } refs, err := r.referencesToRewrite() if err != nil { return errors.New("could not find refs to update") } updater := &refUpdater{ CacheFn: r.uncacheCommit, References: refs, RepoPath: r.repoPath, odb: r.odb, } b := hud.NewBar(tr.W("rewrite references"), len(refs), r.stepCurrent, r.stepEnd, r.verbose) r.stepCurrent++ if err := updater.UpdateRefs(r.ctx, b); err != nil { return errors.New("could not update refs") } b.Done() return r.cleanup(o.Prune) } func (r *Replayer) unbranchOne(o *UnbranchOptions, top []byte) error { newOID, ok := r.uncacheCommit(top) if !ok { return fmt.Errorf("find migrate commit error, origin: %s", hex.EncodeToString(top)) } newRev := hex.EncodeToString(newOID) var oldRev, refname string ref, err := git.ReferencePrefixMatch(r.ctx, r.repoPath, o.Branch) switch { case git.IsErrNotExist(err): if len(o.Target) == 0 { _, _ = fmt.Fprintf(os.Stdout, "Dangling: %s\n", newRev) return nil } oldRev = git.ConformingHashZero(newRev) refname = git.JoinBranchPrefix(o.Target) case err != nil: return err case len(o.Target) != 0: oldRev = git.ConformingHashZero(newRev) refname = git.JoinBranchPrefix(o.Target) default: oldRev = ref.Target refname = ref.Name.String() } fmt.Fprintf(os.Stderr, "Update '%s' %s --> %s\n", refname, oldRev, newRev) if err := git.UpdateRef(r.ctx, r.repoPath, refname, oldRev, newRev, false); err != nil { return err } return nil } ================================================ FILE: cmd/hot/pkg/replay/update.go ================================================ // Copyright (c) 2014- GitHub, Inc. and Git LFS contributors // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package replay import ( "bytes" "context" "encoding/hex" "fmt" "os" "strings" "github.com/antgroup/hugescm/cmd/hot/pkg/hud" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/gitobj" "github.com/antgroup/hugescm/modules/trace" ) // refUpdater is a type responsible for moving references from one point in the // Git object graph to another. type refUpdater struct { // CacheFn is a function that returns the SHA1 transformation from an // original hash to a new one. It specifies a "bool" return value // signaling whether or not that given "old" SHA1 was migrated. CacheFn func(old []byte) ([]byte, bool) // References is a set of *git.Ref's to migrate. References []*git.Reference // RepoPath is the given directory on disk in which the repository is // located. RepoPath string odb *git.ODB } // UpdateRefs performs the reference update(s) from existing locations (see: // Refs) to their respective new locations in the graph (see CacheFn). // // It creates reflog entries as well as stderr log entries as it progresses // through the reference updates. // // It returns any error encountered, or nil if the reference update(s) was/were // successful. func (r *refUpdater) UpdateRefs(ctx context.Context, b *hud.ProgressBar) error { var maxNameLen int for _, ref := range r.References { maxNameLen = max(maxNameLen, len(ref.Name)) } u, err := git.NewRefUpdater(ctx, r.RepoPath, nil, false) if err != nil { return err } defer u.Close() // nolint if err := u.Start(); err != nil { fmt.Fprintf(os.Stderr, "RefUpdater: Start ref updater error: %v\n", err) return err } seen := make(map[git.ReferenceName]bool) for _, ref := range r.References { if err := r.updateOneRef(u, maxNameLen, seen, ref); err != nil { return err } b.Add(1) } if err := u.Prepare(); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Prepare error: %v\n", err) return err } if err := u.Commit(); err != nil { fmt.Fprintf(os.Stderr, "\x1b[2K\rRefUpdater: Commit error: %v\n", err) return err } return nil } func (r *refUpdater) updateOneTag(tag *gitobj.Tag, toObj []byte) ([]byte, error) { newTag, err := r.odb.WriteTag(&gitobj.Tag{ Object: toObj, ObjectType: tag.ObjectType, Name: tag.Name, Tagger: tag.Tagger, Message: tag.Message, }) if err != nil { return nil, fmt.Errorf("could not rewrite tag: %s", tag.Name) } return newTag, nil } func (r *refUpdater) rewriteTag(oid []byte) ([]byte, error) { tag, err := r.odb.Tag(oid) if err != nil { return nil, err } if tag.ObjectType == gitobj.TagObjectType { newTag, err := r.rewriteTag(tag.Object) if err != nil { return nil, err } return r.updateOneTag(tag, newTag) } if tag.ObjectType == gitobj.CommitObjectType { if to, ok := r.CacheFn(tag.Object); ok { return r.updateOneTag(tag, to) } } return oid, nil } func (r *refUpdater) updateOneRef(u *git.RefUpdater, maxNameLen int, seen map[git.ReferenceName]bool, ref *git.Reference) error { sha, err := hex.DecodeString(ref.Target) if err != nil { return fmt.Errorf("could not decode: %q", ref.Target) } if seen[ref.Name] { return nil } seen[ref.Name] = true to, ok := r.CacheFn(sha) if ref.ObjectType == git.TagObject { newTag, err := r.rewriteTag(sha) if err != nil { return err } ok = !bytes.Equal(newTag, sha) to = newTag } if !ok { return nil } if err := u.Update(ref.Name, hex.EncodeToString(to), ref.Target); err != nil { return err } namePadding := max(maxNameLen-len(ref.Name), 0) trace.DbgPrint(" %s%s\t%s -> %x", ref.Name, strings.Repeat(" ", namePadding), ref.Target, to) return nil } ================================================ FILE: cmd/hot/pkg/stat/az.go ================================================ package stat import ( "context" "fmt" "os" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/deflect" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/strengthen" ) func showHugeObjects(ctx context.Context, repoPath string, objects map[string]int64, fullPath bool) error { su := newSummer(fullPath) psArgs := []string{"rev-list", "--objects", "--all"} if err := su.resolveName(ctx, repoPath, objects, psArgs, su.printName); err != nil { fmt.Fprintf(os.Stderr, "hot az: resolve file name error: %v\n", err) return err } if err := su.drawInteractive(fmt.Sprintf("%s - %s", tr.W("Descending order by total size"), tr.W("All Branches and Tags"))); err != nil { return err } return nil } func Az(ctx context.Context, repoPath string, limit int64, fullPath bool) error { objects := make(map[string]int64) au := deflect.NewAuditor(repoPath, git.HashFormatOK(repoPath), &deflect.Option{ Limit: limit, OnOversized: func(oid string, size int64) error { objects[oid] = size return nil }, }) if err := au.Execute(); err != nil { fmt.Fprintf(os.Stderr, "hot az: check large file: %v\n", err) return err } _ = showHugeObjects(ctx, repoPath, objects, fullPath) fmt.Fprintf(os.Stderr, "%s%s\n", tr.W("Size: "), blue(strengthen.FormatSize(au.Size()))) return nil } ================================================ FILE: cmd/hot/pkg/stat/color.go ================================================ package stat import ( "fmt" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/term" ) func red(s string) string { switch term.StderrLevel { case term.Level16M: return "\x1b[38;2;247;112;98m" + s + "\x1b[0m" case term.Level256: return "\x1b[31m" + s + "\x1b[0m" } return s } func yellow(s string) string { switch term.StderrLevel { case term.Level16M: return "\x1b[38;2;254;225;64m" + s + "\x1b[0m" case term.Level256: return "\x1b[33m" + s + "\x1b[0m" default: } return s } func green(s string) string { switch term.StderrLevel { case term.Level16M: return "\x1b[38;2;67;233;123m" + s + "\x1b[0m" case term.Level256: return "\x1b[32m" + s + "\x1b[0m" default: } return s } func colorE(s string) string { switch term.StderrLevel { case term.Level16M: return "\x1b[38;2;250;112;154m" + s + "\x1b[0m" case term.Level256: return "\x1b[31m" + s + "\x1b[0m" default: } return s } func blue(s string) string { switch term.StderrLevel { case term.Level16M: return "\x1b[38;2;0;201;255m" + s + "\x1b[0m" case term.Level256: return "\x1b[34m" + s + "\x1b[0m" default: } return s } func green2(s string) string { switch term.StderrLevel { case term.Level16M: return "\x1b[38;2;32;225;215m" + s + "\x1b[0m" case term.Level256: return "\x1b[32m" + s + "\x1b[0m" default: } return s } func colorSize(i int64) string { return blue(strengthen.FormatSize(i)) } func colorSizeU(i uint64) string { return blue(strengthen.FormatSizeU(i)) } func colorInt[I int | uint64 | int64](i I) string { return blue(fmt.Sprintf("%d", i)) } ================================================ FILE: cmd/hot/pkg/stat/draw.go ================================================ package stat import ( "bufio" "context" "fmt" "os" "strings" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/strengthen" ) type Item struct { Path string Total int64 Count int } // Exports support sort type Items []Item // Len len exports func (m Items) Len() int { return len(m) } // Less less func (m Items) Less(i, j int) bool { return m[i].Total > m[j].Total } // Swap function func (m Items) Swap(i, j int) { m[i], m[j] = m[j], m[i] } type sizeCounter struct { sum int64 count int } type summer struct { files map[string]*sizeCounter total int64 count int fullPath bool } func newSummer(fullPath bool) *summer { return &summer{files: make(map[string]*sizeCounter), fullPath: fullPath} } func (s *summer) add(file string, size int64) { s.total += size s.count++ if sz, ok := s.files[file]; ok { sz.sum += size sz.count++ return } s.files[file] = &sizeCounter{sum: size, count: 1} } type Printer func(string, string, int64) func (s *summer) printName(name, oid string, size int64) { if len(name) == 0 { fmt.Fprintf(os.Stderr, "%s <%s> %s: %s\n", yellow(oid), blue("dangle"), tr.W("size"), red(strengthen.FormatSize(size))) return } displayName := name if !s.fullPath { displayName = truncatePath(name, 100) } fmt.Fprintf(os.Stderr, "%s [%s] %s: %s\n", yellow(oid), blue(displayName), tr.W("size"), red(strengthen.FormatSize(size))) } func (s *summer) resolveName(ctx context.Context, repoPath string, seen map[string]int64, psArgs []string, fn Printer) error { if git.IsGitVersionAtLeast(git.NewVersion(2, 35, 0)) { psArgs = append(psArgs, "--filter=object:type=blob") } cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: repoPath, Environ: os.Environ(), }, "git", psArgs...) out, err := cmd.StdoutPipe() if err != nil { return err } defer out.Close() // nolint if err := cmd.Start(); err != nil { return err } br := bufio.NewScanner(out) for br.Scan() { oid, name, _ := strings.Cut(br.Text(), " ") if size, ok := seen[oid]; ok { if fn != nil { fn(name, oid, size) } s.add(name, size) } } return nil } ================================================ FILE: cmd/hot/pkg/stat/size.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package stat import ( "bufio" "context" "encoding/hex" "errors" "fmt" "io" "os" "path/filepath" "strconv" "strings" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/gitobj" "github.com/antgroup/hugescm/modules/strengthen" ) type SizeExecutor struct { limit int64 paths []string objects map[string]int64 fullPath bool } func NewSizeExecutor(size int64, fullPath bool) *SizeExecutor { return &SizeExecutor{limit: size, objects: make(map[string]int64), fullPath: fullPath} } // BLOB filter func (e *SizeExecutor) Match(entry *gitobj.TreeEntry, absPath string) bool { if _, ok := e.objects[hex.EncodeToString(entry.Oid)]; ok { return true } return false } func (e *SizeExecutor) Paths() []string { return e.paths } // git cat-file --batch-check --batch-all-objects func (e *SizeExecutor) Run(ctx context.Context, repoPath string, extract bool) error { if !git.IsGitVersionAtLeast(git.NewVersion(2, 35, 0)) { return errors.New("require Git 2.35.0 or later") } args := []string{"cat-file", "--batch-check", "--batch-all-objects"} if git.IsGitVersionAtLeast(git.NewVersion(2, 42, 0)) { args = append(args, "--unordered") } reader, err := git.NewReader(ctx, &command.RunOpts{RepoPath: repoPath}, args...) if err != nil { return fmt.Errorf("start git cat-file error %w", err) } defer reader.Close() // nolint br := bufio.NewReader(reader) for { line, err := br.ReadString('\n') if errors.Is(err, io.EOF) { // always endswith '\n' break } if err != nil { return fmt.Errorf("git cat-file readline error %w", err) } line = line[:len(line)-1] sv := strings.Split(line, " ") if len(sv) < 3 { continue } if sv[1] != "blob" { continue } sz, err := strconv.ParseInt(sv[2], 10, 64) if err != nil { continue } if sz >= e.limit { e.objects[sv[0]] = sz } } su := newSummer(e.fullPath) psArgs := []string{"rev-list", "--objects", "--all"} if err := su.resolveName(ctx, repoPath, e.objects, psArgs, su.printName); err != nil { fmt.Fprintf(os.Stderr, "hot size: resolve file name error: %v", err) return err } if err := su.drawInteractive(fmt.Sprintf("%s - %s", tr.W("Descending order by total size"), tr.W("All Branches and Tags"))); err != nil { return err } if extract { e.currentCheck(ctx, repoPath, e.objects) } // COPY to files for p := range su.files { e.paths = append(e.paths, p) } diskSize, err := strengthen.Du(filepath.Join(repoPath, "objects")) if err != nil { fmt.Fprintf(os.Stderr, "hot size: check repo disk usage error: %v", err) return err } fmt.Fprintf(os.Stderr, "%s: %s %s: %s\n", tr.W("Repository"), green2(repoPath), tr.W("size"), blue(strengthen.FormatSize(diskSize))) return nil } func (e *SizeExecutor) currentCheck(ctx context.Context, repoPath string, objects map[string]int64) { su := newSummer(e.fullPath) psArgs := []string{"rev-list", "--objects", "HEAD"} if err := su.resolveName(ctx, repoPath, objects, psArgs, nil); err != nil { fmt.Fprintf(os.Stderr, "hot size: resolve file name error: %v", err) return } if err := su.drawInteractive(fmt.Sprintf("%s - %s", tr.W("Descending order by total size"), tr.W("Default Branch"))); err != nil { return } } ================================================ FILE: cmd/hot/pkg/stat/stat.go ================================================ package stat import ( "bufio" "context" "fmt" "io" "net/url" "os" "path/filepath" "regexp" "strings" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/deflect" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/stats" ) var ( emailRegex = regexp.MustCompile(`^[A-Za-z\d]+([-_.][A-Za-z\d]+)*@([A-Za-z\d]+[-.])+[A-Za-z\d]{2,4}$`) ) type StatOptions struct { RepoPath string Limit int64 } type Values map[string]string func listConfig(ctx context.Context, repoPath string) (Values, error) { var stderr strings.Builder cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: os.Environ(), RepoPath: repoPath, Stderr: &stderr, }, "git", "config", "list", "-z") stdout, err := cmd.StdoutPipe() if err != nil { return nil, err } defer stdout.Close() // nolint if err := cmd.Start(); err != nil { return nil, err } defer cmd.Wait() // nolint vs := make(Values) br := bufio.NewReader(stdout) for { line, err := br.ReadString(0) if err != nil && err != io.EOF { return nil, err } // line including '\n' always >= 1 if len(line) == 0 { break } line = line[0 : len(line)-1] k, v, ok := strings.Cut(line, "\n") if !ok { continue } vs[strings.ToLower(k)] = v } return vs, nil } func scanIdentity(vs Values) { if name, ok := vs["user.name"]; !ok { _, _ = tr.Fprintf(os.Stderr, "error: '%s' is not configured correctly\n", colorE("user.name")) } else { fmt.Fprintf(os.Stderr, "%s 'user.name' --> '%s' ✅\n", tr.W("check"), blue(name)) } email, ok := vs["user.email"] if !ok { _, _ = tr.Fprintf(os.Stderr, "error: '%s' is not configured correctly\n", colorE("user.email")) return } if !emailRegex.MatchString(email) { _, _ = tr.Fprintf(os.Stderr, "error: invalid email '%s' (from user.email)\n", colorE(email)) return } fmt.Fprintf(os.Stderr, "%s 'user.email' --> '%s' ✅\n", tr.W("check"), blue(email)) } func safePassword(s string) string { if len(s) < 5 { return strings.Repeat("x", 5) } return s[0:2] + strings.Repeat("x", len(s)-2) } func checkRemote(vs Values) { remote, ok := vs["remote.origin.url"] if !ok { return } u, err := url.Parse(remote) if err != nil { if git.MatchesScpLike(remote) { fmt.Fprintf(os.Stderr, "%s %s ✅\n", tr.W("remote:"), blue(remote)) return } fmt.Fprintf(os.Stderr, "parse remote '%s' error: %s\n", colorE(remote), err) return } username := u.User.Username() password, ok := u.User.Password() if ok { newPassword := safePassword(password) u.User = url.UserPassword(username, newPassword) _, _ = tr.Fprintf(os.Stderr, "insecure remote: remote url contains the password '%s' ❌\n", colorE(newPassword)) fmt.Fprintf(os.Stderr, "%s %s ❌ (%s)\n", tr.W("remote:"), colorE(u.String()), tr.W("sanitized")) return } fmt.Fprintf(os.Stderr, "%s %s ✅\n", tr.W("remote:"), blue(u.String())) } func partialClone(vs Values) (sparse bool, partial bool) { if v, ok := vs["core.sparsecheckout"]; ok && strings.EqualFold(v, "true") { fmt.Fprintf(os.Stderr, "%s: %s\n", tr.W("sparse checkout"), tr.W("enabled")) sparse = true } if v, ok := vs["remote.origin.promisor"]; ok && strings.EqualFold(v, "true") { fmt.Fprintf(os.Stderr, "%s: %s\n", tr.W("partial checkout"), tr.W("enabled")) partial = true } return } func parseShallowCommit(repoPath string) string { p := filepath.Join(repoPath, "shallow") data, err := os.ReadFile(p) if err != nil { return "" } return strings.TrimSpace(string(data)) } func Stat(ctx context.Context, o *StatOptions) error { _, _ = tr.Fprintf(os.Stderr, "Location: %s\n", blue(o.RepoPath)) if version, err := git.VersionDetect(); err == nil { _, _ = tr.Fprintf(os.Stderr, "Git Version: %s\n", blue(version.String())) } vs, err := listConfig(ctx, o.RepoPath) if err != nil { fmt.Fprintf(os.Stderr, "list git config error: %v\n", err) return err } scanIdentity(vs) shaFormat, refFormat := git.ExtensionsFormat(o.RepoPath) if defaultBranch, ok := vs["init.defaultbranch"]; ok { fmt.Fprintf(os.Stderr, "%s 'init.defaultBranch' --> '%s' ✅\n", tr.W("check"), blue(defaultBranch)) } if defaultObjectFormat, ok := vs["init.defaultobjectformat"]; ok { fmt.Fprintf(os.Stderr, "%s 'init.defaultObjectFormat' --> '%s' ✅\n", tr.W("check"), blue(defaultObjectFormat)) } if defaultRefFormat, ok := vs["init.defaultrefformat"]; ok { fmt.Fprintf(os.Stderr, "%s 'init.defaultRefFormat' --> '%s' ✅\n", tr.W("check"), blue(defaultRefFormat)) } if hooksPath, ok := vs["core.hookspath"]; ok { _, _ = tr.Fprintf(os.Stderr, "warning: '%s' is set to '%s', which may affect Git LFS\n", yellow("core.hooksPath"), yellow(hooksPath)) } _, _ = tr.Fprintf(os.Stderr, "Repository object format (sha format): %s ✅\n", blue(shaFormat.String())) _, _ = tr.Fprintf(os.Stderr, "Repository references backend (ref format): %s ✅\n", blue(refFormat)) checkRemote(vs) var careful bool sparse, partial := partialClone(vs) careful = sparse || partial shallow := parseShallowCommit(o.RepoPath) if len(shallow) != 0 { _, _ = tr.Fprintf(os.Stderr, "shallow clone started at: %s\n", shallow) } if current, oid, err := git.RevParseCurrent(ctx, nil, o.RepoPath); err == nil { refname := git.ReferenceName(current) if refname.IsBranch() { fmt.Fprintf(os.Stderr, "%s: %s (commit: %s)\n", tr.W("On branch"), blue(refname.BranchName()), green(oid[:9])) } else { fmt.Fprintf(os.Stderr, "%s %s\n", tr.W("HEAD detached at"), blue(oid)) } } si, err := stats.Status(ctx, o.RepoPath, refFormat) if err != nil { fmt.Fprintf(os.Stderr, "status error: %v\n", err) return err } if si.References.ReferenceBackendName == "reftable" { _, _ = tr.Fprintf(os.Stdout, "references (reftable) tables total: %s\n", colorInt(len(si.References.ReftableTables))) } else { _, _ = tr.Fprintf(os.Stdout, "loose references total: %s\n", colorInt(si.References.LooseReferencesCount)) _, _ = tr.Fprintf(os.Stdout, "packed references size: %s\n", colorSizeU(si.References.PackedReferencesSize)) } // The loose objects size includes objects which are older than the grace period and thus // stale, so we need to subtract the size of stale objects from the overall size. recentLooseObjectsSize := si.LooseObjects.Size - si.LooseObjects.StaleSize // The packfiles size includes the size of cruft packs that contain unreachable objects, so // we need to subtract the size of cruft packs from the overall size. recentPackfilesSize := si.Packfiles.Size - si.Packfiles.CruftSize _, _ = tr.Fprintf(os.Stdout, "loose objects total: %s\n", colorInt(si.LooseObjects.Count)) _, _ = tr.Fprintf(os.Stdout, "packfiles count: %s\n", colorInt(si.Packfiles.Count)) _, _ = tr.Fprintf(os.Stdout, "objects size: %s\n", colorSizeU(si.LooseObjects.Size+si.Packfiles.Size)) _, _ = tr.Fprintf(os.Stdout, "recent size: %s\n", colorSizeU(recentLooseObjectsSize+recentPackfilesSize)) _, _ = tr.Fprintf(os.Stdout, "stale size: %s\n", colorSizeU(si.LooseObjects.StaleSize+si.Packfiles.CruftSize)) _, _ = tr.Fprintf(os.Stdout, "keep size: %s\n", colorSizeU(si.Packfiles.KeepSize)) if si.LFS.Count != 0 { _, _ = tr.Fprintf(os.Stdout, "downloaded lfs count: %s\n", colorInt(si.LFS.Count)) _, _ = tr.Fprintf(os.Stdout, "downloaded lfs size: %s\n", colorSizeU(si.LFS.Size)) } objects := make(map[string]int64) au := deflect.NewAuditor(o.RepoPath, shaFormat, &deflect.Option{ Limit: o.Limit, OnOversized: func(oid string, size int64) error { objects[oid] = size return nil }, }) if err := au.Execute(); err != nil { fmt.Fprintf(os.Stderr, "hot stat: check large file: %v\n", err) return err } fmt.Fprintf(os.Stderr, "%s%s\n", tr.W("repository disk size: "), colorSize(au.Size())) if !careful { _ = showHugeObjects(ctx, o.RepoPath, objects, false) } return nil } ================================================ FILE: cmd/hot/pkg/stat/stat_test.go ================================================ package stat import ( "fmt" "os" "testing" ) func TestCheckEmail(t *testing.T) { ss := []string{ // valid "test@example.com", "john.doe@sub.domain.co.uk", "user+tag@gmail.com", "user_123@my-website.io", "a@b.co", "no-reply@this-domain-does-not-exist.com", // invalid "plainaddress", "@missing-local-part.com", "user@.com", // start dot "user@domain-.com", // domain end '-' "user@domain.c", // TLD short "user@domain..com", // dot/dot " leading.space@domain.com", // leading space } for _, s := range ss { if emailRegex.MatchString(s) { fmt.Fprintf(os.Stderr, "valid: %s\n", s) continue } fmt.Fprintf(os.Stderr, "invalid: %s\n", s) } } func TestSafePassword(t *testing.T) { ss := []string{ "1", "hellow222", "jkac", } for _, s := range ss { fmt.Fprintf(os.Stderr, "%s\n", safePassword(s)) } } func TestListConfig(t *testing.T) { vals, err := listConfig(t.Context(), "/tmp/jack") if err != nil { return } for k, v := range vals { fmt.Fprintf(os.Stderr, "%s = %s\n", k, v) } checkRemote(vals) } func TestTruncateName(t *testing.T) { sss := []string{ "cmd/hot/pkg/size/render.go", "Understand that enabling this registry setting will only affect applications that have been", "", "ProjectContractChargingPeriodProjectAccountReferenceVMFactoryBuilderStrategyDevOptsClassV2.md", "HasThisTypePatternTriedToSneakInSomeGenericOrParameterizedTypePatternMatchingStuffAnywhereVisitor", "doc/org.aspectj/aspectjweaver/1.8.10/org/aspectj/weaver/patterns/HasThisTypePatternTriedToSneakInSomeGenericOrParameterizedTypePatternMatchingStuffAnywhereVisitor.html", "doc/org.aspectj/aspectjweaver/1.8.10/org/aspectj/weaver/patterns/HasThisTypePatternTriedToSneakInSomeGenericOrParameterizedTypePatternMatching/StuffAnywhereVisitor.html", } for _, s := range sss { fmt.Fprintf(os.Stderr, "%s\n", truncatePath(s, 80)) } } ================================================ FILE: cmd/hot/pkg/stat/table.go ================================================ package stat import ( "fmt" "os" "sort" "strconv" "strings" "charm.land/lipgloss/v2" "charm.land/lipgloss/v2/table" "github.com/antgroup/hugescm/cmd/hot/pkg/tr" "github.com/antgroup/hugescm/modules/strengthen" "github.com/clipperhouse/displaywidth" "golang.org/x/term" ) // drawInteractive renders the table statically (no interaction needed) func (s *summer) drawInteractive(title string) error { if len(s.files) == 0 { return nil } // Build and sort items items := make(Items, 0, len(s.files)) for n, i := range s.files { items = append(items, Item{Path: n, Total: i.sum, Count: i.count}) } sort.Sort(items) // Get terminal width termWidth := getTerminalWidth() // Calculate path column width dynamically // Formula: termWidth - (# col) - (count col) - (size col) - borders - padding // # col: ~6 chars, count col: ~12 chars, size col: ~14 chars, borders: 8, padding: 8 fixedWidth := 6 + 12 + 14 + 8 + 8 pathWidth := min(max(termWidth-fixedWidth, 20), 100) // Build rows (including total row) rows := make([][]string, 0, len(items)+1) for i, item := range items { displayPath := item.Path if !s.fullPath { displayPath = truncatePath(item.Path, pathWidth) } rows = append(rows, []string{ strconv.Itoa(i + 1), displayPath, strconv.Itoa(item.Count), strengthen.FormatSize(item.Total), }) } // Add total row (bold) totalRow := []string{ strings.ToUpper(tr.W("total")), "", strconv.Itoa(s.count), strengthen.FormatSize(s.total), } rows = append(rows, totalRow) // Color scheme optimized for file size statistics // Using warm, attention-grabbing colors while maintaining readability headerColor := lipgloss.Color("173") // Warm coral/salmon - stands out but not harsh totalColor := lipgloss.Color("215") // Warm gold/amber - indicates summary/importance borderColor := lipgloss.Color("243") // Medium gray - visible but not distracting // Create table with warm color scheme t := table.New(). Border(lipgloss.NormalBorder()). BorderStyle(lipgloss.NewStyle().Foreground(borderColor)). Headers("#", tr.W("Path"), tr.W("Modifications"), tr.W("Cumulative Size")). Rows(rows...). StyleFunc(func(row, col int) lipgloss.Style { switch { case row == table.HeaderRow: // Header: warm coral for clear structure return lipgloss.NewStyle(). Foreground(headerColor). Bold(true). Padding(0, 1) case row == len(items): // Total row: warm gold to highlight summary return lipgloss.NewStyle(). Foreground(totalColor). Bold(true). Padding(0, 1) default: // Regular rows: default terminal color return lipgloss.NewStyle(). Padding(0, 1) } }) // Print title with proper spacing if title != "" { titleStyle := lipgloss.NewStyle(). Bold(true). Foreground(lipgloss.Color("15")) fmt.Println() fmt.Println(titleStyle.Render(title)) fmt.Println() } // Print table fmt.Println(t) return nil } // getTerminalWidth returns the terminal width, with a sensible default func getTerminalWidth() int { // Try to get terminal width if width, _, err := term.GetSize(int(os.Stdout.Fd())); err == nil && width > 0 { return width } // Default to 80 if we can't detect return 80 } func truncatePath(path string, maxWidth int) string { if maxWidth <= 0 { return "" } if displaywidth.String(path) <= maxWidth { return path } if maxWidth == 1 { return "…" } target := maxWidth - 1 runes := []rune(path) width := 0 cut := len(runes) for i := len(runes) - 1; i >= 0; i-- { w := displaywidth.Rune(runes[i]) if width+w > target { break } width += w cut = i } return "…" + string(runes[cut:]) } ================================================ FILE: cmd/hot/pkg/tr/README.md ================================================ # translate ================================================ FILE: cmd/hot/pkg/tr/languages/zh-CN.toml ================================================ "hot - Git repositories maintenance tool" = "hot - Git 存储库维护工具" "Show context-sensitive help" = "显示上下文相关的帮助" "Make the operation more talkative" = "展示操作的更多细节" "Show version number and quit" = "展示版本信息并退出" "Enable debug mode; analyze timing" = "开启调试模式分析时间消耗" "Commands:" = "命令:" "Arguments:" = "参数:" "Flags:" = "标志:" "Usage: " = "用法:" " or: " = " 或:" "Aborting" = "正在终止" "error: " = "错误:" "fatal: " = "致命错误:" "hint: " = "提示:" "Run \"%s --help\" for more information." = "运行 \"%s --help\" 以获取更多信息。" "Run \"%s --help\" for more information on a command." = "运行 \"%s --help\" 以获取有关命令的更多信息。" "Show repositories size and large files" = "展示存储库体积和大文件" "Remove files in repository and rewrite history" = "删除存储库中的文件并重写历史" "Show full path" = "展示完整路径" "Scan references in a local repository" = "扫描本地存储库中的引用" "Sort by time from oldest to newest" = "按照时间从旧到新排序" "Interactive mode to clean repository large files" = "交互模式清理存储库大文件" "Interactive mode to clean repository large files (Grafting mode)" = "交互模式清理存储库大文件(嫁接模式)" "Prune repository when commits are rewritten" = "提交被重写后修剪存储库" "Grafting mode" = "嫁接模式" "Graft only the default branch" = "仅嫁接默认分支" "Remove all large blobs" = "删除所有大文件" "Path to repositories" = "存储库路径" "Large file limit size, supported units: KB, MB, GB, K, M, G" = "大文件限制大小,支持的单位:KB, MB, GB, K, M, G" "Whether large files exist in the default branch" = "大文件是否存在于默认分支" "Specify repository location" = "指定存储库位置" "Matching pattern, all references are displayed by default" = "匹配模式,默认展示所有引用" "Path to remove in repository, support wildcards" = "存储库中需要删除的路径,支持 Git 风格通配符" "Confirm rewriting local branches and tags" = "确认重写本地分支和标签(默认 false)" "Descending order by total size" = "按总大小降序排列" "All Branches and Tags" = "所有的分支和标签" "Default Branch" = "默认分支" "Show default branch large files:" = "展示默认分支大文件:" "Which files need to be deleted" = "哪些文件需要删除" "Batch" = "批次" "You can increase the file size limit, the number of large files:" = "你可以调高文件大小限制,大文件数量:" "The total number of files that will be deleted is:" = "将要删除的文件总数为:" Path = "路径" "Cumulative Size" = "累计大小" Repository = "存储库" Modifications = "修改次数" size = "大小" "Do you want to rewrite local branches and tags" = "是否重写本地分支和标签" "rewrite commits" = "重写提交" "rewrite references" = "重写引用" "processing completed" = "处理完成" "graft commits" = "嫁接提交" "total" = "总计" "Do you want to prune the repository right away" = "是否马上修剪存储库" "Repository not bare repository, continue to rewrite" = "此存储库不是裸存储库,是否继续执行" "Matched references: " = "匹配到的引用:" "Reference Name" = "引用名称" Hash = "哈希" Leading = "领先" Lagging = "落后" Date = "日期" "reference is broken" = "引用已损坏" "scan references" = "扫描引用" "Clean up expired references" = "清理过期引用" "Only clean up merged branches, ignoring expiration times" = "仅清理被合并的分支,忽略过期时间" "Clean up expired Tags, off by default" = "清理过期的标签,默认不清理" "Reference expiration time, support: m, h, d, w" = "引用过期时间,支持:m, h, d, w (分钟/小时/天/周)" "Migrate a repository to the specified object format" = "迁移存储库对象格式到指定对象格式" "migrate repository from %s to %s success, spent: %v\n" = "成功将存储库对象格式从 %s 迁移到 %s, 耗时: %v\n" "Specifying the object format, support only: sha1 or sha256" = "指定对象格式,仅支持:sha1 or sha256" "Original repository remote URL (or filesystem path)" = "原始存储库远程 URL(或文件系统路径)" "Destination where the repository is migrated" = "迁移完的存储库目的地" "Save as a bare git repository" = "保存为裸 Git 存储库" "fast rewrite objects" = "快速重写 objects" "Original repository remote URL" = "原始存储库远程地址" "Destination for the new repository" = "新存储库的目的地" "migrate repository to %s success, spent: %v\n" = "成功将存储库对象格式迁移到 %s, 耗时: %v\n" # co "EXPERIMENTAL: Clones a repository into a newly created directory" = "EXPERIMENTAL: 将存储库克隆到新创建的目录中" "A subset of repository files, all files are checked out by default" = "存储库文件的子集,默认检出所有文件" "Instead of pointing the newly created HEAD to the branch pointed to by the cloned repository’s HEAD, point to branch instead" = "不要将新创建的 HEAD 指向克隆存储库 HEAD 所指向的分支,而是指向 分支" "Instead of pointing the newly created HEAD to the branch pointed to by the cloned repository’s HEAD, point to commit instead" = "不要将新创建的 HEAD 指向克隆存储库 HEAD 所指向的分支,而是指向 提交" "Create a shallow clone with a history truncated to the specified number of commits" = "创建一个浅克隆,其历史记录被截断为指定的提交次数" "Cloning to '%s' completed, spent: %v.\n" = "克隆到:'%s' 完成,耗时:%v。\n" "After the clone is created, initialize and clone submodules within based on the provided pathspec" = "创建克隆后,根据提供的路径规范初始化并克隆其中的子模块" "Override default clone/fetch configuration, format: =" = "覆盖默认 clone/fetch 配置,格式:<名称>=<取值>" # unbranch "Linearize repository history" = "线性化存储库历史" "Linearize the specified revision history" = "线性化指定版本历史" "Save linearized branches to new target" = "保存线性化分支到新目标" "Keep the number of commits, 0 keeps all commits" = "保留 commit 数量,0 保留所有 commits" "unbranch unspecified branch mode is incompatible with --keep" = "unbranch 未指定分支模式与 --keep 不兼容" "Prune all unreachable objects from the object database" = "从对象数据库中删除所有无法访问的对象" # snapshot "Create a snapshot commit for the worktree" = "为工作区创建快照提交" "Create an orphan commit" = "创建一个孤儿提交" "Push the worktree snapshot commit to the remote" = "将工作区快照提交推送到远程" "ID of a parent commit object" = "父提交对象 ID" "Use the given message as the commit message. Concatenate multiple -m options as separate paragraphs" = "使用给定的消息作为提交说明。多个 -m 选项的值会作为独立段落合并" "Take the commit message from the given file. Use - to read the message from the standard input" = "从给定文件中获取提交消息。 使用 - 从标准输入读取消息" "Force updates" = "强制更新" "Aborting commit due to empty commit message." = "终止提交因为提交说明为空。" "new snapshot commit:" = "新的快照提交:" "Cleanup unnecessary files and optimize the local repository" = "清除不必要的文件和优化本地仓库" # az "Analyze repository large files" = "分析存储大文件" # prune-refs "Prune refs by prefix" = "清理指定前缀的引用" "Reference prefixes that need to be cleaned up" = "需要清理的引用前缀" "Cleanup references using default prefix" = "清理默认前缀的引用" "Remove more dirty references" = "删除更多的脏引用" "Dry run" = "演习" # cat "Provide contents or details of repository objects" = "提供存储库对象的内容或类型和大小信息" "The name of the object to show" = "要显示的对象的名称。" "Show object type" = "显示对象的类型" "Show object size" = "显示对象的大小" "Omits blobs larger than n bytes or units. n may be zero. Supported units: KB, MB, GB, K, M, G" = "省略大于 n 字节或单位的 blob。n 可以为零。支持的单位:KB, MB, GB, K, M, G" "Returns data as JSON; limited to commits, trees, and tags" = "仅提交、树、标签数据以 JSON 格式返回" "Converting text to Unicode" = "将文本转为 Unicode" "Output to a specific file instead of stdout" = "输出到特定文件而不是 stdout" "Disable alternate screen buffer for pager" = "禁用 pager 的备用屏幕缓冲区" # diff "Show changes between commits, commit and working tree, etc" = "显示提交之间、提交与工作区等的变更" "Commit range or paths" = "提交范围或路径" "Show staged changes" = "显示暂存区的变更" "Same as --cached" = "同 --cached" "Output patches in JSON format" = "以 JSON 格式输出补丁" # show "Show the changes introduced by a commit" = "显示提交引入的变更" "Commit to show" = "要显示的提交" # stat "View repository status" = "查看存储库状态" "Git Version: %s\n" = "Git 版本:%s\n" "Location: %s\n" = "位置:%s\n" "error: '%s' is not configured correctly\n" = "错误:未正确配置 '%s'\n" "error: invalid email '%s' (from user.email)\n" = "错误:无效的邮件地址 '%s'(来源 user.email)\n" "check" = "检查" "warning: '%s' is set to '%s', which may affect Git LFS\n" = "警告:'%s' 已设为 '%s',可能影响 Git LFS\n" "Repository object format (sha format): %s ✅\n" = "存储库对象格式 (sha format):%s ✅\n" "Repository references backend (ref format): %s ✅\n" = "存储库引用后端 (ref format):%s ✅\n" "remote:" = "远程:" "sanitized" = "已消毒" "insecure remote: remote url contains the password '%s' ❌\n" = "不安全的远程:远程 URL 包含密码 '%s' ❌\n" "sparse checkout" = "稀疏检出" "partial checkout" = "部分检出" "enabled" = "已开启" "shallow clone started at: %s\n" = "浅表克隆起始于:%s\n" "On branch" = "位于分支" "HEAD detached at" = "头指针分离于" "Size: " = "大小:" "references (reftable) tables total: %s\n" = "引用 (reftable) tables 总计: %s\n" "loose references total: %s\n" = "松散引用总计:%s\n" "packed references size: %s\n" = "打包引用大小:%s\n" "loose objects total: %s\n" = "松散对象总计:%s\n" "packfiles count: %s\n" = "打包文件数量:%s\n" "objects size: %s\n" = "对象体积总计:%s\n" "recent size: %s\n" = "对象最近大小:%s\n" "stale size: %s\n" = "对象陈旧大小:%s\n" "keep size: %s\n" = "对象保留大小:%s\n" "downloaded lfs count: %s\n" = "已下载大文件数量:%s\n" "downloaded lfs size: %s\n" = "已下载大文件体积:%s\n" "repository disk size: " = "仓库磁盘占用:" # errors "hot snapshot --push require remote refname" = "hot snapshot --push 需要远程引用名称" "can only be run on non-bare repositories, error: %v" = "只能在非裸存储库上运行,错误:%v" "new git decoder error: %v" = "新建 git 解码器错误:%v" "open '%s' error: %v\n" = "打开 '%s' 错误:%v\n" "read messsage from stdin: %v" = "从标准输入读取消息错误:%v" "read messsage from %s: %v" = "从 %s 读取消息错误:%v" "git read-tree error: %v" = "git read-tree 错误:%v" "git add error: %v" = "git add 错误:%v" "git write-tree: %v" = "git write-tree 错误:%v" "git commit-tree error: %v" = "git commit-tree 错误:%v" "rev-parse HEAD: %v" = "解析 HEAD 错误:%v" "No references to be deleted\n" = "没有需要删除的引用\n" "* The following ref prefixes will be deleted:\n" = "* 以下引用前缀将被删除:\n" ================================================ FILE: cmd/hot/pkg/tr/tr.go ================================================ package tr import ( "embed" "fmt" "io" "path" "strings" "github.com/antgroup/hugescm/modules/locale" "github.com/pelletier/go-toml/v2" ) //go:embed languages var langFS embed.FS var ( langTable = make(map[string]any) ) func parseLocale() string { t, err := locale.Detect() if err != nil { return "en-US" } lang := t.String() switch { case strings.HasPrefix(lang, "zh-Hans"): return "zh-CN" // TODO FIXME } return lang } func DelayInitializeLocale() error { fd, err := langFS.Open(path.Join("languages", parseLocale()+".toml")) if err != nil { return err } defer fd.Close() // nolint if err := toml.NewDecoder(fd).Decode(&langTable); err != nil { return err } return nil } func DefaultLocaleName() string { return parseLocale() } func W(k string) string { if v, ok := langTable[k]; ok { if s, ok := v.(string); ok { return s } } return k } func Fprintf(w io.Writer, format string, a ...any) (n int, err error) { return fmt.Fprintf(w, W(format), a...) } ================================================ FILE: cmd/hot/pkg/tr/tr_test.go ================================================ package tr import ( "fmt" "os" "runtime" "testing" ) func TestFS(t *testing.T) { _ = DelayInitializeLocale() langTable["ok"] = "确定" fmt.Fprintf(os.Stderr, "load ok=%s\n", W("ok")) fmt.Fprintf(os.Stderr, "%s\n", W("Descending order by total size:")) _, _ = Fprintf(os.Stderr, "current os '%s'\n", runtime.GOOS) } func TestLANG(t *testing.T) { _ = os.Setenv("LC_ALL", "zh_CN.UTF8") _ = DelayInitializeLocale() fmt.Fprintf(os.Stderr, "load ok={%v}\n", W("ok")) _, _ = Fprintf(os.Stderr, "current os '%s'\n", runtime.GOOS) } ================================================ FILE: cmd/hot/winres.toml ================================================ # icon = "res/bali.ico" manifest = """data: HugeSCM true """ [FixedFileInfo] FileFlagsMask = "3f" FileFlags = "00" FileOS = "40004" FileType = "01" FileSubType = "00" [FixedFileInfo.FileVersion] Major = 0 Minor = 0 Patch = 0 Build = 0 [FixedFileInfo.ProductVersion] Major = 0 Minor = 0 Patch = 0 Build = 0 [StringFileInfo] Comments = "" CompanyName = "AntGroup Inc" FileDescription = "hot - Git repositories maintenance tool" FileVersion = "" InternalName = "hot.exe" LegalCopyright = "Copyright \u00A9 2026. AntGroup Inc" LegalTrademarks = "" OriginalFilename = "hot.exe" PrivateBuild = "" ProductName = "HugeSCM" ProductVersion = "" SpecialBuild = "" [VarFileInfo] [VarFileInfo.Translation] LangID = "0409" CharsetID = "04B0" ================================================ FILE: cmd/zeta/crate.toml ================================================ name = "zeta" description = "HugeSCM - A next generation cloud-based version control system" destination = "bin" version = "0.23.0" goflags = [ "-ldflags", "-X github.com/antgroup/hugescm/pkg/version.version=$BUILD_VERSION -X github.com/antgroup/hugescm/pkg/version.buildTime=$BUILD_TIME -X github.com/antgroup/hugescm/pkg/version.buildCommit=$BUILD_COMMIT", ] ================================================ FILE: cmd/zeta/main.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "errors" "os" "time" "github.com/antgroup/hugescm/modules/env" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/command" "github.com/antgroup/hugescm/pkg/kong" "github.com/antgroup/hugescm/pkg/tr" "github.com/antgroup/hugescm/pkg/version" "github.com/antgroup/hugescm/pkg/zeta" ) type App struct { command.Globals Checkout command.Checkout `cmd:"checkout" aliases:"co" help:"Checkout remote, switch branches, or restore worktree files"` Switch command.Switch `cmd:"switch" help:"Switch branches"` Add command.Add `cmd:"add" help:"Add file contents to the index"` Status command.Status `cmd:"status" help:"Show the working tree status"` Restore command.Restore `cmd:"restore" help:"Restore working tree files"` Fetch command.Fetch `cmd:"fetch" help:"Download objects and reference from remote"` Commit command.Commit `cmd:"commit" help:"Record changes to the repository"` Push command.Push `cmd:"push" help:"Update remote refs along with associated objects"` Branch command.Branch `cmd:"branch" help:"List, create, or delete branches"` Tag command.Tag `cmd:"tag" help:"List, create, or delete tags"` Pull command.Pull `cmd:"pull" help:"Fetch from and integrate with remote"` Merge command.Merge `cmd:"merge" help:"Join two development histories together"` Rebase command.Rebase `cmd:"rebase" help:"Reapply commits on top of another base tip"` Config command.Config `cmd:"config" help:"Get and set repository or global options"` CatFile command.Cat `cmd:"cat-file" aliases:"cat" help:"Provide contents or details of repository objects"` Log command.Log `cmd:"log" help:"Show commit logs"` GC command.GC `cmd:"gc" help:"Cleanup unnecessary files and optimize the local repository"` Reset command.Reset `cmd:"reset" help:"Reset current HEAD to the specified state"` Diff command.Diff `cmd:"diff" help:"Show changes between commits, commit and working tree, etc"` Clean command.Clean `cmd:"clean" help:"Remove untracked files from the working tree"` LsTree command.LsTree `cmd:"ls-tree" help:"List the contents of a tree object"` MergeTree command.MergeTree `cmd:"merge-tree" help:"Perform merge without touching index or working tree"` RM command.Remove `cmd:"rm" help:"Remove files from the working tree and from the index"` Stash command.Stash `cmd:"stash" help:"Stash the changes in a dirty working directory away"` RevParse command.RevParse `cmd:"rev-parse" help:"Pick out and massage parameters"` ForEachRef command.ForEachRef `cmd:"for-each-ref" help:"Output information on each ref"` Remote command.Remote `cmd:"remote" help:"Manage of tracked repository"` CheckIgnore command.CheckIgnore `cmd:"check-ignore" help:"Debug zetaignore / exclude files"` Init command.Init `cmd:"init" help:"Create an empty zeta repository"` MergeBase command.MergeBase `cmd:"merge-base" help:"Find optimal common ancestors for merge"` LsFiles command.LsFiles `cmd:"ls-files" help:"Show information about files in the index and the working tree"` HashObject command.HashObject `cmd:"hash-object" help:"Compute hash or create object"` MergeFile command.MergeFile `cmd:"merge-file" help:"Run a three-way file merge"` Show command.Show `cmd:"show" help:"Show various types of objects"` Version command.Version `cmd:"version" help:"Display version information"` CherryPick command.CherryPick `cmd:"cherry-pick" help:"EXPERIMENTAL: Apply the changes introduced by some existing commit"` Revert command.Revert `cmd:"revert" help:"EXPERIMENTAL: Revert commit"` Rename command.Rename `cmd:"rename" help:"EXPERIMENTAL: Rename a file"` Debug bool `name:"debug" help:"Enable debug mode; analyze timing"` } func main() { _ = env.DelayInitializeEnv() // initialize locale _ = tr.Initialize() kong.BindW(tr.W) // replace W var app App ctx := kong.Parse(&app, kong.NamedMapper("size", command.SizeDecoder()), kong.NamedMapper("expire", command.ExpireDecoder()), kong.Name("zeta"), kong.Description(tr.W("HugeSCM - A next generation cloud-based version control system")), kong.UsageOnError(), kong.ConfigureHelp(kong.HelpOptions{ Compact: true, NoExpandSubcommands: true, }), kong.Vars{ "version": version.GetVersionString(), }, ) now := time.Now() m := strengthen.NewMeasurer("zeta", app.Debug) if app.Verbose { trace.EnableDebugMode() } err := ctx.Run(&app.Globals) m.Close() if app.Verbose { trace.DbgPrint("time spent: %v", time.Since(now)) } if err == nil { return } if e, ok := errors.AsType[*zeta.ErrExitCode](err); ok { os.Exit(e.ExitCode) } os.Exit(127) } ================================================ FILE: cmd/zeta/winres.toml ================================================ # icon = "res/bali.ico" manifest = """data: HugeSCM true """ [FixedFileInfo] FileFlagsMask = "3f" FileFlags = "00" FileOS = "40004" FileType = "01" FileSubType = "00" [FixedFileInfo.FileVersion] Major = 0 Minor = 0 Patch = 0 Build = 0 [FixedFileInfo.ProductVersion] Major = 0 Minor = 0 Patch = 0 Build = 0 [StringFileInfo] Comments = "" CompanyName = "AntGroup Inc" FileDescription = "HugeSCM - A next generation cloud-based version control system" FileVersion = "" InternalName = "zeta.exe" LegalCopyright = "Copyright \u00A9 2026. HugeSCM contributors" LegalTrademarks = "" OriginalFilename = "zeta.exe" PrivateBuild = "" ProductName = "HugeSCM" ProductVersion = "" SpecialBuild = "" [VarFileInfo] [VarFileInfo.Translation] LangID = "0409" CharsetID = "04B0" ================================================ FILE: cmd/zeta-mc/crate.toml ================================================ name = "zeta-mc" description = "zeta-mc - Migrate Git repository to zeta" destination = "bin" version = "0.23.0" goflags = [ "-ldflags", "-X github.com/antgroup/hugescm/pkg/version.version=$BUILD_VERSION -X github.com/antgroup/hugescm/pkg/version.buildTime=$BUILD_TIME -X github.com/antgroup/hugescm/pkg/version.buildCommit=$BUILD_COMMIT", ] ================================================ FILE: cmd/zeta-mc/main.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "os" "github.com/antgroup/hugescm/modules/env" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/kong" "github.com/antgroup/hugescm/pkg/tr" "github.com/antgroup/hugescm/pkg/version" ) func main() { // delay initialize git env _ = env.DelayInitializeEnv() // initialize locale _ = tr.Initialize() kong.BindW(tr.W) // replace W var app App ctx := kong.Parse(&app, kong.Name("zeta-mc"), kong.Description(tr.W("zeta-mc - Migrate Git repository to zeta")), kong.UsageOnError(), kong.ConfigureHelp(kong.HelpOptions{ Compact: true, }), kong.Vars{ "version": version.GetVersionString(), }, ) if app.Verbose { trace.EnableDebugMode() } m := strengthen.NewMeasurer("zeta-mc", app.Debug) defer m.Close() err := ctx.Run(&app.Globals) if err != nil { os.Exit(1) } } ================================================ FILE: cmd/zeta-mc/migrate.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "errors" "bytes" "context" "fmt" "os" "path" "path/filepath" "strings" "time" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/pkg/migrate" "github.com/antgroup/hugescm/pkg/tr" ) type App struct { Globals From string `arg:"" name:"from" help:"Original repository remote URL (or filesystem path)" type:"string"` Destination string `arg:"" optional:"" name:"destination" help:"Destination where the repository is migrated" type:"path"` Values []string `short:"X" shortonly:"" help:"Override default configuration, format: ="` Squeeze bool `name:"squeeze" short:"s" help:"Squeeze mode, compressed metadata"` LFS bool `name:"lfs" help:"Migrate all LFS objects to zeta"` Quiet bool `name:"quiet" help:"Operate quietly. Progress is not reported to the standard error stream"` Debug bool `name:"debug" help:"Enable debug mode; analyze timing"` } func die_error(format string, a ...any) { var b bytes.Buffer _, _ = b.WriteString(tr.W("error: ")) fmt.Fprintf(&b, tr.W(format), a...) _ = b.WriteByte('\n') _, _ = os.Stderr.Write(b.Bytes()) } func (c *App) concatDestination(baseName string) (string, error) { destination := c.Destination if len(destination) == 0 { destination = strings.TrimSuffix(baseName, ".git") } if !filepath.IsAbs(destination) { cwd, err := os.Getwd() if err != nil { fmt.Fprintf(os.Stderr, "Get current workdir error: %v\n", err) return "", err } destination = filepath.Join(cwd, destination) } dirs, err := os.ReadDir(destination) if err != nil { if os.IsNotExist(err) { return destination, nil } fmt.Fprintf(os.Stderr, "readdir %s error: %v\n", destination, err) return "", err } if len(dirs) != 0 { die_error("destination path '%s' already exists and is not an empty directory.", filepath.Base(destination)) return "", ErrWorktreeNotEmpty } return destination, nil } func (c *App) cloneAndMigrate(g *Globals, uri string) error { destination, err := c.concatDestination(path.Base(uri)) if err != nil { return err } tempDir, err := os.MkdirTemp(os.TempDir(), "clone") if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return err } defer os.RemoveAll(tempDir) // nolint if err := g.RunEx(command.NoDir, "git", "clone", "--bare", c.From, tempDir); err != nil { fmt.Fprintf(os.Stderr, "clone error: %v", err) return err } return c.migrateFrom(g, tempDir, destination) } func (c *App) Run(g *Globals) error { uri, err := pickURI(c.From) if err == nil { return c.cloneAndMigrate(g, uri) } if !errors.Is(err, ErrLocalEndpoint) { fmt.Fprintf(os.Stderr, "bad remote '%s' %v\n", c.From, err) return err } absFrom, err := filepath.Abs(c.From) if err != nil { fmt.Fprintf(os.Stderr, "bad remote '%s' %v\n", c.From, err) return err } if _, err = os.Stat(c.From); err != nil { fmt.Fprintf(os.Stderr, "bad remote '%s' %v\n", c.From, err) return err } destination, err := c.concatDestination(filepath.Base(c.From) + "-zeta") if err != nil { return err } return c.migrateFrom(g, absFrom, destination) } func (c *App) migrateFrom(g *Globals, from, to string) error { if c.LFS { fmt.Fprintf(os.Stderr, "Fetch all lfs objects ...\n") if err := g.RunEx(from, "git", "lfs", "fetch", "--all"); err != nil { fmt.Fprintf(os.Stderr, "git lfs fetch error: %v", err) } } now := time.Now() r, err := migrate.NewMigrator(context.Background(), &migrate.MigrateOptions{ Environ: os.Environ(), From: from, To: to, Squeeze: c.Squeeze, LFS: c.LFS, StepEnd: 4, Values: c.Values, Quiet: c.Quiet, Verbose: g.Verbose, }) if err != nil { fmt.Fprintf(os.Stderr, "NewRewriter error: %v\n", err) return err } defer r.Close() // nolint if err := r.Execute(context.Background()); err != nil { fmt.Fprintf(os.Stderr, "Execute error: %v\n", err) return err } _, _ = tr.Fprintf(os.Stderr, "Migrate '%s' from git to zeta success, spent: %v\n", c.From, time.Since(now)) return nil } ================================================ FILE: cmd/zeta-mc/msic.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "context" "errors" "fmt" "net/url" "os" "time" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/kong" "github.com/antgroup/hugescm/pkg/version" ) type Globals struct { Verbose bool `short:"V" name:"verbose" help:"Make the operation more talkative"` Version VersionFlag `short:"v" name:"version" help:"Show version number and quit"` } type VersionFlag bool func (v VersionFlag) Decode(ctx *kong.DecodeContext) error { return nil } func (v VersionFlag) IsBool() bool { return true } func (v VersionFlag) BeforeApply(app *kong.Kong, vars kong.Vars) error { fmt.Println(version.GetVersionString()) app.Exit(0) return nil } var ( ErrLocalEndpoint = errors.New("local endpoint") ErrWorktreeNotEmpty = errors.New("worktree not empty") ) func pickURI(rawURL string) (string, error) { if git.MatchesScpLike(rawURL) { _, _, _, p := git.FindScpLikeComponents(rawURL) return p, nil } if git.MatchesScheme(rawURL) { u, err := url.Parse(rawURL) if err != nil { return "", err } return u.Path, nil } return "", ErrLocalEndpoint } func (g *Globals) RunEx(repoPath string, cmdArg0 string, args ...string) error { now := time.Now() cmd := command.NewFromOptions(context.Background(), &command.RunOpts{ RepoPath: repoPath, Environ: os.Environ(), Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, NoSetpgid: true, }, cmdArg0, args...) if err := cmd.Run(); err != nil { return err } trace.DbgPrint("exec: %s spent: %v", cmd.String(), time.Since(now)) return nil } ================================================ FILE: cmd/zeta-mc/winres.toml ================================================ # icon = "res/bali.ico" manifest = """data: HugeSCM Migrate true """ [FixedFileInfo] FileFlagsMask = "3f" FileFlags = "00" FileOS = "40004" FileType = "01" FileSubType = "00" [FixedFileInfo.FileVersion] Major = 0 Minor = 0 Patch = 0 Build = 0 [FixedFileInfo.ProductVersion] Major = 0 Minor = 0 Patch = 0 Build = 0 [StringFileInfo] Comments = "" CompanyName = "AntGroup Inc" FileDescription = "HugeSCM - A next generation cloud-based version control system" FileVersion = "" InternalName = "zeta-mc.exe" LegalCopyright = "Copyright \u00A9 2026. HugeSCM contributors" LegalTrademarks = "" OriginalFilename = "zeta-mc.exe" PrivateBuild = "" ProductName = "HugeSCM" ProductVersion = "" SpecialBuild = "" [VarFileInfo] [VarFileInfo.Translation] LangID = "0409" CharsetID = "04B0" ================================================ FILE: cmd/zeta-serve/command_encrypt.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "fmt" "io" "os" "path/filepath" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/pkg/serve" "github.com/pelletier/go-toml/v2" ) type pseudoConfig struct { X25519Key string `toml:"x25519_key,omitempty"` } func (pc *pseudoConfig) Decode(cfg string, expandEnv bool) error { r, err := serve.NewExpandReader(cfg, expandEnv) if err != nil { return err } defer r.Close() // nolint if err := toml.NewDecoder(r).Decode(pc); err != nil { return err } return nil } type Encrypt struct { Source string `arg:"" name:"source" help:"source text to be encrypted"` FromEnv bool `short:"s" name:"from-env" help:"read source text from environment variable"` FromFile bool `short:"p" name:"from-file" help:"read source text from a file"` Config string `short:"c" name:"config" optional:"" help:"Location of server config file" type:"path"` Destination string `short:"d" name:"destination" optional:"" help:"save variable to specified file"` } func (c *Encrypt) Run(globals *Globals) error { var pc pseudoConfig if err := pc.Decode(c.Config, globals.ExpandEnv); err != nil { fmt.Fprintf(os.Stderr, "load config error: %v\n", err) return err } source, err := func() (string, error) { if c.FromFile { fd, err := os.Open(c.Source) if err != nil { return "", err } defer fd.Close() // nolint si, err := fd.Stat() if err != nil { return "", err } if sz := si.Size(); sz > serve.MiByte { return "", fmt.Errorf("file size too large: %s", strengthen.FormatSize(sz)) } b, err := io.ReadAll(fd) if err != nil { return "", err } return string(b), nil } if c.FromEnv { return os.Getenv(c.Source), nil } return c.Source, nil }() if err != nil { fmt.Fprintf(os.Stderr, "read from file error: %v\n", err) return err } secret, err := serve.Encrypt(pc.X25519Key, source) if err != nil { fmt.Fprintf(os.Stderr, "encrypt error: %v\n", err) return err } if len(c.Destination) == 0 { _, _ = fmt.Fprintln(os.Stdout, secret) return nil } if err := os.MkdirAll(filepath.Dir(c.Destination), 0755); err != nil { fmt.Fprintf(os.Stderr, "write secret error: %v\n", err) return err } fd, err := os.Create(c.Destination) if err != nil { fmt.Fprintf(os.Stderr, "create secret file error: %v\n", err) return err } defer fd.Close() // nolint if _, err := fd.WriteString(secret); err != nil { fmt.Fprintf(os.Stderr, "write secret to file error: %v\n", err) return err } return nil } ================================================ FILE: cmd/zeta-serve/command_httpd.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "errors" "context" "net/http" "github.com/antgroup/hugescm/pkg/serve/httpserver" "github.com/sirupsen/logrus" ) type HTTPD struct { Config string `short:"c" name:"config" help:"Location of server config file" default:"~/config/zeta-serve-httpd.toml" type:"path"` } func (c *HTTPD) Run(globals *Globals) error { sc, err := httpserver.NewServerConfig(c.Config, globals.ExpandEnv) if err != nil { logrus.Errorf("zeta-seve httpd load server config error: %v", err) return err } srv, err := httpserver.NewServer(sc) if err != nil { logrus.Errorf("zeta-seve httpd new httpd server error: %v", err) return err } closer := newCloser() go closer.listenSignal(context.Background(), srv) if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { logrus.Errorf("zeta-seve httpd listen server error: %v", err) return err } <-closer.ch logrus.Infof("zeta-seve httpd exited") return nil } ================================================ FILE: cmd/zeta-serve/command_keygen.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "crypto/ecdh" "crypto/ecdsa" "crypto/ed25519" "crypto/elliptic" "crypto/rand" "crypto/rsa" "crypto/x509" "encoding/pem" "errors" "fmt" "os" "strings" "golang.org/x/crypto/ssh" ) type Keygen struct { Type string `name:"type" short:"t" help:"Generate private key type" default:"RSA"` BitSize int `name:"bitSize" help:"Generates a random RSA private key of the given bit size" default:"2048"` } func (c *Keygen) genRAS() error { if c.BitSize < 1024 { c.BitSize = 2048 } // Generate RSA key. key, err := rsa.GenerateKey(rand.Reader, c.BitSize) if err != nil { fmt.Fprintf(os.Stderr, "GenKey error: %v\n", err) return err } // Encode private key to PKCS#1 ASN.1 PEM. keyPEM := pem.EncodeToMemory( &pem.Block{ Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key), }, ) _, _ = fmt.Fprint(os.Stdout, string(keyPEM)) return nil } func (c *Keygen) genED25519() error { _, privateKey, err := ed25519.GenerateKey(rand.Reader) if err != nil { fmt.Fprintf(os.Stderr, "GenKey error: %v\n", err) return err } block, err := ssh.MarshalPrivateKey(privateKey, "") if err != nil { fmt.Fprintf(os.Stderr, "GenKey error: %v\n", err) return err } _, _ = fmt.Fprint(os.Stdout, string(pem.EncodeToMemory(block))) return nil } func (c *Keygen) genECDSA() error { privateKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) if err != nil { fmt.Fprintf(os.Stderr, "GenKey error: %v\n", err) return err } block, err := ssh.MarshalPrivateKey(privateKey, "") if err != nil { fmt.Fprintf(os.Stderr, "GenKey error: %v\n", err) return err } _, _ = fmt.Fprint(os.Stdout, string(pem.EncodeToMemory(block))) return nil } func (c *Keygen) genX25519() error { privateKey, err := ecdh.X25519().GenerateKey(rand.Reader) if err != nil { return fmt.Errorf("GenKey error: %w", err) } privateKeyBytes, err := x509.MarshalPKCS8PrivateKey(privateKey) if err != nil { return fmt.Errorf("GenKeyError: %w", err) } _, _ = fmt.Fprint(os.Stdout, string(pem.EncodeToMemory(&pem.Block{ Type: "PRIVATE KEY", Bytes: privateKeyBytes, }))) return nil } func (c *Keygen) Run(g *Globals) error { switch strings.ToUpper(c.Type) { case "RSA": return c.genRAS() case "ED25519": return c.genED25519() case "ECDSA": return c.genECDSA() case "X25519": return c.genX25519() default: fmt.Fprintf(os.Stderr, "unsupported key type: %v\n", c.Type) return errors.New("unsupported key type") } } ================================================ FILE: cmd/zeta-serve/command_sshd.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "errors" "context" "net/http" "github.com/antgroup/hugescm/pkg/serve/sshserver" "github.com/sirupsen/logrus" ) type SSHD struct { Config string `short:"c" name:"config" help:"Location of server config file" default:"~/config/zeta-serve-sshd.toml" type:"path"` } func (c *SSHD) Run(globals *Globals) error { sc, err := sshserver.NewServerConfig(c.Config, globals.ExpandEnv) if err != nil { logrus.Errorf("zeta-seve sshd load server config error: %v", err) return err } srv, err := sshserver.NewServer(sc) if err != nil { logrus.Errorf("zeta-seve sshd new sshd server error: %v", err) return err } closer := newCloser() go closer.listenSignal(context.Background(), srv) if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { logrus.Errorf("zeta-seve sshd listen server error: %v", err) return err } <-closer.ch logrus.Infof("zeta-seve sshd exited") return nil } ================================================ FILE: cmd/zeta-serve/global.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "fmt" "github.com/antgroup/hugescm/pkg/kong" "github.com/antgroup/hugescm/pkg/version" ) type Globals struct { Verbose bool `short:"V" name:"verbose" help:"Make the operation more talkative"` ExpandEnv bool `short:"E" name:"expand-env" help:"Replaces $${var} or $$var in the config file according to the values of the current environment variables."` Version VersionFlag `short:"v" name:"version" help:"Show version number and quit"` } type VersionFlag bool func (v VersionFlag) Decode(ctx *kong.DecodeContext) error { return nil } func (v VersionFlag) IsBool() bool { return true } func (v VersionFlag) BeforeApply(app *kong.Kong, vars kong.Vars) error { fmt.Println(version.GetVersionString()) app.Exit(0) return nil } ================================================ FILE: cmd/zeta-serve/main.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "os" "time" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/kong" "github.com/antgroup/hugescm/pkg/version" ) type App struct { Globals HTTPD HTTPD `cmd:"httpd" help:"start zeta-serve httpd server"` SSHD SSHD `cmd:"sshd" help:"start zeta-serve sshd server"` Keygen Keygen `cmd:"keygen" help:"Generates a random private key"` Encrypt Encrypt `cmd:"encrypt" help:"Encrypting Data Using RSA Key"` } func main() { var app App ctx := kong.Parse(&app, kong.Name("zeta-serve"), kong.Description("HugeSCM - A next generation cloud-based version control system"), kong.UsageOnError(), kong.ConfigureHelp(kong.HelpOptions{ Compact: true, }), kong.Vars{ "version": version.GetVersionString(), }, ) now := time.Now() if app.Verbose { trace.EnableDebugMode() } err := ctx.Run(&app.Globals) if app.Verbose { trace.DbgPrint("time spent: %v", time.Since(now)) } if err != nil { //fmt.Fprintf(os.Stderr, "%v\n", err) os.Exit(1) } } ================================================ FILE: cmd/zeta-serve/shutdown.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package main import ( "context" ) type Shutdowner interface { Shutdown(ctx context.Context) error } type closer struct { ch chan bool } func newCloser() *closer { return &closer{ch: make(chan bool, 1)} } ================================================ FILE: cmd/zeta-serve/shutdown_other.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 //go:build darwin || linux || freebsd || netbsd || openbsd || dragonfly package main import ( "context" "os" "os/signal" "syscall" "time" "github.com/sirupsen/logrus" ) func (c *closer) listenSignal(ctx context.Context, srv Shutdowner) { quit := make(chan os.Signal, 1) signal.Notify(quit, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGUSR1, syscall.SIGUSR2) signal := <-quit logrus.Infof("zeta-serve receive signal: %v, exiting ...", signal) newCtx, cancelCtx := context.WithTimeout(ctx, time.Minute*6) defer cancelCtx() _ = srv.Shutdown(newCtx) c.ch <- true } ================================================ FILE: cmd/zeta-serve/shutdown_windows.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 //go:build windows package main import ( "context" "os" "os/signal" "syscall" "time" "github.com/sirupsen/logrus" ) func (c *closer) listenSignal(ctx context.Context, srv Shutdowner) { quit := make(chan os.Signal, 1) signal.Notify(quit, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) signal := <-quit logrus.Infof("zeta-serve receive signal: %v, exiting ...", signal) newCtx, cancelCtx := context.WithTimeout(ctx, time.Minute*6) defer cancelCtx() _ = srv.Shutdown(newCtx) c.ch <- true } ================================================ FILE: docs/README.md ================================================ # Zeta 文档中心 欢迎来到 Zeta 文档中心!Zeta 是面向 AI 场景的巨型存储库版本控制系统。 --- ## 文档索引 ### 设计与架构 | 文档 | 描述 | |------|------| | [design.md](design.md) | HugeSCM 设计哲学 - 核心设计理念、架构概述、与 Git 的差异 | | [object-format.md](object-format.md) | 对象格式详解 - Blob、Tree、Commit、Fragments 等对象的二进制格式 | | [pack-format.md](pack-format.md) | Pack 文件格式 - 对象打包机制和索引格式 | | [protocol.md](protocol.md) | 传输协议规范 - HTTP/SSH 协议、授权、元数据和文件传输 | | [version-negotiation.md](version-negotiation.md) | 版本协商机制 - 基线管理、检出、拉取、推送流程 | ### 配置参考 | 文档 | 描述 | |------|------| | [config.md](config.md) | 配置文件说明 - 支持的配置项和环境变量 | ### 功能使用 | 文档 | 描述 | |------|------| | [switch.md](switch.md) | 分支切换 - switch 命令详解,切换分支和提交 | | [stash.md](stash.md) | 暂存功能 - stash 命令详解,临时保存工作进度 | | [sparse-checkout.md](sparse-checkout.md) | 稀疏检出 - 按需检出指定目录 | | [pull-strategy.md](pull-strategy.md) | 拉取策略 - merge、rebase、fast-forward 策略详解 | ### 高级特性 | 文档 | 描述 | |------|------| | [cdc.md](cdc.md) | CDC 分片 - Content-Defined Chunking 实现原理和配置 | | [hot.md](hot.md) | hot 命令 - Git 存储库维护工具,清理大文件、删除敏感数据、迁移对象格式 | --- ## 快速开始 ### 1. 安装 安装最新版本的 Golang 后,使用以下命令构建: ```sh # 使用 bali 构建 bali -T linux -A amd64 # 或使用 make make build ``` ### 2. 配置 ```shell # 设置用户信息 zeta config --global user.email 'your@email.com' zeta config --global user.name 'Your Name' # 开启 OSS 直连下载(推荐) zeta config --global core.accelerator direct ``` ### 3. 检出存储库 ```shell # 检出存储库 zeta checkout http://zeta.example.io/group/repo my-repo # 稀疏检出指定目录 zeta checkout http://zeta.example.io/group/repo my-repo -s dir1 # 逐一检出模式(节省磁盘空间) zeta checkout http://zeta.example.io/group/repo my-repo --one ``` ### 4. 基本工作流 ```shell # 查看状态 zeta status # 添加修改 zeta add # 提交 zeta commit -m "提交信息" # 推送 zeta push # 拉取更新 zeta pull ``` --- ## 核心概念 ### 数据分离架构 HugeSCM 采用**数据分离原则**: ``` +------------------+ +------------------+ | 元数据数据库 | | 对象存储/OSS | | (分布式数据库) | | (分布式文件系统) | +------------------+ +------------------+ ↑ ↑ │ │ commit/tree blob 数据 fragments/tag (压缩存储) ``` ### Fragments 对象 针对巨型文件,HugeSCM 引入 **Fragments** 对象: - 将大文件分割为多个 Blob 存储 - 支持 CDC(Content-Defined Chunking)智能分片 - 增量传输,减少带宽消耗 ### CDC 分片优势 | 场景 | 传统固定分片 | CDC 分片 | |------|------------|---------| | 局部修改 | 所有后续分片改变 | 仅 1-2 个分片改变 | | 增量同步 | 传输完整文件 | 仅传输变化分片 | | 去重效果 | 低 | 高 | 启用 CDC: ```toml [fragment] threshold = "1GB" # 文件大小阈值 size = "4GB" # 目标分片大小 enable_cdc = true # 启用 CDC 分片 ``` --- ## 适用场景 ### AI 大模型研发 - 存储 checkpoint 文件(数十 GB 到数百 GB) - 模型版本管理和增量更新 - 多团队协作 ### 游戏研发 - 大型二进制资源管理 - 美术资产版本控制 ### 数据集存储 - 大规模数据集版本管理 - 数据标注协作 --- ## 与 Git 的主要差异 | 特性 | Git | HugeSCM | |-----|-----|---------| | 架构模式 | 分布式 | 集中式 | | 克隆方式 | 全量克隆 | 按需检出 | | 哈希算法 | SHA-1/SHA-256 | BLAKE3 | | 大文件支持 | Git LFS | 内置 Fragments | | 数据存储 | 本地文件系统 | DB + OSS | ### 命令对照 | Git 命令 | HugeSCM 命令 | 说明 | |---------|-------------|------| | `git clone` | `zeta checkout` | 检出存储库,非全量克隆 | | `git fetch` | `zeta pull --fetch` | 仅获取数据 | | `git pull` | `zeta pull` | 拉取并合并 | | `git switch` | `zeta switch` | 切换分支 | --- ## 获取帮助 - **命令帮助**:`zeta -h` - **问题反馈**:提交 Issue 到内部代码仓库 - **技术支持**:联系 Zeta 团队 --- ## 文档更新 - 2026-03-18: 补全设计哲学、拉取策略、分支切换、暂存功能文档 - 2026-03-17: 添加 CDC 分片功能文档 - 2025-08-20: 初始文档创建 ================================================ FILE: docs/cdc.md ================================================ # CDC (Content-Defined Chunking) 实现文档 ## 一、核心原理 ### 传统固定分片的问题 传统 VCS 使用**固定大小分片**,存在严重缺陷: ``` 文件版本 1: [AAAAA][BBBBB][CCCCC][DDDDD] ↑ 插入一个字节 文件版本 2: [AXAAAA][BBBBB][CCCCC] ← 所有分片边界偏移! ``` **结果**: 仅仅插入 1 字节,导致所有后续分片都改变,去重率接近 0%。 ### CDC 的解决方案 CDC 通过**内容决定边界**,而不是固定偏移: ``` 文件版本 1: [AAAAA][BBBBB][CCCCC][DDDDD] ↑ 插入一个字节 文件版本 2: [AX][AAAAA][BBBBB][CCCCC][DDDDD] ↑ 只有这一个分片改变 ``` **结果**: 局部修改只影响附近的 1-2 个分片,其他分片保持不变。 --- ## 二、FastCDC 算法实现 ### 核心算法 我们使用 **FastCDC** 算法,这是工业级的 CDC 实现: ```go // FastCDC 滚动哈希 (Gear Hash) hash = (hash << 1) + gearTable[byte] ``` ### 归一化切割策略 FastCDC 的核心创新:根据当前分片大小动态调整切割概率 ``` 阶段 1 (0 ~ minSize): 不切割 阶段 2 (minSize ~ normalSize): 使用 maskS (最高切割概率) 阶段 3 (normalSize ~ normalSize+window): 使用 maskN (标准切割概率) 阶段 4 (normalSize+window ~ maxSize): 使用 maskL (最低切割概率) 阶段 5 (maxSize+): 强制切割 ``` **三 mask 策略**: - `maskS = 2^(bits-2) - 1`: 最高切割概率 (快速跳过小分片) - `maskN = 2^bits - 1`: 标准切割概率 - `maskL = 2^(bits+1) - 1`: 最低切割概率 (允许更大分片) ### 参数配置 **默认参数 (针对 AI 模型优化)**: ```go targetSize = 4MB // 目标分片大小 minSize = 1MB // 最小分片 (target / 4) maxSize = 32MB // 最大分片 (target * 8) ``` **为什么选择 4MB?** AI 模型文件的特点: - 典型张量大小: 几 MB 到几百 MB - Fine-tuning 更新: 通常是整个张量或较大区域 - Checkpoint 文件: 10GB - 100GB **4MB 分片的优势**: | 指标 | 1MB 分片 | 4MB 分片 | 改进 | |------|---------|---------|------| | 10GB 模型 | ~10000 fragments | ~2500 fragments | 减少 75% 元数据 | | 去重效果 | 优秀 | 优秀 (相近) | 保持高去重率 | | CPU 开销 | 高 | 低 | 减少 hash 计算次数 | | 传输协商 | 慢 | 快 | metadata 传输更快 | --- ## 三、流式处理实现 ### Rolling Buffer 架构 CDC 需要检测边界后才能处理分片,无法实现真正的纯 streaming: ``` 1. 读取字节计算 rolling hash 2. 检测到边界 3. 然后才能哈希分片数据 ``` 问题:流一旦读过去就无法"回退"。 **解决方案**:使用滚动缓冲区 (Rolling Buffer) ```go // 缓冲区大小 = maxChunkSize (通常 32MB) chunkBuf := make([]byte, 0, c.maxSize) // 检测到边界后 onChunk(offset, size int64, chunkReader io.Reader) ``` **内存占用**: O(maxChunkSize) - 典型值: 32MB (maxSize = target * 8) - 与文件大小无关,只与分片大小有关 **这是工业标准做法**: - restic: 使用滚动缓冲区 - borg: 使用滚动缓冲区 - rsync: 使用滚动缓冲区 ### Pipeline 设计 **单遍扫描,零临时文件**: ```go func (r *Repository) hashToWithCDC(ctx context.Context, reader io.Reader, size int64) (oid plumbing.Hash, fragments bool, err error) { // 1. 计算完整文件哈希 h := plumbing.NewHasher() teeReader := io.TeeReader(reader, h) // 2. 创建 CDC 分片器 cdcChunker := NewCDCChunker(r.Fragment.Size()) // 3. 单遍流式分片 + 哈希计算 err = cdcChunker.ChunkStreaming(teeReader, size, func(offset, chunkSize int64, chunkReader io.Reader) error { chunkHash, _ := r.odb.HashTo(ctx, chunkReader, chunkSize) ff.Entries = append(ff.Entries, &object.Fragment{ Index: chunkIndex, Hash: chunkHash, Size: uint64(chunkSize), }) return nil }) // 4. 保存 Fragments 对象 ff.Origin = h.Sum() oid, _ = r.odb.WriteEncoded(ff) return } ``` **优点**: - 单 pass - full hash + chunk hash 同时算 - 无临时文件 --- ## 四、配置使用 ### 启用 CDC 在 `.zeta/config` 文件中添加: ```toml [fragment] enable_cdc = true # 启用 CDC 分片 (Boolean 类型,支持配置 merge) ``` **配置说明**: - `enable_cdc` 是 `Boolean` 类型,支持 `true/false` 值 - 支持配置层级 merge (Local > Global > System) - 默认值: `false` (使用固定大小分片) ### 配置层级 Zeta 的配置系统有三个层级 (优先级从低到高): 1. **System config** (`/etc/zeta/config`) - 系统级配置 2. **Global config** (`~/.zeta/config`) - 用户全局配置 3. **Local config** (`.zeta/config`) - 仓库本地配置 **(最高优先级)** **Merge 语义**: 高优先级配置覆盖低优先级配置 ```go // Boolean.Merge() 实现 func (b *Boolean) Merge(other *Boolean) { // If other has a definite value, it should override b (higher priority) if other.val != BOOLEAN_UNSET { b.val = other.val } } ``` --- ## 五、实现文件 | 文件 | 说明 | |------|------| | `pkg/zeta/cdc.go` | FastCDC 分片器核心实现 | | `pkg/zeta/safetensors.go` | SafeTensors 格式解析器 (未来优化) | | `pkg/zeta/objects.go` | `hashToWithCDC` 主入口函数 | | `modules/zeta/config/config.go` | CDC 配置项定义 | | `modules/zeta/config/type.go` | Boolean 类型实现 | --- ## 六、常见问题 ### Q1: CDC 会影响读取性能吗? **A**: 不会。读取时只根据 `Fragments.Entries` 中的偏移和大小读取,分片策略对读取透明。 ### Q2: 已有仓库可以使用 CDC 吗? **A**: 可以! CDC 只影响**新上传的文件**。已有文件保持原有分片方式,两种方式可以共存。 ### Q3: CDC 分片大小不固定,如何优化存储? **A**: CDC 分片大小在 `[minSize, maxSize]` 范围内波动,平均大小接近 `targetSize`。实际测试表明存储开销与固定分片相当。 ### Q4: 为什么不能实现真正的 O(1) 空间复杂度? **A**: CDC 的本质决定了它需要缓冲: - CDC 需要读取字节 → 计算 hash → 检测边界 - 检测到边界后,才能哈希分片 - 但流已经读过去了,无法"回退" **工业标准**: restic, borg, rsync 都使用 rolling buffer --- ## 七、技术参考 1. **FastCDC 算法**: Xia, W., et al. "FastCDC: A Fast and Efficient Content-Defined Chunking Approach for Data Deduplication." USENIX ATC 2016 2. **Gear Hash**: 比传统 Rabin Fingerprint 快 2-3 倍 3. **CDC 原理**: "Content-Defined Chunking" (joshleeb.com) 4. **SafeTensors 格式**: https://huggingface.co/docs/safetensors 5. **工业实现参考**: restic, borg, rsync --- **文档版本**: v2.0 **最后更新**: 2026-03-17 **维护者**: Zeta Team ================================================ FILE: docs/config.md ================================================ # HugeSCM 配置文件说明 本文档详细说明 HugeSCM 支持的配置项和环境变量。 ## 一、配置层级 HugeSCM 的配置系统支持三个层级(优先级从低到高): | 层级 | 位置 | 说明 | |------|------|------| | System | `/etc/zeta.toml` | 系统级配置,所有用户共享 | | Global | `~/.zeta.toml` | 用户级配置,当前用户所有仓库共享 | | Local | `.zeta/zeta.toml` | 仓库级配置,仅当前仓库有效 | **优先级规则**:高优先级配置覆盖低优先级配置。 ## 二、配置命令 ### 2.1 查看配置 ```bash # 查看所有配置 zeta config --list # 查看特定配置项 zeta config user.name zeta config core.accelerator # 查看特定层级的配置 zeta config --global --list zeta config --local --list ``` ### 2.2 设置配置 ```bash # 设置全局配置 zeta config --global user.name "Your Name" zeta config --global user.email "your@email.com" # 设置仓库级配置 zeta config core.accelerator direct # 添加配置项(多值) zeta config --add core.sparse "src/core" ``` ### 2.3 删除配置 ```bash # 删除配置项 zeta config --unset core.accelerator # 删除所有匹配的配置 zeta config --unset-all core.sparse ``` ### 2.4 重命名配置 ```bash # 重命名配置节 zeta config --rename-section old.name new.name ``` ## 三、配置文件格式 配置文件采用 TOML 格式: ```toml # 用户信息 [user] name = "Your Name" email = "your@email.com" # 核心配置 [core] remote = "https://zeta.example.io/group/repo" accelerator = "direct" concurrenttransfers = 10 # 分片配置 [fragment] threshold = "1GB" size = "4GB" enable_cdc = true # HTTP 配置 [http] sslVerify = true ``` ## 四、核心配置项 ### 4.1 用户信息 | 配置项 | 环境变量 | 说明 | 示例 | |--------|----------|------|------| | `user.name` | `ZETA_AUTHOR_NAME` | 作者名 | `"John Doe"` | | | `ZETA_COMMITTER_NAME` | 提交者名 | `"John Doe"` | | `user.email` | `ZETA_AUTHOR_EMAIL` | 作者邮箱 | `"john@example.com"` | | | `ZETA_COMMITTER_EMAIL` | 提交者邮箱 | `"john@example.com"` | | | `ZETA_AUTHOR_DATE` | 作者签名时间 | `"2024-01-01T00:00:00"` | | | `ZETA_COMMITTER_DATE` | 提交时间 | `"2024-01-01T00:00:00"` | ### 4.2 存储库配置 | 配置项 | 环境变量 | 说明 | 默认值 | |--------|----------|------|--------| | `core.remote` | | 远程存储库地址 | - | | `core.sparse` | | 稀疏检出目录列表 | `[]` | | `core.sharingRoot` | `ZETA_CORE_SHARING_ROOT` | Blob 共享存储根目录 | - | | `core.optimizeStrategy` | `ZETA_CORE_OPTIMIZE_STRATEGY` | 空间管理策略 | - | ### 4.3 传输配置 | 配置项 | 环境变量 | 说明 | 默认值 | |--------|----------|------|--------| | `core.accelerator` | `ZETA_CORE_ACCELERATOR` | 下载加速器 | - | | `core.concurrenttransfers` | `ZETA_CORE_CONCURRENT_TRANSFERS` | 并发下载数(1-50) | - | | | `ZETA_CORE_PROMISOR` | 按需下载标志 | `true` | ### 4.4 编辑器配置 | 配置项 | 环境变量 | 说明 | 备注 | |--------|----------|------|------| | `core.editor` | `ZETA_EDITOR` | 提交信息编辑器 | 兼容 `GIT_EDITOR`、`EDITOR` | ## 五、HTTP 配置 ### 5.1 SSL 配置 | 配置项 | 环境变量 | 说明 | 默认值 | |--------|----------|------|--------| | `http.sslVerify` | `ZETA_SSL_NO_VERIFY` | SSL 验证 | `true` | 注意:`ZETA_SSL_NO_VERIFY=true` 与 `http.sslVerify=false` 效果相同。 ### 5.2 HTTP 头配置 | 配置项 | 说明 | |--------|------| | `http.extraHeader` | 设置 HTTP 附加头 | ```bash # 设置附加 HTTP 头 zeta config http.extraHeader "X-Custom-Header: value" # 设置 Authorization 跳过权限预验证 zeta config http.extraHeader "Authorization: Bearer token" ``` ## 六、传输层配置 | 配置项 | 环境变量 | 说明 | 默认值 | |--------|----------|------|--------| | `transport.maxEntries` | `ZETA_TRANSPORT_MAX_ENTRIES` | Batch 下载对象数量限制 | - | | `transport.largeSize` | `ZETA_TRANSPORT_LARGE_SIZE` | 大文件大小阈值 | `5M` | | `transport.externalProxy` | `ZETA_TRANSPORT_EXTERNAL_PROXY` | Direct 下载外部代理 | - | ## 七、Diff 和 Merge 配置 ### 7.1 Diff 配置 | 配置项 | 说明 | 可选值 | |--------|------|--------| | `diff.algorithm` | Diff 算法 | `histogram`、`onp`、`myers`、`patience`、`minimal` | ```bash # 设置 diff 算法 zeta config diff.algorithm histogram ``` ### 7.2 Merge 配置 | 配置项 | 说明 | 可选值 | |--------|------|--------| | `merge.conflictStyle` | 冲突标记样式 | `merge`、`diff3`、`zdiff3` | | 环境变量 | 说明 | |----------|------| | `ZETA_MERGE_TEXT_DRIVER` | 文本合并工具,可设置为 `git` 使用 git merge-file | ```bash # 设置冲突样式 zeta config merge.conflictStyle diff3 # 使用 git 作为合并工具 export ZETA_MERGE_TEXT_DRIVER=git ``` ## 八、终端配置 | 环境变量 | 说明 | |----------|------| | `ZETA_PAGER` / `PAGER` | 终端分页工具,默认搜索 `less` | | `ZETA_TERMINAL_PROMPT` | 设为 `false` 禁用终端交互 | ```bash # 禁用分页 export PAGER="" # 禁用终端交互 export ZETA_TERMINAL_PROMPT=false ``` ## 九、分片配置 | 配置项 | 类型 | 默认值 | 说明 | |--------|------|--------|------| | `fragment.threshold` | Size | `1GB` | 文件大小阈值,小于此值不分片 | | `fragment.size` | Size | `1GB` | 目标分片大小(固定分片) | | `fragment.enable_cdc` | Boolean | `false` | 启用 CDC 分片 | ### 9.1 Size 格式 支持以下单位: - `KB`、`MB`、`GB`(1000 进制) - `KiB`、`MiB`、`GiB`(1024 进制) ```toml [fragment] threshold = "512MiB" size = "1GB" enable_cdc = true ``` ### 9.2 配置层级合并 Boolean 类型支持配置层级合并: ```go // 高优先级配置覆盖低优先级配置 func (b *Boolean) Merge(other *Boolean) { if other.val != BOOLEAN_UNSET { b.val = other.val } } ``` ## 十、下载加速器配置 | 加速器 | 说明 | 适用场景 | |--------|------|----------| | `direct` | 直接从 OSS 签名 URL 下载 | AI 场景,高速内网 | | `dragonfly` | 使用 Dragonfly P2P 加速 | 大规模分布式环境 | | `aria2` | 使用 aria2c 多线程下载 | 个人开发环境 | ```bash # 设置加速器 zeta config --global core.accelerator direct # 设置 Dragonfly 路径 export ZETA_EXTENSION_DRAGONFLY_GET=/path/to/dfget # 设置 aria2 路径 export ZETA_EXTENSION_ARIA2C=/path/to/aria2c ``` ## 十一、完整配置示例 ### 11.1 全局配置示例 (`~/.zeta.toml`) ```toml [user] name = "John Doe" email = "john@example.com" [core] accelerator = "direct" concurrenttransfers = 10 editor = "vim" [http] sslVerify = true [diff] algorithm = "histogram" [merge] conflictStyle = "diff3" [fragment] enable_cdc = true threshold = "1GB" size = "1GB" ``` ### 11.2 仓库配置示例 (`.zeta/zeta.toml`) ```toml [core] remote = "https://zeta.example.io/group/repo" sparse = ["src/core", "src/utils"] compression-algo = "zstd" ``` ## 十二、配置速查表 | 配置 | 环境变量 | 说明 | |:-----|:---------|:-----| | `core.sharingRoot` | `ZETA_CORE_SHARING_ROOT` | Blob 共享存储根目录 | | `core.sparse` | | 稀疏检出目录配置 | | `core.remote` | | 远程存储库地址 | | `user.name` | `ZETA_AUTHOR_NAME` / `ZETA_COMMITTER_NAME` | 用户名 | | `user.email` | `ZETA_AUTHOR_EMAIL` / `ZETA_COMMITTER_EMAIL` | 用户邮箱 | | | `ZETA_AUTHOR_DATE` / `ZETA_COMMITTER_DATE` | 签名时间 | | `core.accelerator` | `ZETA_CORE_ACCELERATOR` | 下载加速器 | | `core.optimizeStrategy` | `ZETA_CORE_OPTIMIZE_STRATEGY` | 空间管理策略 | | `core.concurrenttransfers` | `ZETA_CORE_CONCURRENT_TRANSFERS` | 并发下载数 | | | `ZETA_CORE_PROMISOR` | 按需下载标志 | | `core.editor` | `ZETA_EDITOR` / `GIT_EDITOR` / `EDITOR` | 编辑器 | | | `ZETA_MERGE_TEXT_DRIVER` | 文本合并工具 | | | `ZETA_SSL_NO_VERIFY` | 禁用 SSL 验证 | | `http.sslVerify` | | SSL 验证(与上相反) | | `http.extraHeader` | | HTTP 附加头 | | `transport.maxEntries` | `ZETA_TRANSPORT_MAX_ENTRIES` | Batch 下载限制 | | `transport.largeSize` | `ZETA_TRANSPORT_LARGE_SIZE` | 大文件阈值 | | `transport.externalProxy` | `ZETA_TRANSPORT_EXTERNAL_PROXY` | 外部代理 | | `diff.algorithm` | | Diff 算法 | | `merge.conflictStyle` | | 冲突样式 | | | `ZETA_PAGER` / `PAGER` | 分页工具 | | | `ZETA_TERMINAL_PROMPT` | 终端交互 | ## 十三、相关文档 | 文档 | 说明 | |------|------| | [design.md](design.md) | 设计哲学 | | [sparse-checkout.md](sparse-checkout.md) | 稀疏检出 | | [cdc.md](cdc.md) | CDC 分片配置 | ================================================ FILE: docs/design.md ================================================ # HugeSCM 的设计哲学 ## 一、版本控制系统的演进与挑战 ### 1.1 传统版本控制系统的局限性 在软件开发的长河中,版本控制系统(VCS)经历了从集中式到分布式的演进。Subversion 作为集中式版本控制的代表,采用客户端-服务器架构,所有版本历史存储在中央服务器。Git 作为分布式版本控制系统的典范,将完整仓库克隆到本地,支持离线操作。 然而,随着软件研发规模的急剧膨胀,特别是 AI 大模型研发、游戏开发等场景的兴起,传统 VCS 面临严峻挑战: **单一存储库体积巨大** 现代 AI 模型训练产生的 checkpoint 文件动辄数十 GB 甚至上百 GB,一个存储库可能包含多个版本,总体积轻易突破 TB 级别。Git 的本地存储架构使得克隆和同步变得极其低效。 **单一文件体积巨大** 大型 AI 模型文件、游戏资源文件、二进制依赖包等单一文件可能达到数十 GB。Git 对大文件的支持有限,Git LFS 虽然缓解了这一问题,但引入了额外的存储开销和管理复杂度。 **网络传输瓶颈** 传统的 VCS 传输协议未针对大文件优化,在网络不稳定的环境下,大文件传输失败率高,重传代价大。 ### 1.2 现有解决方案的不足 针对 Git 在大规模存储库场景下的问题,业界已有一些尝试: **Git LFS (Large File Storage)** Git LFS 将大文件存储在单独的服务器上,仅在仓库中保留指针文件。但这种方案存在明显缺陷: - 需要额外的存储空间存储 LFS 对象 - 文件分割后仍需完整下载,无法增量同步 - 与 Git 主仓库的集成不够紧密 **Git + OSS/分布式文件系统** 将 Git 对象存储到对象存储或分布式文件系统中,看似解决了存储上限问题,但: - 未经优化的架构导致性能严重下降 - 频繁的小文件读写成为性能瓶颈 - 无法从根本上解决 Git 设计的局限性 ## 二、HugeSCM 的设计理念 ### 2.1 数据分离原则 HugeSCM 的核心创新在于**数据分离原则**,将版本控制系统的数据分为两类: **元数据(Metadata)** 包括提交对象(commit)、目录对象(tree)、分片对象(fragments)和标签对象(tag)。这些对象体积较小,但访问频繁,适合存储在分布式数据库中,支持快速索引和查询。 **文件数据(Blob)** 文件内容数据,体积可能非常大,存储在分布式文件系统或对象存储中。Blob 采用压缩存储,支持多种压缩算法(ZSTD、Brotli、Deflate 等)。 这种分离设计带来了显著优势: ``` +------------------+ +------------------+ | 元数据数据库 | | 对象存储/OSS | | (分布式数据库) | | (分布式文件系统) | +------------------+ +------------------+ ↑ ↑ │ │ commit/tree blob 数据 fragments/tag (压缩存储) (高频访问) (大文件优化) ``` ### 2.2 集中式与分布式的融合 HugeSCM 采用**集中式架构**,但并非传统意义上的集中式: - 服务端存储完整的数据集,支持巨型存储库 - 客户端获取浅表副本,按需拉取数据 - 支持**单分支/单标签**的数据获取,而非全量克隆 这种设计避免了分布式 VCS 的全量同步负担,同时保留了本地操作的灵活性。 ### 2.3 高效传输协议 HugeSCM 设计了专门的传输协议,针对不同场景优化: **元数据传输** - 支持增量获取,基于 `deepen-from` 或 `deepen` 参数 - 支持稀疏获取,仅下载指定目录的元数据 - 使用 ZSTD 压缩减少传输量 **文件传输** - 小文件批量下载,减少 HTTP 请求数 - 大文件签名 URL 下载,支持断点续传 - 支持外部加速器(Dragonfly、aria2) ### 2.4 巨型文件支持:Fragments 对象 HugeSCM 引入了 **Fragments** 对象,专门解决单一文件体积限制问题: **分片机制** 将巨型文件切割成多个 Blob 存储,Fragments 对象记录每个分片的索引、大小和哈希值。 **CDC 分片** 支持 Content-Defined Chunking,基于文件内容动态确定分片边界: - 局部修改只影响附近的 1-2 个分片 - 相同内容自动去重 - 特别适合 AI 模型的增量更新场景 **优势** - 上传/下载可并行化,提高稳定性 - 支持断点续传,网络抖动不影响整体 - 增量传输,减少带宽消耗 ## 三、架构设计 ### 3.1 服务端架构 ``` +------------------------+ | Zeta Server | +------------------------+ │ +-----------------+-----------------+ │ │ │ +-------v-------+ +-------v-------+ +-------v-------+ | 元数据缓存 | | 元数据存储 | | 文件存储 | | (内存/磁盘) | | (分布式DB) | | (OSS) | +---------------+ +---------------+ +---------------+ ``` **存储层次** 1. **内存缓存**:最新元数据的缓存,加速热点访问 2. **磁盘缓存**:服务端本地磁盘缓存,减少 DB 查询 3. **元数据数据库**:持久化存储 commit/tree/fragments 4. **对象存储**:持久化存储 blob 数据 ### 3.2 客户端架构 ``` 工作目录 ├── .zeta/ │ ├── zeta.toml # 存储库配置 │ ├── packed-refs # 打包的引用 │ ├── refs/ # 松散引用 │ ├── index # 工作区索引 │ ├── metadata/ # 元数据对象 │ └── blob/ # 文件对象 ├── .zetaignore # 忽略规则 └── .zattributes # 属性配置 ``` **特点** - 本地存储浅表副本,按需获取数据 - 支持 `--one` 逐一检出模式,节省磁盘空间 - 支持 `--limit=0` 空检出模式,按需获取文件 ### 3.3 对象模型 HugeSCM 定义了完整的对象模型,使用 BLAKE3 作为哈希算法: | 对象类型 | 说明 | 存储位置 | |---------|------|---------| | Commit | 提交对象,记录版本快照 | 元数据库 | | Tree | 目录对象,记录文件结构 | 元数据库 | | Blob | 文件内容对象,支持压缩 | 对象存储 | | Fragments | 分片对象,管理大文件分片 | 元数据库 | | Tag | 标签对象,兼容 Git Tag 格式 | 元数据库 | ## 四、核心特性 ### 4.1 空间优化 **逐一检出(One-by-One Checkout)** 检出文件后立即删除 blob 对象,100GB 的存储库仅需 100GB+ 的空间,相比 Git LFS 节省 60% 以上空间。 **按需获取(Promisor Object)** 默认开启自动下载缺失对象,需要时自动从服务端获取。 **空间管理策略** 支持 `core.optimizeStrategy` 配置,自动清理不再需要的对象。 ### 4.2 下载加速 | 加速器 | 说明 | 适用场景 | |-------|------|---------| | direct | 直接从 OSS 签名 URL 下载 | AI 场景,高速内网 | | dragonfly | 使用 Dragonfly P2P 加速 | 大规模分布式环境 | | aria2 | 使用 aria2c 多线程下载 | 个人开发环境 | ### 4.3 跨平台文件名处理 Windows/macOS 文件系统忽略文件名大小写,HugeSCM 利用稀疏检出机制: - 检测同名冲突文件 - 将冲突路径标记为不可变、不可见 - 避免数据丢失问题 ### 4.4 稀疏检出 支持目录级别的稀疏检出,只检出需要的目录: - 减少本地存储空间 - 加快检出速度 - 支持忽略文件名大小写冲突处理 ## 五、与 Git 的差异 ### 5.1 架构差异 | 特性 | Git | HugeSCM | |-----|-----|---------| | 架构模式 | 分布式 | 集中式 | | 全量克隆 | 必须 | 不支持,按需获取 | | 哈希算法 | SHA-1/SHA-256 | BLAKE3 | | 数据存储 | 本地文件系统 | DB + OSS | ### 5.2 命令差异 | Git 命令 | HugeSCM 命令 | 说明 | |---------|-------------|------| | git clone | zeta checkout (co) | 检出存储库,非全量克隆 | | git fetch | zeta pull --fetch | 仅获取数据,不合并 | | git pull | zeta pull | 拉取并合并 | | - | zeta ls-tree -r HEAD | 查看目录结构(含文件大小) | ### 5.3 设计哲学差异 **Git 的设计假设** - 存储库可以完整克隆到本地 - 本地操作优先,网络操作次要 - 全量历史可用 **HugeSCM 的设计假设** - 存储库太大,无法完整克隆 - 网络传输是核心瓶颈 - 按需获取,最小化本地存储 ## 六、适用场景 ### 6.1 AI 大模型研发 - 存储 checkpoint 文件 - 模型版本管理 - 增量更新传输 - 多团队协作 ### 6.2 游戏研发 - 大型二进制资源管理 - 美术资产版本控制 - 跨团队协作 ### 6.3 驱动开发 - 二进制依赖管理 - 多版本维护 - 发布管理 ### 6.4 数据集存储 - 大规模数据集版本管理 - 数据标注协作 - 数据集分发 ## 七、设计权衡 ### 7.1 为何不支持 Delta 压缩 Git 的 Pack 文件使用 Delta 压缩减少存储空间,但 HugeSCM 选择不支持: - Delta 解压计算开销大 - 大文件 Delta 效果有限 - 集中式架构下可直接删除不需要的对象 - 简化实现,提高性能 ### 7.2 为何不支持多 Remote HugeSCM 设计上不支持多 Remote: - 巨型存储库的多 Remote 管理复杂 - 数据一致性难以保证 - 集中式架构下单 Remote 足够 ### 7.3 为何使用 BLAKE3 BLAKE3 相比 SHA-1/SHA-256: - 更快的计算速度(SIMD 优化) - 更强的安全性 - 更短的哈希值(256 bit) - 现代密码学设计 ## 八、总结 HugeSCM 是面向巨型存储库的下一代版本控制系统,通过数据分离、高效传输协议、分片机制等创新设计,解决了传统 VCS 在 AI 大模型、游戏研发等场景下的存储和传输瓶颈。 **核心理念**:按需获取,最小化本地存储,最大化传输效率。 **设计目标**:让版本控制不再成为大规模研发的瓶颈。 ================================================ FILE: docs/hot.md ================================================ # hot - Git 存储库维护工具 `hot` 是整合到 HugeSCM 中的 Git 存储库维护工具,专用于存储库治理和优化。它帮助开发者高效地清理、维护和迁移 Git 存储库。 --- ## 为什么需要 hot? Git 存储库在长期使用中会积累技术债务: | 挑战 | hot 解决方案 | |------|-------------| | 历史中的敏感数据 | `hot remove` 重写历史,彻底删除敏感信息 | | 存储库膨胀 | `hot size`/`hot smart` 识别并清理大文件 | | SHA1 安全问题 | `hot mc` 迁移到 SHA256 对象格式 | | 过期分支/标签 | `hot prune-refs`/`hot expire-refs` 自动清理 | | 开源发布准备 | `hot unbranch` 创建干净的公开历史 | --- ## 命令概览 | 命令 | 描述 | |------|------| | `hot size` | 查看存储库大小和大文件(原始大小) | | `hot az` | 分析大文件的近似压缩大小 | | `hot remove` | 删除存储库中的文件并重写历史 | | `hot smart` | 交互式清理大文件(结合 `size` 和 `remove` 命令) | | `hot graft` | 交互式清理大文件(嫁接模式) | | `hot mc` | 迁移存储库对象格式(SHA1 ↔ SHA256) | | `hot unbranch` | 线性化存储库历史(移除合并提交) | | `hot prune-refs` | 按前缀清理引用 | | `hot scan-refs` | 扫描本地存储库中的引用 | | `hot expire-refs` | 清理过期引用 | | `hot snapshot` | 为工作树创建快照提交 | | `hot cat` | 查看存储库对象(commit/tree/tag/blob) | | `hot stat` | 查看存储库状态 | | `hot co` | 克隆存储库(实验性) | --- ## 常见使用场景 ### 1. 查找大文件 ```shell # 查看大文件的原始大小 hot size # 查看近似压缩大小 hot az # 交互模式,筛选 >= 20MB 的文件 hot smart -L20m ``` ### 2. 删除敏感数据 误提交了密码、密钥等敏感信息时,使用 `hot remove` 彻底删除: ```shell # 删除指定文件并重写历史 hot remove path/to/secret.txt # 使用通配符删除 hot remove "*.env" --confirm --prune # 删除后清理 hot remove sensitive.txt --prune git reflog expire --expire=now --all git gc --prune=now --aggressive ``` **注意**:重写历史后,需要强制推送(`git push --force`),并通知协作者重新克隆。 ### 3. 迁移对象格式 从 SHA1 迁移到 SHA256(推荐,提升安全性): ```shell # 迁移远程存储库 hot mc https://github.com/user/repo.git # 迁移本地存储库 hot mc /path/to/repo --format sha256 ``` 迁移过程会: 1. 克隆原存储库 2. 转换所有对象到新格式 3. 生成新的存储库目录 ### 4. 清理过期引用 长期开发的存储库会积累大量过期分支和标签: ```shell # 先扫描引用 hot scan-refs # 按前缀删除引用 hot prune-refs "feature/deprecated-" # 删除超过 90 天未更新的引用 hot expire-refs --days 90 # 仅删除分支 hot expire-refs --days 90 --branches # 仅删除标签 hot expire-refs --days 90 --tags ``` ### 5. 线性化历史 用于开源发布或简化历史: ```shell # 移除所有合并提交,使历史线性化 hot unbranch --confirm # 创建保留最近历史的孤儿分支(适用于开源场景) hot unbranch -K1 master -T new-branch # 保留最近 10 次提交 hot unbranch -K10 main -T clean-history ``` **选项说明**: - `-K N`:保留最近 N 次提交 - `-T `:指定新分支名称 - `--confirm`:确认执行 ### 6. 查看对象 调试和分析存储库对象: ```shell # 以 JSON 格式查看 commit/tree/tag hot cat HEAD --json # 查看文件内容 hot cat HEAD:README.md # 查看二进制文件(16 进制显示) hot cat HEAD:docs/images/blob.png # 查看特定对象 hot cat abc123def456 ``` ### 7. 创建快照 快速保存当前工作状态: ```shell # 创建快照提交 hot snapshot -m "WIP: 功能开发中" # 带标签的快照 hot snapshot -m "Release candidate" --tag v1.0.0-rc1 ``` --- ## 高级用法 ### 交互式大文件清理 `hot smart` 提供交互式界面,逐步清理大文件: ```shell # 启动交互模式 hot smart # 指定最小文件大小 hot smart -L50m # 仅显示 >= 50MB 的文件 # 自动模式(跳过确认) hot smart --auto ``` ### 嫁接模式清理 `hot graft` 使用嫁接(graft)技术,无需重写完整历史: ```shell # 嫁接模式清理 hot graft path/to/large-file.bin # 从特定提交开始嫁接 hot graft large.bin --since abc123 ``` 嫁接模式比 `remove` 更快,但会改变提交 ID。 ### 查看存储库状态 ```shell # 查看整体状态 hot stat # 显示详细信息 hot stat --verbose ``` --- ## 注意事项 ### 重写历史的风险 使用 `hot remove`、`hot unbranch` 等命令会重写 Git 历史: 1. **提交 ID 会改变**:所有受影响提交的 SHA 都会变化 2. **需要强制推送**:必须使用 `git push --force` 3. **协作者需重新克隆**:其他人需要重新克隆存储库 4. **备份重要分支**:操作前建议创建备份分支 ### 性能建议 对于大型存储库(>10GB): ```shell # 先分析,再清理 hot size > large-files.txt hot smart -L100m # 分批清理 hot remove "path/to/large1.bin" git gc --prune=now hot remove "path/to/large2.bin" git gc --prune=now ``` --- ## 示例场景 ### 场景 1:开源前清理 准备将内部项目开源: ```shell # 1. 线性化历史,保留最近提交 hot unbranch -K50 main -T public # 2. 删除敏感配置文件 hot remove "config/prod/*" --prune hot remove ".env.*" --prune # 3. 清理大文件 hot smart -L10m # 4. 迁移到 SHA256 hot mc /path/to/repo --format sha256 ``` ### 场景 2:存储库瘦身 存储库过大,需要瘦身: ```shell # 1. 分析存储库 hot size hot az # 2. 交互式清理 hot smart -L20m # 3. 清理过期分支 hot expire-refs --days 180 --branches # 4. 清理过期标签 hot expire-refs --days 365 --tags # 5. 最终清理 git reflog expire --expire=now --all git gc --prune=now --aggressive ``` ### 场景 3:安全加固 修复 SHA1 碰撞风险: ```shell # 1. 检查当前格式 hot stat # 2. 迁移到 SHA256 hot mc https://internal.example.com/repo.git # 3. 验证新存储库 cd new-repo hot stat git log --oneline | head ``` --- ## 获取帮助 每个命令都有详细的帮助信息: ```shell hot -h # 查看所有命令 hot size -h # 查看 size 命令帮助 hot remove -h # 查看 remove 命令帮助 ``` ================================================ FILE: docs/object-format.md ================================================ # HugeSCM 对象格式与存储规范 ## 一、前言 ### 1.1 术语和定义 元数据(Metadata):提交(commit),以及目录(tree),切片(fragments)。 元数据数据库:由分布式关系型数据库存储版本控制系统的元数据。 存储库:存储库是特定的元数据和文件的集合。 分支:Branch,分支是大多数版本控制系统中可用的功能,它是独立的开发线,Branch 记录 Commit 的 16 进制哈希值。 提交:Commit,指存储库特定的一次快照,commit 与其父 commit 相比,可计算出本次变更的内容,commit 记录根 Tree 的哈希值。 目录:Tree,指存储库特定的目录结构元数据,Tree 由若干个 TreeEntry 组成,TreeEntry 通过 Hash 引用到 Tree 或者 Blob。 分片:Fragments,HugeSCM 中特殊的对象,纳入版本控制时,将一个特别巨大的文件拆分成多部分,检出时将多个部分合并为一个文件,切片的引入解决了 AI 研发的单个文件体积限制问题。 BLOB:用于保存文件数据的对象格式。 引用:分支(Branch)和标签 (Tag) ## 二、对象的存储 在 HugeSCM 中,我们引入了数据分离的设计,即文件对象 blob 单独存储,而像目录结构(tree),提交(commit),以及其他扩展对象,比如切片(fragments),tag 对象(兼容 Git)则作为元数据另外存储,而引用,包括分支(branch),标签(tag)又另外存储,对于本地存储库快照和服务端,我们的存储细节又不一样,归根结底,这些不同的设计都是为了支撑 HugeSCM 的愿景。 ### 2.1 本地存储库目录布局 我们将 HugeSCM 存储库在本地的部分集合称之为存储库的本地快照,包含工作目录和存储库目录,其目录结构如下: ![](./images/local-layout.png) 1. 本地存储库分为工作目录,即 `.zeta`的父目录,不包含 `.zeta`本身。 2. `.zetaignore`用于从版本控制中排除特定的文件。 3. `.zattributes`属性文件,后续可能有助于 AI 场景。 存储库目录为 `.zeta`,包含如下目录和文件: + zeta.toml 存储库配置数据,在客户端兼容规范中定义。 + `packed-refs`, `refs/` 引用文件,用于存储本地分支,标签,及其变更记录。 + index 当前工作目录检出,纳入变更的索引。 + metadata 松散/打包的元数据。 + blob 松散/打包的 blob 数据。 配置文件 zeta.toml 示例如下: ```toml [core] remote = "https://zeta.io/group/mono-zeta" sparse = ["miracle"] compression-algo = "zstd" ``` + remote 即远程存储库地址。 + sparse 当前仓库检出的路径。 + compression-algo 压缩算法 当用户修改了 refs 之后,name 是分支和标签的全名,hash 是当前的提交,baseline 则代表从哪个 commit 开始创建该分支,在更新分支/标签时需要 `updated_at`。 + 分支使用 `refs/branches/`前缀。 + 标签使用 `refs/tags/`前缀。 + 分支和标签均不能以这些前缀开头。 + HugeSCM 在设计上没有支持多 remote。 ### 2.2 服务端存储布局 HugeSCM 为了解决巨型存储库存在海量 commit/tree/blob 的问题,会将这些数据按照约定存储到服务端的磁盘上,这里的约定如下: + 元数据,即 commit 和 tree,可以在服务端的磁盘中保持完整的数据集。 + 当服务端内存容量较为充足时,可以在服务端的内存中保持最新的元数据缓存,缓存算法自选。 + 文件,即 Blob,但大小小于一定限制,比如 16K,可以将其存储在服务端的磁盘中。 + 服务端接受到用户请求后,先内存,后磁盘,最后才是 DB/OSS 等后端。 + 服务端可以实现缓存同步机制,将缓存同步到新的实例。 服务端磁盘缓存参考目录结构: ![](./images/server-side-cache.png) 服务端 ODB 层次结构如下图: ![](./images/server-side-odb.png) ### 2.3 服务端 MDB 表 **commits** 表,存储 commit 对象: ```sql CREATE TABLE `commits` ( `id` bigint (20) unsigned NOT NULL AUTO_INCREMENT comment '主键', `rid` bigint (20) unsigned NOT NULL comment '仓库 ID', `hash` char(64) NOT NULL DEFAULT '' comment '提交哈希值', `author` varchar(512) NOT NULL DEFAULT '' comment '作者邮箱', `committer` varchar(512) NOT NULL DEFAULT '' comment '提交者邮箱', `bindata` mediumblob NOT NULL comment '编码对象', `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间,以 author when 填充', `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '修改时间,以 committer when 填充', PRIMARY KEY (`id`), UNIQUE KEY `uk_commits_rid_hash` (`rid`, `hash`) LOCAL, KEY `idx_commits_rid` (`rid`) LOCAL, KEY `idx_commits_author` (`author`) LOCAL, KEY `idx_commits_committer` (`committer`) LOCAL ) DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '提交表'; ``` **trees** 表,存储 tree 对象: ```sql CREATE TABLE `trees` ( `id` bigint (20) unsigned NOT NULL AUTO_INCREMENT comment '主键', `rid` bigint (20) unsigned NOT NULL comment '存储库 ID', `hash` char(64) NOT NULL comment 'tree 哈希值 - 16 进制', `bindata` mediumblob NOT NULL comment '编码对象', `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间', `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP comment '修改时间', PRIMARY KEY (`id`), UNIQUE KEY `uk_trees_rid_hash` (`rid`, `hash`) LOCAL, KEY `idx_trees_rid` (`rid`) LOCAL ) AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = 'tree 表'; ``` **objects** 表,存储 fragments 和 tag: ```sql CREATE TABLE `objects` ( `id` bigint (20) unsigned NOT NULL AUTO_INCREMENT comment '主键', `rid` bigint (20) unsigned NOT NULL comment '仓库 ID', `hash` char(64) NOT NULL DEFAULT '' comment '对象哈希值', `bindata` mediumblob NOT NULL comment '编码对象', `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间', `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP comment '修改时间', PRIMARY KEY (`id`), UNIQUE KEY `uk_objects_rid_hash` (`rid`, `hash`) LOCAL, KEY `idx_objects_rid` (`rid`) LOCAL ) DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '扩展元数据对象表'; ``` **分支**表: ```sql CREATE TABLE `branches` ( `id` bigint (20) unsigned NOT NULL AUTO_INCREMENT comment '主键', `name` varchar(4096) NOT NULL DEFAULT '' comment '分支名', `rid` bigint (20) unsigned NOT NULL comment '存储库 ID', `hash` char(64) NOT NULL DEFAULT '' comment '分支提交', `protection_level` int (11) NOT NULL DEFAULT '0' comment '保护分支级别,普通 0,保护分支 10,归档 20,隐藏分支 30', `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间', `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP comment '修改时间', PRIMARY KEY (`id`), UNIQUE KEY `uk_branches_rid_name` (`rid`, `name`) LOCAL, KEY `idx_branches_rid` (`rid`) LOCAL ) DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '分支表'; ``` **标签**表: ```sql CREATE TABLE `tags` ( `id` bigint (20) unsigned NOT NULL AUTO_INCREMENT comment '主键', `rid` bigint (20) unsigned NOT NULL comment '存储库 ID', `uid` bigint (20) unsigned NOT NULL DEFAULT '0' comment '创建者的 ID', `name` varchar(4096) NOT NULL comment '标签名', `hash` char(64) NOT NULL comment 'Tag 哈希值', `subject` varchar(1024) NOT NULL DEFAULT 'CURRENT_TIMESTAMP' comment 'Tag 标题', `description` mediumtext NOT NULL comment 'Tag 描述信息', `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment '创建时间', `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP comment '修改时间', PRIMARY KEY (`id`), UNIQUE KEY `uk_tags_rid_name` (`rid`, `name`) LOCAL, KEY `idx_tags_rid` (`rid`) LOCAL ) AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '引用表'; ``` 此外,服务端并未支持其他类型引用,是否要支持,请根据实际功能选择是否添加。 ## 三、对象的类型 ### 3.1 文件 在 HugeSCM 中,用于保存文件数据的对象格式是 blob,这与 git 的概念是相同的,但 HugeSCM 的 blob 格式与 Git 有很大的区别,HugeSCM 借鉴了 ZIP 归档格式的机制,为 blob 引入了扩展压缩的能力。 我们计算文件内容的 BLAKE3 哈希值作为其 blob 对象的 ID,无论文件采用什么压缩格式,其对象 ID 是恒定的,blob 的格式如下: 1. 4 字节魔数,为 `'Z','B','\x00','\x01'`。 2. 2 字节解压版本,当前为 1。 3. 2 字节压缩算法。 4. 8 字节文件原始大小。 5. 可变长度的压缩内容。 请注意,我们使用大端,也就是网络字节序存储双字节以上的数据,下面是 blob 的结构图: ![](./images/blob.png) 我们可以使用代码描述 blob 格式: ```cpp enum compress_method : std::uint16_t { STORE = 0, // Store as-is (without compression) ZSTD = 1, // Use zstd compression BROTLI = 2, // Use brotli compression DEFLATE = 3, // Use Deflate compression XZ = 4, // Use xz compression BZ2 = 5, // Use bzip2 compression }; struct blob { std::byte magic[4]; // 'Z','B','\x00','\x01' std::uint16_t version_needed; // version compress_method method; // compress method std::uint64_t uncompressed_size; // uncompressed size std::byte *content; // content }; ``` blob 保留的压缩格式: + 0 - STORE,即原样存储,不需要压缩,如果文件是压缩文件,二进制文件,体积较大,压缩的意义不大,那么我们可以选择原样存储,根据香浓信息论,压缩是有极限的,因此重复压缩并无必要,反而会浪费 CPU,现在的存储成本已经大大降低,没必要过度压缩。 + 1 - ZSTD,ZSTD 是 facebook 推出的压缩算法,其压缩率和计算成本均衡性比较好,被许多的开源项目使用,包括 Linux 内核,以及 OB 数据库,在 HugeSCM 中,我们也优先使用 ZSTD。 + 2 - BROTLI, Google 推出的压缩算法,保留。 + 3 - DEFLATE,Git 使用的压缩算法,保留。 + 4 - XZ,压缩率很高的压缩算法,保留。 + 5 - BZ2,BZIP2 压缩算法,保留。 ### 3.2 巨型文件 在 HugeSCM 中,我们将体积较大的文件切割成多个 blob 存储,将这些 blob 的长度,序号,哈希值存储到一个特殊的对象,即 **Fragments** 对象中,这样我们在上传,下载 blob 的时候能够避免因长时间的上传和下载带来的稳定性问题。降低了服务端的实现难度和网络稳定性带来的风险。 **Fragments** 对象自身,我们视其为 **Metadata** 的一种。 ![](./images/fragments.png) 分片的二进制定义如下: ```go type Fragment struct { Index uint32 Size uint64 Hash plumbing.Hash } type Fragments struct { Hash plumbing.Hash // NOT Encode Size uint64 Origin plumbing.Hash // origin file hash checksum Entries []Fragment } ``` 我们 `Fragments`对象进行编码,`Size`为原始内容的大小,`Origin`则是原始文件的哈希值,`Entries`则是分片记录,其字段如下: + Index 即分片的顺序,从 0 开始。 + Size 分片的原始大小 + Hash 分片的哈希 当我们将 Fragments 添加到 TreeEntry 时,其 Hash 应使用 Fragments 的哈希值,FileMode 添加掩码 `0400000`。 其编码如下: ```go var ( FRAGMENTS_MAGIC = []byte{'Z', 'F', 0x00, 0x01} ) type Fragment struct { Index uint32 Size uint64 Hash plumbing.Hash } type Fragments struct { Hash plumbing.Hash // NOT Encode Size uint64 Origin plumbing.Hash // origin file hash checksum Entries []Fragment } func (f *Fragments) Encode(w io.Writer) error { _, err := w.Write(FRAGMENTS_MAGIC) if err != nil { return err } if err := binary.WriteUint64(w, f.Size); err != nil { return err } if _, err = w.Write(f.Origin[:]); err != nil { return err } for _, entry := range f.Entries { if err := binary.WriteUint32(w, entry.Index); err != nil { return err } if err := binary.WriteUint64(w, entry.Size); err != nil { return err } if _, err = w.Write(entry.Hash[:]); err != nil { return err } } return nil } func (f *Fragments) Decode(reader Reader) error { if reader.Type() != FragmentObject { return ErrUnsupportedObject } f.Hash = reader.Hash() r := sync.GetBufioReader(reader) defer sync.PutBufioReader(r) f.Entries = nil var err error if f.Size, err = binary.ReadUint64(r); err != nil { return err } if _, err = io.ReadFull(r, f.Origin[:]); err != nil { return err } for { var entry Fragment if entry.Index, err = binary.ReadUint32(r); err != nil { if err == io.EOF { break } return err } if entry.Size, err = binary.ReadUint64(r); err != nil { return err } if _, err = io.ReadFull(r, entry.Hash[:]); err != nil { return err } f.Entries = append(f.Entries, entry) } return nil } func (f Fragments) Pretty(w io.Writer) error { if _, err := fmt.Fprintf(w, "origin: %s size: %d\n", f.Origin, f.Size); err != nil { return err } for _, e := range f.Entries { if _, err := fmt.Fprintf(w, "%s %d\t%d\n", e.Hash, e.Index, e.Size); err != nil { return err } } return nil } ``` `Fragments`对象的 Filemode 需要添加掩码: `0400000`,其代码如下: ```go func NewFragments(m os.FileMode) (FileMode, error) { mode, err := NewFromOS(m) if err != nil { return Empty, err } return mode | Fragments, nil } func (m FileMode) ToOSFileMode() (os.FileMode, error) { if m&Fragments != 0 { m = m ^ Fragments } switch m { case Dir: return os.ModePerm | os.ModeDir, nil case Submodule: return os.ModePerm | os.ModeDir, nil case Regular: return os.FileMode(0644), nil // Deprecated is no longer allowed: treated as a Regular instead case Deprecated: return os.FileMode(0644), nil case Executable: return os.FileMode(0755), nil case Symlink: return os.ModePerm | os.ModeSymlink, nil } return os.FileMode(0), fmt.Errorf("malformed mode (%s)", m) } ``` ### 3.3 目录结构 我们在 HugeSCM 中,使用 tree 对象代表目录结构,tree 对象可以包含一组指向 blob 对象和/或其他 tree 对象的指针。与 Git 不同的是,我们的 TreeEntry 会记录文件的原始大小,这种设计在计算文件大小,特别是 FUSE 等场景还是有很大的收益。 tree 的二进制布局 ![](./images/tree.png) tree 的编码实现: ```go // Hash BLAKE3 hashed content type Hash [32]byte // TreeEntry represents a file type TreeEntry struct { Name string Size int64 Mode uint32 Hash Hash BLOB []byte } // Tree is basically like a directory - it references a bunch of other trees // and/or blobs (i.e. files and sub-directories) type Tree struct { Hash string Entries []TreeEntry } var ( TREE_MAGIC = []byte{'Z', 'T', 0x00, 0x01} ) func (t *Tree) Encode(w io.Writer) error { _, err := w.Write(TREE_MAGIC) if err != nil { return err } for _, entry := range t.Entries { size := entry.Size if len(entry.BLOB) > 0 { size = -entry.Size } if _, err = fmt.Fprintf(w, "%o %d %s", entry.Mode, size, entry.Name); err != nil { return err } if _, err = w.Write([]byte{0x00}); err != nil { return err } if _, err = w.Write(entry.Hash[:]); err != nil { return err } if len(entry.BLOB) > 0 { if _, err = w.Write(entry.BLOB); err != nil { return err } } } return nil } func (t *Tree) Verify() error { h := blake3.New() if err := t.Encode(h); err != nil { return err } hash := hex.EncodeToString(h.Sum(nil)) if hash == t.Hash { return nil } return fmt.Errorf("hash mistake. got '%s' want '%s'", hash, t.Hash) } ``` ### 3.4 提交对象 在 HugeSCM 中,提交对象(commit)是非常重要的一类对象,commit 对象的二进制布局如下:![](./images/commit.png) 编码代码如下: ```go type Signature struct { Name string // sig Email string When time.Time } // Encode encodes a Signature into a writer. func (s *Signature) Encode(w io.Writer) error { if _, err := fmt.Fprintf(w, "%s <%s> ", s.Name, s.Email); err != nil { return err } if err := s.encodeTimeAndTimeZone(w); err != nil { return err } return nil } func (s *Signature) encodeTimeAndTimeZone(w io.Writer) error { u := s.When.Unix() if u < 0 { u = 0 } _, err := fmt.Fprintf(w, "%d %s", u, s.When.Format("-0700")) return err } type Commit struct { Hash string // commit oid Message string // commit message (include subject and body) Author Signature // author Committer Signature // committer Parents []string // parents TreeHash string } var ( COMMIT_MAGIC = []byte{'Z', 'C', 0x00, 0x01} ) func (c *Commit) Encode(w io.Writer) error { _, err := w.Write(COMMIT_MAGIC) if err != nil { return err } if _, err = fmt.Fprintf(w, "tree %s\n", c.TreeHash); err != nil { return err } for _, p := range c.Parents { if _, err = fmt.Fprintf(w, "parent %s\n", p); err != nil { return err } } if _, err = fmt.Fprint(w, "author "); err != nil { return err } if err = c.Author.Encode(w); err != nil { return err } if _, err = fmt.Fprint(w, "\ncommitter "); err != nil { return err } if err = c.Committer.Encode(w); err != nil { return err } if _, err = fmt.Fprintf(w, "\n\n%s", c.Message); err != nil { return err } return nil } func (c *Commit) Verify() error { h := blake3.New() if err := c.Encode(h); err != nil { return err } hash := hex.EncodeToString(h.Sum(nil)) if hash == c.Hash { return nil } return fmt.Errorf("hash mistake. got '%s' want '%s'", hash, c.Hash) } ``` 在将 commit 编码到 commits 表时,需要进行额外的处理,比如 commit 的 message 较长时应当使用 message_extral 存储提交信息,parents 使用`;`组合成一个字符串。 ### 3.5 Tag 对象 HugeSCM 目前为了兼容 Git,支持存储 Tag 对象,其编码如下: ```go var ( TAG_MAGIC = [4]byte{'Z', 'G', 0x00, 0x01} ) type Tag struct { Hash plumbing.Hash Object plumbing.Hash ObjectType ObjectType Name string Tagger string Content string } // https://git-scm.com/docs/signature-format // https://github.blog/changelog/2022-08-23-ssh-commit-verification-now-supported/ func (t *Tag) Extract() (message string, signature string) { if i := strings.Index(t.Content, "-----BEGIN"); i > 0 { return t.Content[:i], t.Content[i:] } return t.Content, "" } func (t *Tag) Message() string { m, _ := t.Extract() return m } // ObjectTypeFromString converts from a given string to an ObjectType // enumeration instance. func ObjectTypeFromString(s string) ObjectType { switch strings.ToLower(s) { case "blob": return BlobObject case "tree": return TreeObject case "commit": return CommitObject case "tag": return TagObject default: return InvalidObject } } // Decode implements Object.Decode and decodes the uncompressed tag being // read. It returns the number of uncompressed bytes being consumed off of the // stream, which should be strictly equal to the size given. // // If any error was encountered along the way it will be returned, and the // receiving *Tag is considered invalid. func (t *Tag) Decode(reader Reader) error { if reader.Type() != TagObject { return ErrUnsupportedObject } br := sync.GetBufioReader(reader) defer sync.PutBufioReader(br) t.Hash = reader.Hash() var ( finishedHeaders bool ) var message strings.Builder for { line, readErr := br.ReadString('\n') if readErr != nil && readErr != io.EOF { return readErr } if finishedHeaders { message.WriteString(line) } else { text := strings.TrimSuffix(line, "\n") if len(text) == 0 { finishedHeaders = true continue } field, value, ok := strings.Cut(text, " ") if !ok { return fmt.Errorf("zeta: invalid tag header: %s", text) } switch field { case "object": if !plumbing.IsHash(value) { return fmt.Errorf("zeta: unable to decode BLAKE3: %s", value) } t.Object = plumbing.NewHash(value) case "type": t.ObjectType = ObjectTypeFromString(value) case "tag": t.Name = value case "tagger": t.Tagger = value default: return fmt.Errorf("zeta: unknown tag header: %s", field) } } if readErr == io.EOF { break } } t.Content = message.String() return nil } func (t *Tag) encodeInternal(w io.Writer) error { headers := []string{ fmt.Sprintf("object %s", t.Object), fmt.Sprintf("type %s", t.ObjectType), fmt.Sprintf("tag %s", t.Name), fmt.Sprintf("tagger %s", t.Tagger), } _, err := fmt.Fprintf(w, "%s\n\n%s", strings.Join(headers, "\n"), t.Content) return err } // Encode encodes the Tag's contents to the given io.Writer, "w". If there was // any error copying the Tag's contents, that error will be returned. // // Otherwise, the number of bytes written will be returned. func (t *Tag) Encode(w io.Writer) error { _, err := w.Write(TAG_MAGIC[:]) if err != nil { return err } return t.encodeInternal(w) } ``` tag 对象主要用于兼容,在 HugeSCM 中 tags 表的已经能够呈现非常丰富的内容,因此除了兼容场景,优先使用 HugeSCM Tag 引用而不是兼容的 HugeSCM Tag 对象。 ## 四、引用存储 ### 4.1 客户端 我们使用类似 Git 的 loose refs 和 packed-refs 存储本地引用。 ### 4.2 服务端 我们使用 branches/tags 表分别存储分支和标签,其他类型的引用,HugeSCM 暂时不支持。 ## 五、对象打包 HugeSCM 打包文件格式 当我们实现了 HugeSCM 的基本功能之后,我们也逐渐考虑到应当实现对象打包机制,从而减少打开的文件数量,从而提高各种操作的效率。在借鉴了 git 的打包格式之后,结合 HugeSCM 自身的特性,我们引入了自己的打包格式,这里需要注意,在打包格式按照大端存储。 + 4 字节签名 'P', 'A', 'C', 'K' + 4 字节版本信息, 当前版本为: 'Z' + 4 字节条目数量(N),在一个包中,对象的数量不能超过 4294967296 个。 + N 个条目(4 字节长度 + 对象内容)。 + 32 字节 BLAKE3 校验和 文件名为打包文件的 BLAKE3 哈希值,如: `pack-18bdc1a5ac3123aa7252cb81739fe0c9d2455e45ac8c34e285bdeffdf12df3bb.pack`,鉴于 metadata 和 blob 的特点,我们会采用不同的机制打包这些对象。由于我们将 BLOB 和 Metadata 分别存储,并且在这些对象中都存在 magic(或 ZSTD magic),因此我们有完整的类型检测机制,也就不用担心对象的识别。 ### 5.1 Metadata 条目 + 4 字节 metadata 后续的对象在 pack 中占据的长度,值为 N。 + N 字节 metadata 压缩(或原始)内容。如果是压缩存储,则采用存储库设置的压缩算法。 ### 5.2 Blob 条目 + 4 字节 blob 对象长度,值为 N。 + N 字节 blob 对象内容,在 blob pack 中,我们将松散对象原样复制到 pack 文件中。 ### 5.3 Pack Index 格式 + 4 字节签名 '0xff', '0x74', '0x4f', '0x63' + 4 字节版本信息, 当前版本为: 'Z' + 4 字节 * 256 Fanout 表,记录了不大于它的对象的数量,比如 00 代表 OID(`[32]byte`) 第一位为 0 的 OID 数量,而 01 则是包含 `00-01` 对象的数量之和,最后则代表 pack 文件中的总条目(N)。 + N 个对象 Hash 存储,每条目 32 字节。 + N 个对象最后修改时间,每条目 4 字节。 + N 个对象 CRC32 存储,使用 IEEE 风格,每条目 4 字节。 + N 个对象在包中的 32 位(4 字节)偏移,如果对象的偏移大于 2GB,则该值与 `0x7fffffff` 进行 `&` 运算,得到 64 位偏移的索引。 + M 个对象在包中的 64 位(8 字节)偏移。 + 32 字节包文件 BLAKE3 校验和。 + 32 字节 Index 文件 BLAKE3 校验和。 ### 5.4 与 Git 打包文件格式的差异 在流行的版本控制系统 Git 中,同样存在打包文件,虽然 HugeSCM 借鉴了 git 大量设计,但也会从实际情况出发,针对 HugeSCM 的特性调整设计。比如,HugeSCM 会将 metadata(tree/commit)和 blob 分开存储,不像 git 那样存储在一起,这是因为,对于 commit/tree 这些对象,我们最终会将其存储到 Database,对于 BLOB,最终会将其存储到 OSS,这些数据事实上被分流了,因此我们在实现客户端的时候也对其进行分流。并保留清理不同的策略。 此外,从实践来看,将大文件打包到 pack 文件中,是一个低效的操作,大量的二进制文件使得 pack 文件打包困难,体积巨大,传输容易失败。在 HugeSCM 中,无论是 Push 还是 checkout,对于体积超过 4G 的文件都需要使用额外的接口进行操作,因此在打包文件中,我们同样不支持超过 4G 的对象,这与 git 显著不同。此外,HugeSCM 是一种集中式的版本控制系统,并不是非常需要在打包中引入 Delta 机制以节省空间,如果需要节省空间直接删除不需要的对象即可。因此我们对打包格式的设计是保持简单和高效。 ================================================ FILE: docs/pack-format.md ================================================ # HugeSCM 打包文件格式 当我们实现了 HugeSCM 的基本功能之后,我们也逐渐考虑到应当实现对象打包机制,从而减少打开的文件数量,从而提高各种操作的效率。在借鉴了 git 的打包格式之后,结合 HugeSCM 自身的特性,我们引入了自己的打包格式,这里需要注意,在打包格式按照大端存储。 + 4 字节签名 'P', 'A', 'C', 'K' + 4 字节版本信息, 当前版本为: 'Z' + 4 字节条目数量(N),在一个包中,对象的数量不能超过 4294967296 个。 + N 个条目(4 字节长度 + 对象内容)。 + 32 字节 BLAKE3 校验和 文件名为打包文件的 BLAKE3 哈希值,如: `pack-18bdc1a5ac3123aa7252cb81739fe0c9d2455e45ac8c34e285bdeffdf12df3bb.pack`,鉴于 metadata 和 blob 的特点,我们会采用不同的机制打包这些对象。由于我们将 BLOB 和 Metadata 分别存储,并且在这些对象中都存在 magic(或 ZSTD magic),因此我们有完整的类型检测机制,也就不用担心对象的识别。 ## Metadata 条目 + 4 字节 metadata 后续的对象在 pack 中占据的长度,值为 N。 + N 字节 metadata 压缩(或原始)内容。如果是压缩存储,则采用存储库设置的压缩算法。 ## Blob 条目 + 4 字节 blob 对象长度,值为 N。 + N 字节 blob 对象内容,在 blob pack 中,我们将松散对象原样复制到 pack 文件中。 ## Pack Index 格式 + 4 字节签名 '0xff', '0x74', '0x4f', '0x63' + 4 字节版本信息, 当前版本为: 'Z' + 4 字节 * 256 Fanout 表,记录了不大于它的对象的数量,比如 00 代表 OID(`[32]byte`) 第一位为 0 的 OID 数量,而 01 则是包含 `00-01` 对象的数量之和,最后则代表 pack 文件中的总条目(N)。 + N 个对象 Hash 存储,每条目 32 字节。 + N 个对象最后修改时间,每条目 4 字节。 + N 个对象 CRC32 存储,使用 IEEE 风格,每条目 4 字节。 + N 个对象在包中的 32 位(4 字节)偏移,如果对象的偏移大于 2GB,则该值与 `0x7fffffff` 进行 `&` 运算,得到 64 位偏移的索引。 + M 个对象在包中的 64 位(8 字节)偏移。 + 32 字节包文件 BLAKE3 校验和。 + 32 字节 Index 文件 BLAKE3 校验和。 ## 与 Git 打包文件格式的差异 在流行的版本控制系统 Git 中,同样存在打包文件,虽然 HugeSCM 借鉴了 git 大量设计,但也会从实际情况出发,针对 HugeSCM 的特性调整设计。比如,HugeSCM 会将 metadata(tree/commit)和 blob 分开存储,不像 git 那样存储在一起,这是因为,对于 commit/tree 这些对象,我们最终会将其存储到 Database,对于 BLOB,最终会将其存储到 OSS,这些数据事实上被分流了,因此我们在实现客户端的时候也对其进行分流。并保留清理不同的策略。 此外,从实践来看,将大文件打包到 pack 文件中,是一个低效的操作,大量的二进制文件使得 pack 文件打包困难,体积巨大,传输容易失败。在 HugeSCM 中,无论是 Push 还是 checkout,对于体积超过 4G 的文件都需要使用额外的接口进行操作,因此在打包文件中,我们同样不支持超过 4G 的对象,这与 git 显著不同。此外,HugeSCM 是一种集中式的版本控制系统,并不是非常需要在打包中引入 Delta 机制以节省空间,如果需要节省空间直接删除不需要的对象即可。因此我们对打包格式的设计是保持简单和高效。 ================================================ FILE: docs/protocol.md ================================================ # HugeSCM 传输协议规范 ## 一、协议约定 早期在我们设计 HugeSCM 传输协议时,我们对 HugeSCM 的设计存在认识不足,没有充分考虑到实际需求,此外,在 HugeSCM 的推广过程,我们也发现 HugeSCM 需要引入一些设计扩展,以支持 HugeSCM 的功能扩展,因此,在我们专门引入了 HugeSCM 传输协议规范,制定相关约束。 ### 1.1 版本协商 在采用 HugeSCM 传输协议下载/上传数据时,应正确设置传输协议版本,服务端根据传输协议版本选择合适的实现,其中。 本规范的传输协议字符串为:`Z1` HTTP 请求需设置请求头 `Zeta-Protocol: Z1`。 SSH 请求需设置环境变量:`ZETA_PROTOCOL=Z1` 后续如果有新的协议引入,则使用字符串:`Z2 Z3 ... ZN`。 ### 1.2 授权 #### 1.2.1 HTTP 验证 HugeSCM 的传输协议支持用户名和密码(Token)的验证方式,支持的授权方式有 `Basic`以及 `Bearer`。 对于 Basic 授权,我们支持:`邮箱+密码`,`域账号+密码`,`允许的用户名+token`。 为了提高服务端的安全性,我们还引入了签名验证机制,在本协议中,我们使用 Bearer 验证机制,即使用 JWT 签名。 用户在请求 `{namespace}/{repo}/authorization` 接口时,我们先验证用户权限,如果权限 OK,我们将使用特定的算法,生成一个 Bearer Token,客户端后续使用该 token 操作即可。 请求体: ```json { "operation": "download", "version": "0.12.3" } ``` 这里的 `operation`有效值是 `download`和 `upload`,客户端如果想要检查是否有写入权限,则可以指定 `upload`,否则指定 `download`即可,因为我们在后续的协议中会再度检查用户的权限。而 `version`用于告诉服务端客户端的版本。 返回: ```json { "header": { "authorization": "Bearer *****" }, "notice": "可选", "expires_at": "2023-12-20T17:54:49.244244+08:00" } ``` 客户端可以检测 `expires_at`确认 token 是否过期,可以使用我们提供的 `authorization`设置到 HTTP 请求头,当然用户可以不使用该机制,使用标准的 Basic 验证也是支持的。该接口返回的 `notice`,客户端可以将该通知/提示输出到终端。 #### 1.2.2 SSH 验证 SSH 传输协议可以使用 SSH 公钥进行验证,与 SSH 相同,这里不做赘述。 ## 二、下载数据协议集 本章内容主要是介绍如何实现下载数据的传输协议集,便于用户从远程存储获取所需的数据,从而在本地创建存储库的快照,本协议集即需要支持稀疏的,浅表的存储库数据获取,也需要具备完全的存储库数据下载能力,在 HugeSCM 中,我们的遵循的原则都是单分支/单标签的数据下载,而不像 Git 那样,下载所有的存储库数据,因为在举行存储库中,无论如何,将存储库的数据完全下载到本地都是不经济的,没有必要的。 | 名称 | 匹配 | 备注 | | --- | --- | --- | | 引用发现 | `GET /{namespace}/{repo}/reference/{refname}` | `Accept: application/vnd.zeta+json` | | 元数据 | `GET /{namespace}/{repo}/metadata/{revision:.*}`
`POST /{namespace}/{repo}/metadata/{revision:.*}`
`POST /{namespace}/{repo}/metadata/batch` | 在这里 `revision`只能是 `commit`或者 `tag`对象,不能是 `tree`或者其他。
可设置 `deepen-from`和 `deepen`,分别表示从那个 commit 开始或者回溯深度,deepen-from 默认没有设置,而 deepen 如果没有设置就使用默认值 1.
其中批量元数据下载不支持 `deepen-from`和 `deepen`。 | | blob | `POST /{namespace}/{repo}/objects/batch`
`POST /{namespace}/{repo}/objects/share`
`GET /{namespace}/{repo}/objects/{oid}` | 在这里我们需要支持批量下载小文件,也需要支持下载大文件,此外还需要支持签名下载对象,支持签名下载的好处是,我们可以减少网络带宽的消耗。 | ### 2.1 引用发现协议 在 HugeSCM 中,我们目前设计了分支发现协议和标签发现协议,以支持用户获得存储库的分支/标签信息,并且在返回中包含存储库的哈希算法,默认分支,压缩算法,以及 capabilities 等信息,客户端可以根据 capabilities 信息感知服务端的能力。 由于 HugeSCM 的特殊设计,我们并不需要像 Git 那样将所有的引用数据都传输给客户端,因此我们完全可以将引用发现协议的返回数据设置`Content-Type: application/vnd.zeta+json`,以降低解析数据的难度。 假如 zeta 存储库的 remote 为:`https://zeta.io/group/mono-zeta` ,那么我们可以通过: ```bash # Get ref information GET "https://zeta.io/group/mono-zeta/reference/${REFNAME}" # SSH command zeta-serve ls-remote "group/mono-zeta" --reference "${REFNAME}" ``` 计算分支/标签的名称: + 分支:`refs/heads/`+`branch` + 标签:`refs/tags/`+`tag` + 其他:待补充 客户端需要设置:`Accept: application/vnd.zeta+json` 引用的返回格式如下: ```json { "remote": "https://zeta.io/zeta/zeta-mono", "name": "refs/tags/v1.0.0", "hash": "9b724e5d1e1434ea916feaa3f1c2d3e467058c6bdab1b34fe9752550451a7039", "peeled": "6d2eb25e45c4f5135da48e786cbb4c8af06a6009ecd679e0547c06a640bbc310", "head": "refs/heads/mainline", "version": 1, "agent": "Zeta-1.0", "hash-algo": "BLAKE3", "compression-algo": "zstd", "capabilities": [] } ``` + remote 即远程存储库地址,保留。 + name 即当前的引用的名称。 + hash 即 v1.0.0 分支的最新提交。 + peeled 是可选的,如果一个引用是 tag,并且是从 git 迁移过来的,可能是 tag 对象,服务端应返回去皮 tag,如果不是则省略。 + head,通常是默认分支。 + version 即 zeta 协议版本。 + agent zeta 服务端版本。 + hash-algo 则是哈希算法。 + compression-algo 压缩算法。 + capabilities 预留能力。 错误返回格式为: + code 错误码 + message 错误信息 比如引用不存在,则返回 404。 ```json { "code":404, "message":"repo cs not exist" } ``` ### 2.2 元数据传输协议 HugeSCM 元数据传输协议,支持的 Query 分别有: + `deepen-from`值为 commit 的哈希,从某个 commit 开始到指定 commit 之前所有的提交和 tree,fragments 等元数据集合。 + `deepen`值类型为正整数,即获取 deepen 个提交的元数据集合,如果设置了 `deepen-from`则忽略 `deepen`,未设置 `deepen`时,我们默认会获取 commit 一个提交包含的元数据。 + `depth`目录层级深度,未设置则获得所有的 tree。 #### 2.2.1 编码格式 在 HugeSCM 中,方案规定,metadata 数据格式为: 1. 4 字节 MAGIC,目前的定义为 `'Z','M','\x00','\x01'` 2. 4 字节 Version,当前值为 1。 3. 16 字节 Reserved 保留字段,全部填充为 `'\0'`。 4. 4 字节的 object_length,这个即 `metadata_entry`的数据总长度。 5. `$object_length`字节的 `metadata_entry`包括 64 字节的哈希和二进制内容。 6. `metadata_entry`的数量是可变的,只有当接收到的 object_end 值为 0 时表示元数据传输结束。 7. 16 字节的 CRC64 (ISO) 校验合。即整个传输流的 CRC64,不包含 crc64_checksum 本身。 ```cpp struct metadata_entry { std::byte hash[64]; // object hash std::byte *content; // variable content }; struct metadata { std::byte magic[4]; // 'Z','M','\x00','\x01' std::uint32_t version; // VERSION default =1 std::byte reserved[16]; // reserved: full zero std::uint32_t object_length; // object length - 64 == object content length metadata_entry entry; // object hash and content. /* ... */ std::uint32_t object_end; // ==> 0000 std::byte crc64_checksum[16]; // 16 byte CRC64 (ISO) checksum }; ``` 无论是 Commit/Tree 还是稀疏 Commit 协议的返回都应该是符合元数据二进制格式。 客户端需要设置正确的 `Accept`: + `Accept: application/x-zeta-metadata` 传输流不压缩。 + `Accept: application/x-zeta-compress-metadata`,传输流使用 ZSTD 压缩。 SSH 协议可以添加参数 `--zstd` 开启元数据压缩。 #### 2.2.2 基本元数据下载 在 HugeSCM 系统中,只需要获得最新的 `revision`及其 tree 就行了,这里 `revision`可以是 `commit`也可以是 `tag`,如果是 `tag`对象需进一步解析到 `commit`为止。 ```bash # Get commit metadata GET "https://zeta.io/group/mono-zeta/metadata/${REVISION}" # SSH zeta-serve metadata "group/mono-zeta" --revision "${REVISION}" --depth=1 --deepen-from=${from} ``` 请求格式 | **参数** | **类型** | **描述** | | --- | --- | --- | | revision | String | 提交 ID 或 tag 对象 ID | | depth | Integer | 可选,如果没有设置,服务端将遍历该提交所有的 tree,否则,按照 depth 指定遍历指定深度的 tree。 | | deepen-from | Hash | 可选,将从 `deepen-from`开始的 commit 到 指定的 commit 之间所有的 commit 也返回给客户端,一旦设置了 `deepen-from`,服务端将检查 deepen- from 是否是所需 commit 的祖先,不是祖先则返回 419。 | | have | Hash | 该值标记本地存在的 commit,在 Fetch 阶段,服务端会根据 deepen-from 以及 have 确认本地存储库已经存在哪些 commit,并轻点出所需的对象。 | | deepen | Integer | 值类型为正整数,即获取 deepen 个提交的元数据集合,如果设置了 `deepen-from`则忽略 `deepen`,未设置 `deepen`时,我们默认会获取 commit 一个提交包含的元数据。 | 如果查询是添加了 `depth=N`,我们将限制查询 tree 的深度,`0`表示不返回任何 `tree`,默认(即 depth 参数不存在时)返回所有该 revision `root-tree`的所有 `sub-tree`。 #### 2.2.3 稀疏元数据下载 在 HugeSCM 中,我们支持稀疏元数据下载,其请求如下: ```bash # Get commit metadata POST "https://zeta.io/group/mono-zeta/metadata/${REVISION}" # SSH zeta-serve metadata "group/mono-zeta" --revision "${REVISION}" --sparse --depth=1 --deepen-from=${from} ``` 客户端将请求的目录发送给服务端,服务端据此返回相应的稀疏元数据,请求格式如下: ```bash cat < src/link LF src/zeta LF LF > ``` 内容返回细节与基本元数据传输相同。 #### 2.2.4 批量元数据下载 在 HugeSCM 中,我们支持批量元数据下载,其请求如下: ```bash # Get commit metadata POST "https://zeta.io/group/mono-zeta/metadata/batch" # SSH zeta-serve metadata "group/mono-zeta" --batch --depth=1 ``` 客户端将请求的目录发送给服务端,服务端据此返回相应的稀疏元数据,请求格式如下: ```bash cat < oid LF oid LF LF > ``` 内容返回细节与基本元数据传输相同。 这里对不同类型的对象的返回如下: + tree 返回指定深度的 sub tree。 + commit 返回根 tree 和指定深度的 sub tree。 + fragments 返回自身。 + tag 返回自身及其 commit 和 tree ,指定深度的 sub tree。 这里需要注意,通常情况下标准客户端可能不需要实现批量元数据下载,基本元数据下载和稀疏元数据下载已经能满足现有的需求,而批量元数据下载可以适用于 FUSE 等场景,而元数据并不像 blob 那样占据大量空间,绝大多数时候都可以完全下载到本地。 ### 2.3 文件数据传输协议 本节主要描述如何实现 Blob 的下载,包含批量下载(小 blob),签名分享下载(大 blob),以及单一 blob 下载(无论大小)。 #### 2.3.1 单个下载 在 HugeSCM 中,最简单的 blob 获取方式是单个 blob 下载,请求格式如下: ```bash # HTTP GET "https://zeta.io/group/mono-zeta/objects/${OID}" # SSH zeta-serve objects group/mono-zeta --oid "${OID}" --offset=0 ``` 此外,客户端需要设置:`Accept: application/x-zeta-blob`。 该接口需要支持断点续传功能,即客户端在下载数据中断后,可以请求从指定位置开始下载,对于体积较大的 blob,很容易出现因网络的原因超时中断,因此,服务端需具备该能力,客户端也需要支持断点续传。 本接口返回体系 blob 的二进制内容,服务端需要在 Header 中设置 `X-Zeta-Compressed-Size: $compressed_size`,或者正确设置 `Content-Length`,保证断点续传功能正常运行。 在 SSH 协议中,单个对象下载与 HTTP 的返回是不同,HTTP 返回的是 BLOB 对象的内容(端点下载的内容),而 SSH 协议需要保留一定长度的元数据: 1. 4 字节的 MAGIC,目前是 `'Z', 'B', '\x00', '\x02'`。 2. 4 字节 Version,当前值为 `1`。 3. 8 字节当前 BLOB 传输长度。 4. 8 字节当前 BLOB 压缩长度。 #### 2.3.2 批量下载 批量下载是返回用户的请求所需的 blob,请求格式如下: ```bash POST "https://zeta.io/group/mono-zeta/objects/batch" # SSH zeta-serve objects group/mono-zeta --batch # ----- cat < oid LF oid LF ... oid LF LF > ``` 连续两个换行符代表(`LF`)传输结束。 此外,客户端需要设置:`Accept: application/x-zeta-blobs` 批量 blob 下载二进制格式如下: 1. 4 字节的 MAGIC,目前是 `'Z', 'B', '\x00', '\x02'`。 2. 4 字节 Version,当前值为 `1`。 3. 16 字节 Reserved 保留字段,全部填充为 `'\0'`。 4. 4 字节的 entry_length,这个即`blob_entry`的数据总长度。 5. `$entry_length`字节的 `blob_entry`包括 64 字节的哈希和二进制内容。 6. `blob_entry`的数量是可变的,只有当接收到的 blob_end 值为 0 时表示元数据传输结束。 7. 16 字节的 CRC64 (ISO) 校验合。即整个传输流的 CRC64,不包含 crc64_checksum 本身。 结构体定义: ```cpp struct blob_entry { std::byte hash[64]; // object hash std::byte *content; // variable content }; struct batch_blob_stream { std::byte magic[4]; // 'Z','B','\x00','\x02' std::uint32_t version; // VERSION default =1 std::byte reserved[16]; // reserved: full zero std::uint32_t entry_length; // blob entry length - 64 == blob content size blob_entry entry; // blob hash and content /* ... */ std::uint32_t blob_end; // ==>0000 std::byte crc64_checksum[16]; // 16 byte CRC64 (ISO) checksum }; ``` **注意事项**:批量 blob 下载不支持传输大于 4G 的文件,因为这会降低用户体验。对于这些文件,客户端应当使用签名 URL 下载或者使用单一 blob 下载以加速下载,提高下载的稳定性。 #### 2.3.3 签名分享下载 在 HugeSCM 中,我们引入了类似 OSS 的分享签名 URL 下载特性,客户端可以将签名 URL 交由各种 P2P 客户端,比如 Dragonfly,Aria2 下载,该机制的引进能很好的解决下载加速的问题,特别是对 AI/游戏研发这种包含很多大文件,静态资源的场景,非常有裨益。 签名分享下载请求格式如下: ```bash # HTTP POST "https://zeta.io/group/mono-zeta/objects/share" # SSH zeta-serve objects group/mono-zeta --share ``` 请求体的格式为 `application/vnd.zeta+json`,客户端请求时需要设置的头有 `Accept: application/vnd.zeta+json`。 ```bash { "objects":[ { "oid":"1c3e65a02d6d6b47355ef52fd4db4f35b055dcd0bd73f27512bf05b874399378", "path":"os-images/AlmaLinux-8-latest-aarch64-boot.iso" } ] } ``` 以 Golang 为例定义如下: ```go type WantObject struct { OID string `json:"oid"` } type BatchShareObjectsRequest struct { Objects []*WantObject `json:"objects"` } ``` 该接口的返回体格式如下: ```json { "objects": [ { "oid": "1c3e65a02d6d6b47355ef52fd4db4f35b055dcd0bd73f27512bf05b874399378", "compressed_size": 857622544, "href": "http://zeta.oss-cn-hangzhou.aliyuncs.com/123123/1c/1c3e65a02d6d6b47355ef52fd4db4f35b055dcd0bd73f27512bf05b874399378****", "expires_at": "2023-11-22T22:23:33.891096+08:00" } ] } ``` 以 Golang 为例,定义如下: ```go type Representation struct { OID string `json:"oid"` CompressedSize int64 `json:"compressed_size"` Href string `json:"href"` Header map[string]string `json:"header,omitempty"` ExpiresAt time.Time `json:"expires_at,omitzero"` } type BatchShareObjectsResponse struct { Objects []*Representation `json:"objects"` } ``` 这里分别指出相应字段的含义: + oid - 请求对象的哈希值。 + compressed_size - 请求 blob 的存储大小,不是 blob 对应文件的原始大小。 + href - 请求的 URL,与 Git LFS 协议类似,客户端可以使用 href 作为下载的 URL。 + header - 请求的 Header,与 Git LFS 协议类似,客户端需要设置 header,当然,现在默认为空。 + expires_at - 签名 URL 过期时间,客户端在签名 URL 过期后需要重新请求新的签名 URL。 ## 三、上传数据协议集 在这一章中,我们制定了上传数据的协议集,用来实现从本地将提交,修改推送到远程存储库,在维护 Git 代码托管平台的过程中,我们吸取了 git 的教训,将大文件与小文件,元数据分离开来,从而提高整个传输的稳定性,健壮性,再加上 HugeSCM 特有的分片特性,能够极大的提高整个平台的稳定性,降低网络抖动导致的推送中断重试现象。 ### 3.1 文件上传检查 我们引入了文件上传检查,这个协议与 Git LFS batch API 类似,但也有一定的区别,我们没有将 download/upload 两个操作混合到一个 API,而是分离的,这样对权限校验有帮助。 请求格式如下: ```bash # HTTP POST https://zeta.io/group/mono-zeta/reference/{refname}/objects/batch # SSH zeta-serve push "group/mono-zeta" --reference "$REFNAME" --batch-check ``` 客户端需要设置(HTTP):`Accept: application/vnd.zeta+json`。 请求体格式如下: ```json { "objects": [ { "oid": "7b5da36a30c19384275d7bf409b46a527579ecde94fdbd0175dab6f53749d280", "compressed_size": 111225555 }, { "oid": "17201adab16049cddd2b3d1993031091b9cdf0689f7504ed90ca0d6f5dd347bd", "compressed_size": 1073741840 } ] } ``` 返回体格式如下: ```json { "objects": [ { "oid": "7b5da36a30c19384275d7bf409b46a527579ecde94fdbd0175dab6f53749d280", "compressed_size": 111225555, "action": "upload" }, { "oid": "17201adab16049cddd2b3d1993031091b9cdf0689f7504ed90ca0d6f5dd347bd", "compressed_size": 1073741840, "action": "download" } ] } ``` 对于存在的对象,设置其 `action`为 `download`,对于不存在的对象,设置其 `action`为 `upload`,客户端根据 `action`选择上传还是跳过该 blob。 ### 3.2 单一文件上传 在 HugeSCM 中,体积比较大的文件应当使用单一文件上传,建议是体积大于 20M,超过 100 M 应当使用单一文件上传,而不是将这些文件编码到推送协议一同上传。对于单一文件上传,其格式比较简单: ```bash # HTTP PUT https://zeta.io/group/mono-zeta/reference/{refname}/objects/{oid} # SSH zeta-serve push "group/mono-zeta" --reference "$REFNAME" --oid "$OID" --size "${SIZE}" ``` 客户端在请求的时候,应当将 blob 的实际大小值设置到 HTTP 头 `X-Zeta-Compressed-Size`(10进制),服务端据此能绕过 OSS 大小限制(如阿里云 5GB 限制),SSH 协议请使用 `--size=N`告知服务端。 服务端选择直连上传大文件到 OSS,不过应当注意,服务端需要检测传输的 blob oid 是否与输入的 oid 相同,不同则返回错误。 此外,服务端应当检测用户是否有权限修改当前分支。 ### 3.3 推送协议 在 HugeSCM 中,客户端可以使用推送协议,将本地的修改同步到远程服务器,并更新引用。请求格式如下: ```bash # HTTP POST "https://zeta.io/group/mono-zeta/reference/{refname}" # SSH zeta-serve push "group/mono-zeta" --reference "$REFNAME" ``` 客户端需要设置(HTTP):`Accept: application/x-zeta-report-result`。 此外还需要设置额外的头: | HTTP Header | SSH 参数/环境变量 | 备注 | | :---: | :---: | --- | | `X-Zeta-Command-OldRev` | `--newrev` | 64 字节待更新的分支旧的哈希值,不存在使用**缺省 OID **代替。 | | `X-Zeta-Command-NewRev` | `--oldrev` | 64 字节待更新分支新的哈希值,删除分支可以使用**缺省 OID **代替 | | `X-Zeta-Objects-Stats` | `ZETA_OBJECTS_STATS` | 记录对象数量,服务端可以据此进行特别的优化,客户端
格式为:`m-11;b-12` | 注意缺省 OID 为:`0000000000000000000000000000000000000000000000000000000000000000` 请求体的二进制格式如下: 1. 4 字节魔数,为 `'Z', 'P', '\0', '\1'`。 2. 4 字节 Version,当前为 1。 3. 16 字节保留字段,用 `'\0'`填充。 4. 8 字节条目长度(包括哈希长度),长度大于 0,则为 blob,小于 0 则为 metadata(commit/tree),等于 0 表示条目终止。(对于 metadata,其长度写入时,如 X 写入 `uint64(-(X+64))`,读取时,使用 `int64(X)`判断其大小即可。) 5. 16 字节 CRC64(ISO)校验和,不包含其本身。 以下是二进制格式定义: ```cpp struct object_entry { std::byte hash[64]; // object hash std::byte *content; // variable content }; struct push_stream { std::byte magic[4]; // 'Z','P','\0','\1' std::uint32_t version; // VERSION default =1 std::byte reserved[16]; // reserved: full zero std::int64_t entry_length; // entry_length < 0 metadata; entry_length >0 blob; entry_length==0 end object_entry entry; // object hash and content /* ... */ std::uint64_t entry_end; // ==>0000 std::byte crc64_checksum[16]; // 16 byte CRC64 (ISO) checksum }; ``` 推送协议采用 `pktline` 进行编码,用于展示进度以及结果,如果返回了字符串行 `unpack ok\nok branch`则表示分支更新成功。 服务端更新引用需要进行以下判断: + 如果远程的分支/标签不存在,那么 `old revision`则为全零。 + 分支存在是否为保护分支。 + 用户是否有相关权限。 服务端还要具备如下约束: + 更新引用前,元数据/Blob 应当先写入到(如未实现高可用的小文件存储,且以 DB/OSS 为后端) DB/OSS。 在 Push 过程中,服务端会将状态使用 `pktline` 编码进行返回,使用 `pktline` 解码后,为状态 + 信息,关键字如下: | 关键字 | 用途 | | --- | --- | | rate | 表示当前进度 | | unpack | 返回 ok 或者错误信息,意味着 unpack 成功或者失败
格式:
+ 成功:unpack ok
+ 失败:unpack message | | status | 服务端发送的一个状态,用户直接打印出来即可,如果本地是终端,责服务端可以输出彩色状态
格式:status message | | ng | 表示服务端拒绝更新引用。
格式:ng refname reason | | ok | 表示服务端接受更新引用。
格式:ok refname newRev | 可选功能:我们还支持 `push-option` 功能,客户端可以设置 `X-Zeta-Push-Option-Count (ZETA_PUSH_OPTION_COUNT)` 和 `X-Zeta-Push-Option-${N} (ZETA_PUSH_OPTION_${N})` 以传递 `push-option`,平台可以定义一些自定义能力。 ## 四、用户体验补充 在本章,我们将引入一些约定用于提高 zeta 工具和服务端数据传输之间的用户体验。 ### 4.1 区域语言感知 在 HTTP 协议中,拥有标准字段 `Accept-Language`字段,浏览器请求时会将用户本地的语言设置传递到服务端,服务端可以根据用户的设置按照特定的语言返回,我们在实现 HugeSCM 服务端/客户端的时候也可以将本地环境变量的 LANG 解析成 Accept-Language 的字段,发送到服务端,从而按照用户的语言返回特定的信息,针对不同的协议,该传递的信息如下 + HTTP 协议可以解析 `LANG`设置到 `Accept-Language`。 + SSH 协议可以传输环境变量 `LANG`。 ### 4.2 终端感知 客户端可以感知 zeta 是否运行在终端环境中,告知服务端,服务端可以据此是否开启更丰富的输出结果/ + HTTP 协议可以将 `TERM`设置到 `X-Zeta-Terminal`。 + SSH 协议可以传输环境变量 `TERM`。 ================================================ FILE: docs/pull-strategy.md ================================================ # HugeSCM Pull 不同策略说明 在 HugeSCM 中,我们引入了与 git pull 相匹配的策略,如下: 1. **merge** - 合并策略(默认) 2. **rebase** - 变基策略 3. **fast-forward only** - 仅快进策略 三种策略有不同的处理流程,适用于不同的协作场景。 ## 一、Merge 策略 ### 1.1 策略说明 Merge 策略是 HugeSCM 的默认拉取策略。当本地分支相对于远程分支有独立的提交时,会创建一个合并提交(merge commit),将远程分支的变更与本地变更合并。 ### 1.2 工作流程 ``` 远程分支: A --- B --- C --- D \ 本地分支: E --- F 执行 pull --merge 后: 远程分支: A --- B --- C --- D \ \ 本地分支: E --- F --- M (合并提交) ``` ### 1.3 使用场景 - 团队协作开发,多人同时在不同分支工作 - 需要保留完整的分支历史 - 需要清晰看到何时进行了合并 ### 1.4 冲突处理 当本地修改与远程修改冲突时: 1. HugeSCM 会标记冲突文件 2. 用户需要手动解决冲突 3. 解决冲突后执行 `zeta add` 和 `zeta commit` 4. 完成合并后推送变更 冲突标记格式(默认使用 `merge` 风格): ``` <<<<<<< HEAD 本地修改内容 ======= 远程修改内容 >>>>>>> remote ``` ### 1.5 命令示例 ```bash # 默认使用 merge 策略 zeta pull # 显式指定 merge 策略 zeta pull --merge # 指定冲突样式 zeta pull --merge --conflict-style=diff3 ``` --- ## 二、Rebase 策略 ### 2.1 策略说明 Rebase 策略将本地提交"重新应用"到远程分支的最新提交之上,保持线性历史,避免产生合并提交。 ### 2.2 工作流程 ``` 远程分支: A --- B --- C --- D \ 本地分支: E --- F 执行 pull --rebase 后: 远程分支: A --- B --- C --- D --- E' --- F' ↑ 重新应用的提交 ``` ### 2.3 使用场景 - 保持提交历史的线性,便于理解 - 避免不必要的合并提交 - 代码审查时历史更清晰 ### 2.4 注意事项 - **不要对已推送的提交执行 rebase**:这会改变提交历史,影响其他协作者 - Rebase 会重写提交哈希,原始提交将无法直接访问 - 冲突需要逐个提交解决 ### 2.5 冲突处理 Rebase 过程中遇到冲突: 1. HugeSCM 会暂停 rebase 过程 2. 用户解决当前提交的冲突 3. 执行 `zeta add` 标记冲突已解决 4. 执行 `zeta rebase --continue` 继续 rebase 5. 或执行 `zeta rebase --abort` 放弃 rebase ### 2.6 命令示例 ```bash # 使用 rebase 策略拉取 zeta pull --rebase # 自动暂存本地修改后 rebase zeta pull --rebase --autostash ``` --- ## 三、Fast-forward Only 策略 ### 3.1 策略说明 Fast-forward Only 策略仅在可以进行快进合并时执行合并。如果本地分支有独立提交(即无法快进),则拒绝合并。 ### 3.2 工作流程 **可以快进的情况:** ``` 远程分支: A --- B --- C --- D \ 本地分支: C 执行 pull --ff-only 后: 本地分支: A --- B --- C --- D (快进到 D) ``` **无法快进的情况:** ``` 远程分支: A --- B --- C --- D \ 本地分支: E 执行 pull --ff-only 后: 报错:无法快进合并,操作被拒绝 ``` ### 3.3 使用场景 - 需要严格保持线性历史 - 禁止在本地进行独立开发 - CI/CD 环境中确保干净的合并 ### 3.4 与 --ff 的区别 | 选项 | 可快进时 | 不可快进时 | |-----|---------|-----------| | `--ff` | 执行快进 | 执行合并(创建合并提交) | | `--ff-only` | 执行快进 | 拒绝合并,报错退出 | ### 3.5 命令示例 ```bash # 仅允许快进合并 zeta pull --ff-only # 组合使用 zeta pull --ff-only --autostash ``` --- ## 四、策略对比 | 特性 | Merge | Rebase | Fast-forward Only | |-----|-------|--------|------------------| | 历史类型 | 非线性 | 线性 | 线性 | | 合并提交 | 产生 | 不产生 | 不产生 | | 冲突处理 | 一次解决 | 逐提交解决 | 不适用 | | 适用场景 | 团队协作 | 个人分支 | 严格流程 | | 历史可读性 | 完整但复杂 | 清晰 | 最清晰 | | 安全性 | 高 | 中(可能改写历史) | 高 | --- ## 五、配置 ### 5.1 设置默认策略 可以通过配置设置默认的拉取策略: ```bash # 设置默认使用 rebase 策略 zeta config pull.rebase true # 设置默认仅使用快进合并 zeta config pull.ff only ``` ### 5.2 Autostash 配置 自动暂存本地修改,在 pull 完成后恢复: ```bash # 启用 autostash zeta config pull.autostash true ``` ### 5.3 冲突样式配置 配置合并时的冲突标记样式: ```bash # 可选值: merge, diff3, zdiff3 zeta config merge.conflictStyle diff3 ``` **diff3 样式示例:** ``` <<<<<<< HEAD 本地修改 ||||||| 基准版本 原始内容 ======= 远程修改 >>>>>>> remote ``` --- ## 六、最佳实践 ### 6.1 团队协作推荐 ``` 1. 在共享分支上使用 merge 或 ff-only 2. 个人特性分支使用 rebase 保持整洁 3. 已推送的提交不要 rebase ``` ### 6.2 工作流建议 **Git Flow 风格:** - 主分支:使用 `--ff-only` - 开发分支:使用 `--merge` - 特性分支:rebase 到开发分支 **GitHub Flow 风格:** - 主分支:使用 `--ff-only` - 特性分支:通过 PR 合并 ### 6.3 常见问题 **Q: pull 失败提示 "cannot fast-forward"** A: 本地有未推送的提交,且远程分支有新提交。选择: - 使用 `--merge` 创建合并提交 - 使用 `--rebase` 变基本地提交 **Q: rebase 过程中想放弃怎么办?** A: 执行 `zeta rebase --abort` 恢复到 rebase 前的状态。 **Q: 如何查看当前分支与远程分支的差异?** A: 执行 `zeta log HEAD..@{u}` 查看远程领先的提交。 --- ## 七、与 Git 的兼容性 HugeSCM 的 pull 策略设计与 Git 保持一致,熟悉 Git 的用户可以无缝切换: | Git 命令 | HugeSCM 命令 | |---------|-------------| | `git pull` | `zeta pull` | | `git pull --rebase` | `zeta pull --rebase` | | `git pull --ff-only` | `zeta pull --ff-only` | | `git pull --no-rebase` | `zeta pull --merge` | 主要差异在于 HugeSCM 是集中式架构,pull 操作从远程获取指定分支的数据,而非全量获取所有远程分支。 ================================================ FILE: docs/sparse-checkout.md ================================================ # HugeSCM 稀疏检出 稀疏检出(Sparse Checkout)允许用户只检出存储库中的部分目录,而非完整的工作区。这对于巨型存储库特别有用,可以显著减少本地存储空间和检出时间。 ## 一、概述 ### 1.1 什么是稀疏检出 在传统的版本控制系统中,检出(checkout/clone)意味着获取存储库的完整内容。但在巨型存储库中,这往往是不必要的: - AI 模型存储库可能包含多个模型的多个版本 - 游戏存储库可能包含大量美术资源 - 单体仓库可能包含多个子项目 稀疏检出允许用户只获取需要的目录,而不是整个存储库。 ### 1.2 HugeSCM 稀疏检出的优势 | 特性 | 说明 | |------|------| | 按需获取 | 仅下载指定目录的元数据和文件 | | 节省空间 | 大幅减少本地磁盘占用 | | 快速检出 | 减少网络传输,加快检出速度 | | 冲突处理 | 自动处理文件名大小写冲突 | ## 二、基本用法 ### 2.1 检出时指定目录 使用 `checkout` 命令的 `-s` 或 `--sparse` 选项: ```bash # 检出单个目录 zeta checkout http://zeta.example.io/group/repo myrepo -s src/core # 检出多个目录 zeta checkout http://zeta.example.io/group/repo myrepo -s src/core -s src/utils # 使用简写 zeta co http://zeta.example.io/group/repo myrepo -s dir1 ``` ### 2.2 查看当前稀疏配置 ```bash # 查看稀疏检出配置 zeta config core.sparse # 查看配置文件 cat .zeta/zeta.toml ``` ### 2.3 修改稀疏配置 修改稀疏配置需要通过修改配置文件实现: ```bash # 修改配置文件中的 core.sparse 项 # 编辑 .zeta/zeta.toml 文件: # [core] # sparse = ["src/core", "src/utils", "src/newdir"] ``` ### 2.4 应用稀疏配置 修改配置后,重新检出或切换分支来应用: ```bash # 切换到其他分支再切回来 zeta switch other-branch zeta switch mainline # 或者恢复工作区 zeta restore . ``` ## 三、命令详解 ### 3.1 checkout 命令的稀疏选项 ```bash zeta checkout [options] [] 稀疏相关选项: -s, --sparse=,... 指定稀疏检出的目录(可多次使用) -L, --limit= 限制检出文件大小 --one 逐一检出模式 ``` ### 3.2 完整选项 | 选项 | 说明 | |------|------| | `-b, --branch=` | 检出后创建指定分支 | | `-t, --tag=` | 检出特定标签 | | `--commit=` | 检出特定提交 | | `-s, --sparse=` | 稀疏检出目录 | | `-L, --limit=` | 限制检出文件大小 | | `--depth=` | 浅表检出深度 | | `--one` | 逐一检出大文件 | | `--batch` | 批量检出文件 | | `--snapshot` | 检出不可编辑的快照 | | `--quiet` | 静默模式 | ## 四、配置文件 ### 4.1 稀疏配置存储 稀疏配置存储在 `.zeta/zeta.toml` 文件中: ```toml [core] remote = "https://zeta.example.io/group/repo" sparse = ["src/core", "src/utils"] compression-algo = "zstd" ``` ### 4.2 配置格式说明 - `sparse` 是一个字符串数组 - 每个元素是一个目录路径(相对于仓库根目录) - 路径不需要以 `/` 开头 ## 五、实现原理 ### 5.1 Matcher 接口 在 HugeSCM 中,我们引入了 `noder.Matcher` 接口来实现稀疏匹配: ```go type Matcher interface { Len() int Match(name string) (Matcher, bool) } type sparseTreeMatcher struct { entries map[string]*sparseTreeMatcher } func (m *sparseTreeMatcher) Len() int { return len(m.entries) } func (m *sparseTreeMatcher) Match(name string) (Matcher, bool) { sm, ok := m.entries[name] return sm, ok } func (m *sparseTreeMatcher) insert(p string) { dv := strengthen.StrSplitSkipEmpty(p, '/', 10) current := m for _, d := range dv { e, ok := current.entries[d] if !ok { e = &sparseTreeMatcher{entries: make(map[string]*sparseTreeMatcher)} current.entries[d] = e } current = e } } func NewSparseTreeMatcher(dirs []string) Matcher { root := &sparseTreeMatcher{entries: make(map[string]*sparseTreeMatcher)} for _, d := range dirs { root.insert(d) } return root } ``` ### 5.2 匹配策略 稀疏检出的匹配策略: 1. 将路径转为 `noder.Matcher` 2. 从 root tree 开始匹配 3. 对于非 tree 对象则检出 4. tree 对象如果未匹配上,则跳过 5. 匹配到则使用其子 Matcher 6. 如果子 Matcher 为 nil 或长度为 0,则跳过匹配,检出所有子条目 ### 5.3 不可变对象机制 HugeSCM 使用 index 机制创建提交,为支持全功能稀疏检出,引入了**不可变对象**的概念: - 将稀疏树的排除目录作为不可变条目 - 在写入 tree 时合并这些条目 - 保证提交时包含完整的目录结构 ### 5.4 文件名大小写冲突处理 在 Windows/macOS 系统上,文件系统忽略文件名大小写,可能导致同名文件冲突: ``` src/File.txt src/file.txt # Windows/macOS 上会冲突 ``` HugeSCM 的解决方案: 1. 检测同名冲突文件 2. 将冲突路径视为不可变、不可见对象 3. 在 Windows/macOS 上不检出这些文件 4. 避免数据丢失问题 ## 六、使用场景 ### 6.1 AI 模型开发 ```bash # 只检出特定模型的目录 zeta co http://zeta.example.io/ai/models mymodels -s gpt-4 -s bert # 只检出训练脚本,不检出模型文件 zeta co http://zeta.example.io/ai/project myproject -s scripts -s configs ``` ### 6.2 单体仓库开发 ```bash # 只检出自己负责的子项目 zeta co http://zeta.example.io/mono monorepo -s services/auth -s libs/common ``` ### 6.3 文档贡献 ```bash # 只检出文档目录 zeta co http://zeta.example.io/project proj -s docs -s README.md ``` ### 6.4 CI/CD 构建 ```bash # 只检出构建所需的目录 zeta co http://zeta.example.io/project proj -s src -s build -s package.json ``` ## 七、与 Git 的差异 ### 7.1 Git 稀疏检出 ```bash # Git 需要多步操作 git clone --filter=blob:none --sparse http://example.io/repo cd repo git sparse-checkout init --cone git sparse-checkout set dir1 dir2 ``` ### 7.2 HugeSCM 稀疏检出 ```bash # HugeSCM 一条命令搞定 zeta co http://zeta.example.io/repo myrepo -s dir1 -s dir2 ``` ### 7.3 主要差异 | 特性 | Git | HugeSCM | |-----|-----|---------| | 配置复杂度 | 多步操作 | 一条命令 | | 服务端支持 | 部分过滤 | 原生支持 | | 元数据获取 | 全量 | 按需 | | 大小写冲突 | 无处理 | 自动处理 | | 子命令 | `sparse-checkout add/set/list` | 通过配置修改 | ## 八、最佳实践 ### 8.1 初始检出 ```bash # 建议:先稀疏检出,再按需添加目录 zeta co http://zeta.example.io/repo myrepo -s src/core # 后续如需添加目录,修改配置文件后重新检出 # 编辑 .zeta/zeta.toml 添加目录 # 然后执行 switch 或 restore ``` ### 8.2 配合按需获取 ```bash # 稀疏检出 + 按需获取 zeta co http://zeta.example.io/repo myrepo -s src --limit=0 # 需要特定文件时再检出 zeta checkout -- path/to/file ``` ### 8.3 避免频繁修改 频繁修改稀疏配置会导致: - 频繁的网络请求 - 工作区文件的删除和下载 建议: - 初始时规划好需要的目录 - 批量修改后再应用 ## 九、故障排查 ### 9.1 文件未检出 ```bash # 检查稀疏配置 zeta config core.sparse # 确认目录是否在配置中 # 如不在,修改配置后重新检出 ``` ### 9.2 配置不生效 ```bash # 检查配置文件 cat .zeta/zeta.toml # 确认配置格式正确 ``` ### 9.3 稀疏配置丢失 ```bash # 检查配置文件是否正确 zeta config core.sparse # 重新设置 zeta config core.sparse '["dir1", "dir2"]' ``` ## 十、相关命令 | 命令 | 说明 | |-----|------| | `zeta checkout` | 检出存储库 | | `zeta config` | 查看和修改配置 | | `zeta restore` | 恢复工作区文件 | | `zeta switch` | 切换分支 | | `zeta status` | 查看工作区状态 | ================================================ FILE: docs/stash.md ================================================ # Stash - 暂存工作区修改 `zeta stash` 命令用于暂存工作区和索引的修改,以便在不提交的情况下切换分支或执行其他操作。这对于需要临时保存工作进度的场景非常有用。 ## 一、基本概念 ### 1.1 什么是 Stash Stash 是一个栈结构,用于临时保存工作区和索引的修改状态。当你需要: - 切换分支但不想提交当前修改 - 暂时处理其他紧急任务 - 在不同分支间共享修改 可以使用 stash 保存当前工作状态。 ### 1.2 Stash 的结构 在 HugeSCM 中,stash 采用类似 Git 的存储策略: ``` stash 存储结构: ┌─────────────────────────────────────┐ │ Stash Entry (stash@{0}) │ ├─────────────────────────────────────┤ │ Index Commit (A) │ ← 暂存区的状态 │ - parents: [HEAD] │ │ - tree: index tree │ ├─────────────────────────────────────┤ │ Worktree Commit (B) │ ← 工作区的状态 │ - parents: [Index Commit, HEAD] │ │ - tree: worktree tree │ └─────────────────────────────────────┘ ``` **工作原理:** 1. 将 index 创建一个提交 A,A 的 parents 为 HEAD,其 tree 为 index 的 tree 2. 创建一个合并提交 B,其父提交是 A 和 HEAD,其 tree 为 worktree 的 tree 这种设计允许 stash 在恢复时正确处理 index 和 worktree 的差异。 ## 二、基本用法 ### 2.1 创建 Stash ```bash # 暂存所有修改(工作区 + 暂存区) zeta stash # 带描述信息 zeta stash save "WIP: 用户认证功能" # 仅暂存已跟踪文件的修改 zeta stash --keep-index # 包含未跟踪的文件 zeta stash --include-untracked # 包含未跟踪和忽略的文件 zeta stash --all ``` ### 2.2 查看 Stash 列表 ```bash # 列出所有 stash zeta stash list # 输出示例: # stash@{0}: On mainline: WIP: 用户认证功能 # stash@{1}: WIP on feature: 数据导入优化 # stash@{2}: On mainline: 临时保存 ``` ### 2.3 查看 Stash 详情 ```bash # 查看 stash 的详细变更 zeta stash show stash@{0} # 查看完整 diff zeta stash show -p stash@{0} ``` ### 2.4 应用 Stash ```bash # 应用最近的 stash(不删除) zeta stash apply # 应用指定的 stash zeta stash apply stash@{2} # 应用并从列表中删除 zeta stash pop # 应用指定的 stash 并删除 zeta stash pop stash@{2} ``` ### 2.5 删除 Stash ```bash # 删除指定的 stash zeta stash drop stash@{0} # 删除所有 stash zeta stash clear ``` ## 三、命令选项 ### 3.1 stash save 选项 | 选项 | 说明 | |-----|------| | `-p, --patch` | 交互式选择要暂存的修改 | | `-k, --keep-index` | 保持暂存区不变 | | `-u, --include-untracked` | 包含未跟踪文件 | | `-a, --all` | 包含未跟踪和忽略的文件 | | `-m, --message ` | 添加描述信息 | ### 3.2 stash apply/pop 选项 | 选项 | 说明 | |-----|------| | `--index` | 恢复暂存区状态 | ## 四、Stash 恢复流程 ### 4.1 正常恢复 当 HEAD 未改变时,stash 可以完美恢复: ``` 保存时状态: HEAD: commit A index: 修改 X worktree: 修改 X + 修改 Y 恢复后: HEAD: commit A (未变) index: 修改 X worktree: 修改 X + 修改 Y ``` ### 4.2 HEAD 改变后的恢复 如果 HEAD 在保存 stash 后发生了变化: ``` 保存时: HEAD: commit A stash: 修改 X 切换分支后: HEAD: commit B 恢复 stash: 尝试合并修改 X 到 commit B - 无冲突:成功恢复 - 有冲突:需要手动解决 ``` ### 4.3 冲突处理 当 stash pop/apply 产生冲突时: ``` $ zeta stash pop 错误:stash 恢复时产生冲突 CONFLICT (content): Merge conflict in src/auth.go # 解决冲突 # 编辑冲突文件... # 标记冲突已解决 zeta add src/auth.go # stash 会自动从列表中移除(pop 时) # 或手动删除(apply 时) zeta stash drop ``` ### 4.4 恢复暂存区状态 默认情况下,`stash apply` 不会恢复暂存区状态。使用 `--index` 选项: ```bash # 同时恢复暂存区状态 zeta stash apply --index # 如果 HEAD 改变,暂存区恢复可能失败 # 此时可以先恢复工作区,再手动 add ``` ## 五、使用场景 ### 5.1 临时切换分支 ```bash # 场景:在 feature 分支工作,需要紧急修复 mainline 的 bug # 保存当前工作 zeta stash save "WIP: 功能开发中" # 切换到 mainline zeta switch mainline zeta pull # 修复 bug zeta add . zeta commit -m "fix: 紧急修复 XXX 问题" zeta push # 返回 feature 分支 zeta switch feature # 恢复工作 zeta stash pop ``` ### 5.2 暂存部分修改 ```bash # 场景:只想暂存部分文件 # 使用 --patch 交互式选择 zeta stash save --patch # 或先 add 想保留的文件,再 stash --keep-index zeta add file-to-keep.c zeta stash --keep-index ``` ### 5.3 保留未跟踪文件 ```bash # 场景:创建了新文件但还不想提交 # 默认 stash 不包含新文件 zeta stash # 新文件不会被暂存 # 使用 --include-untracked zeta stash --include-untracked # 新文件也会被暂存 ``` ### 5.4 多个 Stash 管理 ```bash # 创建多个 stash zeta stash save "功能 A 开发中" zeta stash save "功能 B 实验性修改" # 查看列表 zeta stash list # 应用特定的 stash zeta stash apply stash@{1} ``` ## 六、与 Git 的兼容性 HugeSCM 的 stash 功能与 Git 基本兼容: | Git 命令 | HugeSCM 命令 | 说明 | |---------|-------------|------| | `git stash` | `zeta stash` | 功能相同 | | `git stash list` | `zeta stash list` | 功能相同 | | `git stash pop` | `zeta stash pop` | 功能相同 | | `git stash apply` | `zeta stash apply` | 功能相同 | | `git stash drop` | `zeta stash drop` | 功能相同 | | `git stash clear` | `zeta stash clear` | 功能相同 | ## 七、最佳实践 ### 7.1 使用描述性消息 ```bash # 不推荐 zeta stash # 推荐 zeta stash save "WIP: 用户认证模块,缺少密码验证" ``` ### 7.2 及时清理 ```bash # 定期检查 stash 列表 zeta stash list # 删除不再需要的 stash zeta stash drop stash@{n} ``` ### 7.3 避免长期存储 Stash 是临时存储机制,不应长期保存重要修改: ```bash # 如果修改很重要,应该创建临时分支 zeta switch -c temp/save-work zeta add . zeta commit -m "临时保存" zeta switch original-branch ``` ### 7.4 使用 pop 而非 apply ```bash # apply 保留 stash 在列表中 zeta stash apply # 需要手动 drop # pop 自动删除 zeta stash pop # 推荐使用 ``` ## 八、故障排查 ### 8.1 Stash 恢复冲突 ``` $ zeta stash pop CONFLICT (content): Merge conflict in file.c Automatic merge failed; fix conflicts and then commit the result. ``` 解决方案: ```bash # 查看冲突 zeta status # 编辑冲突文件解决冲突 # ... # 标记已解决 zeta add file.c # stash pop 失败时 stash 不会被删除 # 解决冲突后手动删除 zeta stash drop ``` ### 8.2 暂存区恢复失败 ``` $ zeta stash apply --index 错误:无法恢复暂存区状态 ``` 解决方案: ```bash # 不恢复暂存区 zeta stash apply # 手动 add 需要暂存的文件 zeta add ``` ### 8.3 Stash 列表丢失 Stash 存储在 `refs/stash` 引用中: ```bash # 检查 stash 引用 cat .zeta/refs/stash # 如果不小心删除了 stash 引用 # 可以在 packed-refs 或 reflog 中查找 ``` ## 九、内部实现 ### 9.1 Stash 引用存储 Stash 使用 `refs/stash` 引用存储最新的 stash entry,每个 entry 的 parent 指向之前的 stash: ``` stash@{0} ← refs/stash │ └── parent → stash@{1} │ └── parent → stash@{2} │ └── ... ``` ### 9.2 Stash Entry 结构 ``` Stash Entry (提交 B - Worktree State) ├── parent 1: Index Commit (提交 A) ├── parent 2: HEAD Commit ├── tree: 完整的 worktree tree └── message: stash 描述信息 Index Commit (提交 A) ├── parent: HEAD Commit ├── tree: index tree └── (无 message) ``` ### 9.3 恢复算法 ``` 1. 读取 stash entry 的两个 parent 2. 计算 HEAD 与 stash worktree commit 的差异 3. 应用差异到当前工作区 4. 如果指定 --index: a. 计算 HEAD 与 index commit 的差异 b. 恢复暂存区状态 ``` ## 十、相关命令 | 命令 | 说明 | |-----|------| | `zeta status` | 查看工作区状态 | | `zeta add` | 添加修改到暂存区 | | `zeta reset` | 重置暂存区 | | `zeta switch` | 切换分支 | | `zeta commit` | 提交修改 | ================================================ FILE: docs/switch.md ================================================ # Switch - 切换分支和提交 `zeta switch` 命令用于切换工作区到不同的分支或提交。与 Git 的 `git switch` / `git checkout` 类似,但针对 HugeSCM 的集中式架构进行了优化。 ## 一、基本用法 ### 1.1 切换分支 ```bash # 切换到已存在的本地分支 zeta switch feature-branch # 切换到远程分支(自动创建本地跟踪分支) zeta switch origin/feature-branch ``` ### 1.2 创建并切换分支 ```bash # 从当前分支创建新分支并切换 zeta switch -c new-feature # 从指定提交创建新分支 zeta switch -c new-feature abc123 # 从远程分支创建本地分支 zeta switch -c new-feature origin/mainline ``` ### 1.3 切换到特定提交 ```bash # 切换到特定提交(分离 HEAD 状态) zeta switch abc123def456... # 使用短哈希 zeta switch abc123 ``` ### 1.4 切换到标签 ```bash # 切换到标签(分离 HEAD 状态) zeta switch v1.0.0 ``` ## 二、命令选项 | 选项 | 说明 | |-----|------| | `-c, --create ` | 创建新分支并切换 | | `-C, --force-create ` | 强制创建分支(覆盖已存在的分支) | | `-d, --detach` | 切换到提交时强制进入分离 HEAD 状态 | | `--discard-changes` | 丢弃本地未提交的修改 | | `-f, --force` | 强制切换(等同于 --discard-changes) | | `-m, --merge` | 切换时合并本地修改到目标分支(默认开启) | | `--no-merge` | 禁用合并模式 | | `--orphan` | 创建孤儿分支 | | `--remote` | 当分支不存在时尝试从远程获取 | | `-L, --limit ` | 限制检出文件大小 | | `--quiet` | 静默模式 | ## 三、切换行为详解 ### 3.1 正常切换 当工作区干净或本地修改与目标分支无冲突时: ``` 当前分支: mainline (有未提交修改) 目标分支: feature (与修改无冲突) 执行: zeta switch feature 结果: 成功切换,本地修改保留 ``` ### 3.2 有冲突的切换 当本地修改与目标分支有冲突时: ```bash # 方式一:强制切换,丢弃本地修改 zeta switch --force feature # 方式二:合并本地修改到目标分支 zeta switch --merge feature # 方式三:暂存修改后切换 zeta stash zeta switch feature zeta stash pop ``` ### 3.3 分离 HEAD 状态 切换到特定提交或标签时,进入分离 HEAD 状态: ``` $ zeta switch abc123 注意:您正处于分离 HEAD 状态。 您可以查看、进行实验性修改并提交,这些更改不会影响任何分支。 如果您想以当前状态创建新分支,请使用: zeta switch -c <新分支名> ``` 在分离 HEAD 状态下的提交不会被任何分支引用,切换到其他分支后可能丢失。建议: ```bash # 在分离 HEAD 状态下创建新分支保存工作 zeta switch -c my-work ``` ## 四、分支创建 ### 4.1 从当前分支创建 ```bash # 从当前 HEAD 创建新分支 zeta switch -c feature-123 # 等价于 zeta branch feature-123 zeta switch feature-123 ``` ### 4.2 从指定起点创建 ```bash # 从指定提交创建 zeta switch -c feature-123 abc123 # 从远程分支创建 zeta switch -c feature-123 origin/mainline # 从标签创建 zeta switch -c v1.0-hotfix v1.0.0 ``` ### 4.3 强制创建/覆盖 ```bash # 覆盖已存在的分支 zeta switch -C existing-branch origin/mainline ``` ## 五、与 Git 的差异 ### 5.1 远程分支处理 **Git:** ```bash git switch origin/feature # 进入分离 HEAD 状态 ``` **HugeSCM:** ```bash zeta switch origin/feature # 自动创建本地跟踪分支 feature ``` HugeSCM 由于是集中式架构,切换到远程分支会自动创建本地分支。 ### 5.2 数据获取 **Git:** 需要先 `git fetch` 获取远程数据才能切换到远程分支。 **HugeSCM:** 切换时会自动从服务端获取所需的元数据和对象,无需手动 fetch。 ### 5.3 网络依赖 HugeSCM 的 switch 操作需要网络连接(除非目标分支数据已完整缓存)。 ## 六、常见场景 ### 6.1 开始新功能开发 ```bash # 从主分支创建新功能分支 zeta switch mainline zeta pull zeta switch -c feature/new-feature ``` ### 6.2 切换到同事的分支 ```bash # 直接切换,自动获取数据 zeta switch origin/colleague-feature ``` ### 6.3 回退到历史版本 ```bash # 切换到指定提交查看历史状态 zeta switch abc123 # 创建分支保存修改 zeta switch -c hotfix-branch ``` ### 6.4 放弃当前修改 ```bash # 丢弃所有未提交的修改 zeta switch --force HEAD ``` ## 七、最佳实践 ### 7.1 切换前检查状态 ```bash # 查看当前状态 zeta status # 如果有未提交的修改 zeta stash # 暂存修改 zeta switch ... # 切换分支 zeta stash pop # 恢复修改 ``` ### 7.2 分支命名规范 ```bash # 推荐使用规范的分支前缀 zeta switch -c feature/user-authentication zeta switch -c bugfix/login-error zeta switch -c release/v1.0.0 zeta switch -c hotfix/security-patch ``` ### 7.3 避免长时间处于分离 HEAD 状态 ```bash # 不推荐:在分离 HEAD 状态下工作 zeta switch abc123 # ... 进行修改和提交(可能丢失) # 推荐:立即创建分支 zeta switch abc123 zeta switch -c my-work ``` ## 八、故障排查 ### 8.1 切换失败:本地修改冲突 ``` 错误:本地修改与目标分支冲突,无法切换 ``` 解决方案: ```bash # 方案一:暂存修改 zeta stash zeta switch zeta stash pop # 方案二:丢弃修改 zeta switch --force # 方案三:尝试合并 zeta switch --merge ``` ### 8.2 切换失败:分支不存在 ``` 错误:分支 'feature' 不存在 ``` 解决方案: ```bash # 检查远程分支 zeta branch -r # 如果远程存在,使用完整名称 zeta switch origin/feature ``` ### 8.3 网络错误 ``` 错误:无法连接到远程服务器 ``` 解决方案: ```bash # 检查网络连接 ping zeta.example.io # 检查远程配置 zeta config core.remote # 如果数据已缓存,可尝试离线模式 ZETA_OFFLINE=1 zeta switch ``` ## 九、相关命令 | 命令 | 说明 | |-----|------| | `zeta branch` | 列出、创建、删除分支 | | `zeta checkout` | switch 的别名 | | `zeta stash` | 暂存工作区修改 | | `zeta status` | 查看工作区状态 | | `zeta log` | 查看提交历史 | ================================================ FILE: docs/version-negotiation.md ================================================ # 版本协商备忘录 本文档描述 HugeSCM 的版本协商机制,包括分支基线、检出、拉取、合并和推送等核心操作的流程。 ## 一、分支基线 ### 1.1 基线概念 在 HugeSCM 客户端,我们存在一个**分支基线(Baseline)**的概念。这个基线标记了存储库从远程存储库的某个提交开始向前发展,计算对象变更时会从基线开始计算。对于多个分支,我们会保留多个基线。 我们在 Fetch/Push 这些阶段严格依赖基线以实现版本协商。 ### 1.2 与 Git Shallow 的对比 这和 Git 类似,Git 浅表克隆+稀疏检出时,会在存储库中保留一个 `shallow` 文件。但该文件是全局的,因此无法对多个分支实现 shallow 控制。在拉取其他分支时,往往也需要依赖此 shallow 文件。除非用户更改,否则 shallow 是不改变的,这样的结果是 Git shallow 克隆的仓库体积还是会随着时间膨胀。 HugeSCM 的改进: - 每个分支独立维护基线 - 支持多分支独立的浅表控制 - 基线可以动态调整 ### 1.3 基线重置 在 Fetch/Push 后,远程分支发生改变后,客户端可以修改分支基线到最新的 commit: ```bash # 拉取时自动更新基线 zeta pull # 获取完整历史 zeta fetch --unshallow # 推送后更新基线 zeta push ``` ### 1.4 基线存储 基线信息存储在 `.zeta/refs/` 目录下: ``` .zeta/ ├── refs/ │ ├── branches/ │ │ └── mainline # 包含 hash 和 baseline │ └── tags/ │ └── v1.0.0 ``` ## 二、检出(Checkout) ### 2.1 检出流程 检出 ==> 拉取 + 重置 在 HugeSCM 中,我们将远程存储库创建到本地的浅表副本,该操作称之为检出(checkout),别名 `co`。 其步骤如下: 1. **初始化存储库本地目录** - 创建工作目录 - 创建 `.zeta` 目录结构 - 生成初始配置文件 2. **获取引用信息** - 使用引用发现协议获取分支/标签信息 - 对于检出特定 commit 的操作,忽略引用发现获得的 commit/peeled commit 3. **获取元数据** - 使用获取的 commit 或特定 commit 获取元数据 - 可指定深度(deepen)和目录(sparse) 4. **拉取对象** - 批量下载 blobs(小文件) - 下载大的 blobs(如有需要) - 对象清点基于第三步获得的对象 5. **重置索引,检出文件** - 更新索引 - 检出文件到工作区 - 设置分支基线 ### 2.2 检出命令 ```bash # 基本检出 zeta checkout http://zeta.example.io/group/repo myrepo # 检出特定分支 zeta checkout http://zeta.example.io/group/repo myrepo -b feature # 检出特定标签 zeta checkout http://zeta.example.io/group/repo myrepo -t v1.0.0 # 检出特定提交 zeta checkout http://zeta.example.io/group/repo myrepo --commit=abc123... # 稀疏检出 zeta checkout http://zeta.example.io/group/repo myrepo -s dir1 -s dir2 # 浅表检出(只获取最近 N 个提交) zeta checkout http://zeta.example.io/group/repo myrepo --depth=1 ``` ### 2.3 检出选项 | 选项 | 说明 | |-----|------| | `-b, --branch=` | 检出并创建本地分支 | | `-t, --tag=` | 检出特定标签 | | `--commit=` | 检出特定提交 | | `-s, --sparse=` | 稀疏检出目录 | | `--depth=` | 浅表检出深度 | | `-L, --limit=` | 限制检出文件大小 | | `--one` | 逐一检出模式 | ## 三、拉取(Pull) ### 3.1 拉取流程 在 HugeSCM 中,从服务端拉取数据的步骤: 1. **获得远程引用信息** - 使用引用发现协议 - 获取远程分支最新提交 2. **下载元数据** - 基于 baseline 参数 - 获取 commit、tree、fragments 等元数据 3. **批量下载 blobs** - 小文件批量下载 - 支持并发下载 4. **下载大的 blobs** - 使用签名 URL 下载 - 支持断点续传 5. **记录引用信息到本地** - 更新本地分支引用 - 更新基线信息 ### 3.2 拉取命令 ```bash # 基本拉取(合并模式) zeta pull # 使用 rebase 策略 zeta pull --rebase # 仅快进合并 zeta pull --ff-only # 获取完整历史 zeta pull --unshallow # 限制文件大小 zeta pull -L 100MB ``` ### 3.3 拉取选项 | 选项 | 说明 | |-----|------| | `--[no-]ff` | 允许快进(默认开启) | | `--ff-only` | 仅允许快进合并 | | `--rebase` | 使用 rebase 策略 | | `--squash` | 创建单个提交而非合并 | | `--unshallow` | 获取完整历史 | | `--one` | 逐一检出大文件 | | `-L, --limit=` | 限制文件大小 | ### 3.4 获取(Fetch) 如果只想获取数据而不合并: ```bash # 获取远程数据 zeta fetch # 获取特定引用 zeta fetch mainline # 获取完整历史 zeta fetch --unshallow # 仅获取标签 zeta fetch --tag ``` Fetch 选项: | 选项 | 说明 | |-----|------| | `--unshallow` | 获取完整历史 | | `-t, --tag` | 下载标签而非分支 | | `-L, --limit=` | 限制文件大小 | | `-f, --force` | 覆盖引用检查 | ## 四、合并(Merge) ### 4.1 合并流程 当本地分支与远程分支有分叉时,需要进行合并: 1. **检测分叉** - 比较本地和远程的提交历史 - 确定共同祖先 2. **三路合并** - 以共同祖先为基准 - 合并本地和远程的变更 3. **冲突处理** - 自动合并可解决的冲突 - 标记需要手动解决的冲突 4. **创建合并提交** - 记录合并结果 - 保持历史完整 ### 4.2 合并命令 ```bash # 合并指定分支 zeta merge feature # 合并并编辑提交信息 zeta merge feature -m "Merge feature" # 快进合并(默认) zeta merge feature --ff # 仅快进合并 zeta merge feature --ff-only # 强制创建合并提交 zeta merge feature --no-ff # 创建 squash 提交 zeta merge feature --squash # 中止合并 zeta merge --abort # 继续合并(解决冲突后) zeta merge --continue ``` ### 4.3 冲突解决 当合并产生冲突时: ```bash # 查看冲突文件 zeta status # 编辑冲突文件 # 解决冲突标记: # <<<<<<< HEAD # 本地修改 # ======= # 远程修改 # >>>>>>> feature # 标记冲突已解决 zeta add # 继续合并 zeta merge --continue ``` ### 4.4 冲突样式 可通过配置设置冲突标记样式: ```bash # merge 样式(默认) zeta config merge.conflictStyle merge # diff3 样式(显示基准版本) zeta config merge.conflictStyle diff3 # zdiff3 样式(压缩的 diff3) zeta config merge.conflictStyle zdiff3 ``` ### 4.5 合并选项 | 选项 | 说明 | |-----|------| | `--[no-]ff` | 允许快进(默认开启) | | `--ff-only` | 仅快进合并 | | `--squash` | 创建单个提交 | | `--allow-unrelated-histories` | 允许合并不相关历史 | | `-m, --message=` | 合并提交信息 | | `--abort` | 中止合并 | | `--continue` | 继续合并 | ## 五、推送(Push) ### 5.1 推送流程 将本地变更推送到远程存储库: 1. **对象上传** - 上传新的 blob 对象 - 上传新的元数据对象 2. **引用更新** - 更新远程分支引用 - 验证权限 3. **基线更新** - 更新本地基线信息 ### 5.2 推送前检查 ```bash # 查看待推送的提交 zeta log origin/mainline..HEAD # 查看待推送的变更 zeta diff origin/mainline --stat ``` ### 5.3 推送命令 ```bash # 推送当前分支 zeta push # 推送标签 zeta push --tag # 强制推送 zeta push --force # 推送并传递选项 zeta push -o option=value ``` ### 5.4 推送选项 | 选项 | 说明 | |-----|------| | `-t, --tag` | 推送标签 | | `-f, --force` | 强制推送 | | `-o, --push-option=` | 传输选项 | ### 5.5 推送保护 服务端会进行以下检查: - 分支是否存在 - 是否为保护分支 - 用户是否有写权限 - 是否为快进更新(非强制推送) ## 六、版本协商协议 ### 6.1 协议版本 当前协议版本为 `Z1`: - HTTP 请求设置头:`Zeta-Protocol: Z1` - SSH 请求设置环境变量:`ZETA_PROTOCOL=Z1` ### 6.2 基线协商 在 Fetch/Push 时,客户端会发送基线信息: ``` 客户端请求: I have: I want: 服务端响应: 需要发送的对象列表 或 增量元数据 ``` ### 6.3 增量传输 基于基线的增量传输: - **第一次检出**:从空状态获取 commit 及其所有对象 - **后续拉取**:基于 baseline 获取增量对象 - **推送**:发送 baseline 到本地 HEAD 之间的增量对象 ## 七、最佳实践 ### 7.1 定期拉取 ```bash # 建议定期拉取更新 zeta pull --rebase ``` ### 7.2 推送前检查 ```bash # 检查待推送内容 zeta log origin/mainline..HEAD --oneline zeta diff origin/mainline --stat ``` ### 7.3 解决冲突 ```bash # 拉取时产生冲突 zeta pull # 解决冲突... zeta add . zeta commit zeta push ``` ### 7.4 保持基线更新 ```bash # 定期获取更多历史,减少增量传输 zeta fetch --unshallow ``` ## 八、相关文档 | 文档 | 说明 | |------|------| | [protocol.md](protocol.md) | 传输协议规范 | | [pull-strategy.md](pull-strategy.md) | 拉取策略详解 | | [sparse-checkout.md](sparse-checkout.md) | 稀疏检出 | | [switch.md](switch.md) | 分支切换 | ================================================ FILE: docs/zeta.toml ================================================ [core] remote = "https://zeta.example.io/group/mono-zeta" # https://git-scm.com/docs/sparse-index sparse-checkout = ["dev/app/client", "dev/modules/basic"] hash-algo = "BLAKE3" compression-algo = "zstd" [user] name = "admin" email = "zeta@example.io" ================================================ FILE: docs/zeta.toml.example ================================================ # Zeta Configuration Example for AI Model Storage [core] remote = "https://zeta.io/your-group/your-repo" compression-algo = "zstd" # Compression algorithm: zstd, lz4, etc. [fragment] # Fragment threshold: files smaller than this won't be fragmented threshold = "1GB" # Target fragment size for chunking size = "1GB" # Enable CDC (Content-Defined Chunking) for AI model files # - SafeTensors files: tensor-level chunking (best deduplication) # - Other formats: CDC fallback for better deduplication # - Values: true/false (Boolean type, supports config merge) # - Default: false (use fixed-size chunking) enable_cdc = true # Recommended configurations for different scenarios: # Small models (<10GB) # threshold = "512MB" # size = "512MB" # enable_cdc = true # Large models (10-100GB) # threshold = "1GB" # size = "1GB" # enable_cdc = true # Huge models (>100GB) # threshold = "2GB" # size = "2GB" # enable_cdc = true # Frequent iterations # threshold = "512MB" # size = "512MB" # enable_cdc = true # Mixed file types # threshold = "1GB" # size = "1GB" # enable_cdc = true # Auto-detects format and chooses best strategy ================================================ FILE: go.mod ================================================ module github.com/antgroup/hugescm go 1.26 require ( charm.land/bubbles/v2 v2.1.0 charm.land/bubbletea/v2 v2.0.6 charm.land/glamour/v2 v2.0.0 charm.land/huh/v2 v2.0.3 charm.land/lipgloss/v2 v2.0.3 github.com/ProtonMail/go-crypto v1.4.1 github.com/alecthomas/chroma/v2 v2.24.1 github.com/charmbracelet/x/ansi v0.11.7 github.com/charmbracelet/x/exp/charmtone v0.0.0-20260511125431-fe5d686e0c99 github.com/clipperhouse/displaywidth v0.11.0 github.com/dgraph-io/ristretto/v2 v2.4.0 github.com/ebitengine/purego v0.10.0 github.com/emirpasic/gods v1.18.1 github.com/gliderlabs/ssh v0.3.8 github.com/go-sql-driver/mysql v1.10.0 github.com/godbus/dbus/v5 v5.2.2 github.com/golang-jwt/jwt/v5 v5.3.1 github.com/google/go-cmp v0.7.0 github.com/gorilla/mux v1.8.1 github.com/klauspost/compress v1.18.6 github.com/klauspost/cpuid/v2 v2.3.0 github.com/pelletier/go-toml/v2 v2.3.1 github.com/sirupsen/logrus v1.9.4 github.com/zeebo/blake3 v0.2.4 github.com/zeebo/xxh3 v1.1.0 golang.org/x/crypto v0.51.0 golang.org/x/net v0.54.0 golang.org/x/sync v0.20.0 golang.org/x/sys v0.44.0 golang.org/x/term v0.43.0 golang.org/x/text v0.37.0 ) require ( filippo.io/edwards25519 v1.2.0 // indirect github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect github.com/atotto/clipboard v0.1.4 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/catppuccin/go v0.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/charmbracelet/colorprofile v0.4.3 // indirect github.com/charmbracelet/harmonica v0.2.0 // indirect github.com/charmbracelet/ultraviolet v0.0.0-20260511121909-c840852527f3 // indirect github.com/charmbracelet/x/exp/ordered v0.1.0 // indirect github.com/charmbracelet/x/exp/slice v0.0.0-20260511125431-fe5d686e0c99 // indirect github.com/charmbracelet/x/exp/strings v0.1.0 // indirect github.com/charmbracelet/x/term v0.2.2 // indirect github.com/charmbracelet/x/termios v0.1.1 // indirect github.com/charmbracelet/x/windows v0.2.2 // indirect github.com/clipperhouse/uax29/v2 v2.7.0 // indirect github.com/cloudflare/circl v1.6.3 // indirect github.com/dlclark/regexp2 v1.12.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/lucasb-eyer/go-colorful v1.4.0 // indirect github.com/mattn/go-runewidth v0.0.23 // indirect github.com/microcosm-cc/bluemonday v1.0.27 // indirect github.com/mitchellh/hashstructure/v2 v2.0.2 // indirect github.com/muesli/cancelreader v0.2.2 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect github.com/yuin/goldmark v1.8.2 // indirect github.com/yuin/goldmark-emoji v1.0.6 // indirect ) ================================================ FILE: go.sum ================================================ charm.land/bubbles/v2 v2.1.0 h1:YSnNh5cPYlYjPxRrzs5VEn3vwhtEn3jVGRBT3M7/I0g= charm.land/bubbles/v2 v2.1.0/go.mod h1:l97h4hym2hvWBVfmJDtrEHHCtkIKeTEb3TTJ4ZOB3wY= charm.land/bubbletea/v2 v2.0.6 h1:UHN/91OyuhaOFGSrBXQ/hMZD8IO1Uc4BvHlgHXL2WJo= charm.land/bubbletea/v2 v2.0.6/go.mod h1:MH/D8ZLlN3op37vQvijKuU29g3rqTp+aQapURFonF9g= charm.land/glamour/v2 v2.0.0 h1:IDBoqLEy7Hdpb9VOXN+khLP/XSxtJy1VsHuW/yF87+U= charm.land/glamour/v2 v2.0.0/go.mod h1:kjq9WB0s8vuUYZNYey2jp4Lgd9f4cKdzAw88FZtpj/w= charm.land/huh/v2 v2.0.3 h1:2cJsMqEPwSywGHvdlKsJyQKPtSJLVnFKyFbsYZTlLkU= charm.land/huh/v2 v2.0.3/go.mod h1:93eEveeeqn47MwiC3tf+2atZ2l7Is88rAtmZNZ8x9Wc= charm.land/lipgloss/v2 v2.0.3 h1:yM2zJ4Cf5Y51b7RHIwioil4ApI/aypFXXVHSwlM6RzU= charm.land/lipgloss/v2 v2.0.3/go.mod h1:7myLU9iG/3xluAWzpY/fSxYYHCgoKTie7laxk6ATwXA= filippo.io/edwards25519 v1.2.0 h1:crnVqOiS4jqYleHd9vaKZ+HKtHfllngJIiOpNpoJsjo= filippo.io/edwards25519 v1.2.0/go.mod h1:xzAOLCNug/yB62zG1bQ8uziwrIqIuxhctzJT18Q77mc= github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/ProtonMail/go-crypto v1.4.1 h1:9RfcZHqEQUvP8RzecWEUafnZVtEvrBVL9BiF67IQOfM= github.com/ProtonMail/go-crypto v1.4.1/go.mod h1:e1OaTyu5SYVrO9gKOEhTc+5UcXtTUa+P3uLudwcgPqo= github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/chroma/v2 v2.24.1 h1:m5ffpfZbIb++k8AqFEKy9uVgY12xIQtBsQlc6DfZJQM= github.com/alecthomas/chroma/v2 v2.24.1/go.mod h1:l+ohZ9xRXIbGe7cIW+YZgOGbvuVLjMps/FYN/CwuabI= github.com/alecthomas/repr v0.5.2 h1:SU73FTI9D1P5UNtvseffFSGmdNci/O6RsqzeXJtP0Qs= github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-udiff v0.4.1 h1:OEIrQ8maEeDBXQDoGCbbTTXYJMYRCRO1fnodZ12Gv5o= github.com/aymanbagabas/go-udiff v0.4.1/go.mod h1:0L9PGwj20lrtmEMeyw4WKJ/TMyDtvAoK9bf2u/mNo3w= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/catppuccin/go v0.3.0 h1:d+0/YicIq+hSTo5oPuRi5kOpqkVA5tAsU6dNhvRu+aY= github.com/catppuccin/go v0.3.0/go.mod h1:8IHJuMGaUUjQM82qBrGNBv7LFq6JI3NnQCF6MOlZjpc= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q= github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q= github.com/charmbracelet/harmonica v0.2.0 h1:8NxJWRWg/bzKqqEaaeFNipOu77YR5t8aSwG4pgaUBiQ= github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao= github.com/charmbracelet/ultraviolet v0.0.0-20260511121909-c840852527f3 h1:pxGjlWZFcRQMWAdtjRelpL3Gbu8iYIyuO3Eqbd037Ow= github.com/charmbracelet/ultraviolet v0.0.0-20260511121909-c840852527f3/go.mod h1:SnKWaPaTnkTNXJgdgdquu66de12V8pW/b/qlTGaF9xg= github.com/charmbracelet/x/ansi v0.11.7 h1:kzv1kJvjg2S3r9KHo8hDdHFQLEqn4RBCb39dAYC84jI= github.com/charmbracelet/x/ansi v0.11.7/go.mod h1:9qGpnAVYz+8ACONkZBUWPtL7lulP9No6p1epAihUZwQ= github.com/charmbracelet/x/conpty v0.1.1 h1:s1bUxjoi7EpqiXysVtC+a8RrvPPNcNvAjfi4jxsAuEs= github.com/charmbracelet/x/conpty v0.1.1/go.mod h1:OmtR77VODEFbiTzGE9G1XiRJAga6011PIm4u5fTNZpk= github.com/charmbracelet/x/errors v0.0.0-20240508181413-e8d8b6e2de86 h1:JSt3B+U9iqk37QUU2Rvb6DSBYRLtWqFqfxf8l5hOZUA= github.com/charmbracelet/x/errors v0.0.0-20240508181413-e8d8b6e2de86/go.mod h1:2P0UgXMEa6TsToMSuFqKFQR+fZTO9CNGUNokkPatT/0= github.com/charmbracelet/x/exp/charmtone v0.0.0-20260511125431-fe5d686e0c99 h1:79Whx3H/thq9X9I+iqsi7o/pVaI7EhaIWbzB173eHsw= github.com/charmbracelet/x/exp/charmtone v0.0.0-20260511125431-fe5d686e0c99/go.mod h1:nsExn0DGyX0lh9LwLHTn2Gg+hafdzfSXnC+QmEJTZFY= github.com/charmbracelet/x/exp/golden v0.0.0-20250806222409-83e3a29d542f h1:pk6gmGpCE7F3FcjaOEKYriCvpmIN4+6OS/RD0vm4uIA= github.com/charmbracelet/x/exp/golden v0.0.0-20250806222409-83e3a29d542f/go.mod h1:IfZAMTHB6XkZSeXUqriemErjAWCCzT0LwjKFYCZyw0I= github.com/charmbracelet/x/exp/ordered v0.1.0 h1:55/qLwjIh0gL0Vni+QAWk7T/qRVP6sBf+2agPBgnOFE= github.com/charmbracelet/x/exp/ordered v0.1.0/go.mod h1:5UHwmG+is5THxMyCJHNPCn2/ecI07aKNrW+LcResjJ8= github.com/charmbracelet/x/exp/slice v0.0.0-20260511125431-fe5d686e0c99 h1:e4VttUIAVgO4neqnJG80U4BE//1kcvyOrJ5utftPXQE= github.com/charmbracelet/x/exp/slice v0.0.0-20260511125431-fe5d686e0c99/go.mod h1:vqEfX6xzqW1pKKZUUiFOKg0OQ7bCh54Q2vR/tserrRA= github.com/charmbracelet/x/exp/strings v0.1.0 h1:i69S2XI7uG1u4NLGeJPSYU++Nmjvpo9nwd6aoEm7gkA= github.com/charmbracelet/x/exp/strings v0.1.0/go.mod h1:/ehtMPNh9K4odGFkqYJKpIYyePhdp1hLBRvyY4bWkH8= github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk= github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI= github.com/charmbracelet/x/termios v0.1.1 h1:o3Q2bT8eqzGnGPOYheoYS8eEleT5ZVNYNy8JawjaNZY= github.com/charmbracelet/x/termios v0.1.1/go.mod h1:rB7fnv1TgOPOyyKRJ9o+AsTU/vK5WHJ2ivHeut/Pcwo= github.com/charmbracelet/x/windows v0.2.2 h1:IofanmuvaxnKHuV04sC0eBy/smG6kIKrWG2/jYn2GuM= github.com/charmbracelet/x/windows v0.2.2/go.mod h1:/8XtdKZzedat74NQFn0NGlGL4soHB0YQZrETF96h75k= github.com/charmbracelet/x/xpty v0.1.3 h1:eGSitii4suhzrISYH50ZfufV3v085BXQwIytcOdFSsw= github.com/charmbracelet/x/xpty v0.1.3/go.mod h1:poPYpWuLDBFCKmKLDnhBp51ATa0ooD8FhypRwEFtH3Y= github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8= github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0= github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk= github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM= github.com/cloudflare/circl v1.6.3 h1:9GPOhQGF9MCYUeXyMYlqTR6a5gTrgR/fBLXvUgtVcg8= github.com/cloudflare/circl v1.6.3/go.mod h1:2eXP6Qfat4O/Yhh8BznvKnJ+uzEoTQ6jVKJRn81BiS4= github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/ristretto/v2 v2.4.0 h1:I/w09yLjhdcVD2QV192UJcq8dPBaAJb9pOuMyNy0XlU= github.com/dgraph-io/ristretto/v2 v2.4.0/go.mod h1:0KsrXtXvnv0EqnzyowllbVJB8yBonswa2lTCK2gGo9E= github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dlclark/regexp2 v1.12.0 h1:0j4c5qQmnC6XOWNjP3PIXURXN2gWx76rd3KvgdPkCz8= github.com/dlclark/regexp2 v1.12.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU= github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= github.com/go-sql-driver/mysql v1.10.0 h1:Q+1LV8DkHJvSYAdR83XzuhDaTykuDx0l6fkXxoWCWfw= github.com/go-sql-driver/mysql v1.10.0/go.mod h1:M+cqaI7+xxXGG9swrdeUIoPG3Y3KCkF0pZej+SK+nWk= github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ= github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c= github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/klauspost/compress v1.18.6 h1:2jupLlAwFm95+YDR+NwD2MEfFO9d4z4Prjl1XXDjuao= github.com/klauspost/compress v1.18.6/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ= github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/lucasb-eyer/go-colorful v1.4.0 h1:UtrWVfLdarDgc44HcS7pYloGHJUjHV/4FwW4TvVgFr4= github.com/lucasb-eyer/go-colorful v1.4.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw= github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk= github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA= github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc= github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/yuin/goldmark v1.8.2 h1:kEGpgqJXdgbkhcOgBxkC0X0PmoPG1ZyoZ117rDVp4zE= github.com/yuin/goldmark v1.8.2/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg= github.com/yuin/goldmark-emoji v1.0.6 h1:QWfF2FYaXwL74tfGOW5izeiZepUDroDJfWubQI9HTHs= github.com/yuin/goldmark-emoji v1.0.6/go.mod h1:ukxJDKFpdFb5x0a5HqbdlcKtebh086iJpI31LTKmWuA= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/blake3 v0.2.4 h1:KYQPkhpRtcqh0ssGYcKLG1JYvddkEA8QwCM/yBqhaZI= github.com/zeebo/blake3 v0.2.4/go.mod h1:7eeQ6d2iXWRGF6npfaxl2CU+xy2Fjo2gxeyZGCRUjcE= github.com/zeebo/pcg v1.0.1 h1:lyqfGeWiv4ahac6ttHs+I5hwtH/+1mrhlCtVNQM2kHo= github.com/zeebo/pcg v1.0.1/go.mod h1:09F0S9iiKrwn9rlI5yjLkmrug154/YRW6KnnXVDM/l4= github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI= golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8= golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/net v0.54.0 h1:2zJIZAxAHV/OHCDTCOHAYehQzLfSXuf/5SoL/Dv6w/w= golang.org/x/net v0.54.0/go.mod h1:Sj4oj8jK6XmHpBZU/zWHw3BV3abl4Kvi+Ut7cQcY+cQ= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ= golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4= golang.org/x/term v0.43.0/go.mod h1:lrhlHNdQJHO+1qVYiHfFKVuVioJIheAc3fBSMFYEIsk= golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= ================================================ FILE: modules/README.md ================================================ ================================================ FILE: modules/base58/LICENSE ================================================ ISC License Copyright (c) 2013-2017 The btcsuite developers Copyright (c) 2016-2017 The Lightning Network Developers Permission to use, copy, modify, and distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ================================================ FILE: modules/base58/README.md ================================================ base58 ========== [![Build Status](http://img.shields.io/travis/btcsuite/btcutil.svg)](https://travis-ci.org/btcsuite/btcutil) [![ISC License](http://img.shields.io/badge/license-ISC-blue.svg)](http://copyfree.org) [![GoDoc](https://img.shields.io/badge/godoc-reference-blue.svg)](http://godoc.org/github.com/btcsuite/btcd/btcutil/base58) Package base58 provides an API for encoding and decoding to and from the modified base58 encoding. It also provides an API to do Base58Check encoding, as described [here](https://en.bitcoin.it/wiki/Base58Check_encoding). A comprehensive suite of tests is provided to ensure proper functionality. ## Installation and Updating ```bash $ go get -u github.com/btcsuite/btcd/btcutil/base58 ``` ## Examples * [Decode Example](http://godoc.org/github.com/btcsuite/btcd/btcutil/base58#example-Decode) Demonstrates how to decode modified base58 encoded data. * [Encode Example](http://godoc.org/github.com/btcsuite/btcd/btcutil/base58#example-Encode) Demonstrates how to encode data using the modified base58 encoding scheme. * [CheckDecode Example](http://godoc.org/github.com/btcsuite/btcd/btcutil/base58#example-CheckDecode) Demonstrates how to decode Base58Check encoded data. * [CheckEncode Example](http://godoc.org/github.com/btcsuite/btcd/btcutil/base58#example-CheckEncode) Demonstrates how to encode data using the Base58Check encoding scheme. ## License Package base58 is licensed under the [copyfree](http://copyfree.org) ISC License. ================================================ FILE: modules/base58/alphabet.go ================================================ // Copyright (c) 2015 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. // AUTOGENERATED by genalphabet.go; do not edit. package base58 const ( // alphabet is the modified base58 alphabet used by Bitcoin. alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" alphabetIdx0 = '1' ) var b58 = [256]byte{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 255, 255, 255, 255, 255, 255, 255, 9, 10, 11, 12, 13, 14, 15, 16, 255, 17, 18, 19, 20, 21, 255, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 255, 255, 255, 255, 255, 255, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 255, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, } ================================================ FILE: modules/base58/base58.go ================================================ // Copyright (c) 2013-2015 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. package base58 import ( "math/big" ) //go:generate go run genalphabet.go var bigRadix = [...]*big.Int{ big.NewInt(0), big.NewInt(58), big.NewInt(58 * 58), big.NewInt(58 * 58 * 58), big.NewInt(58 * 58 * 58 * 58), big.NewInt(58 * 58 * 58 * 58 * 58), big.NewInt(58 * 58 * 58 * 58 * 58 * 58), big.NewInt(58 * 58 * 58 * 58 * 58 * 58 * 58), big.NewInt(58 * 58 * 58 * 58 * 58 * 58 * 58 * 58), big.NewInt(58 * 58 * 58 * 58 * 58 * 58 * 58 * 58 * 58), bigRadix10, } var bigRadix10 = big.NewInt(58 * 58 * 58 * 58 * 58 * 58 * 58 * 58 * 58 * 58) // 58^10 func countNumZeros(s string) int { for i := range len(s) { if s[i] != alphabetIdx0 { return i } } return len(s) } // Decode decodes a modified base58 string to a byte slice. func Decode(b string) []byte { answer := big.NewInt(0) scratch := new(big.Int) // Calculating with big.Int is slow for each iteration. // x += b58[b[i]] * j // j *= 58 // // Instead we can try to do as much calculations on int64. // We can represent a 10 digit base58 number using an int64. // // Hence we'll try to convert 10, base58 digits at a time. // The rough idea is to calculate `t`, such that: // // t := b58[b[i+9]] * 58^9 ... + b58[b[i+1]] * 58^1 + b58[b[i]] * 58^0 // x *= 58^10 // x += t // // Of course, in addition, we'll need to handle boundary condition when `b` is not multiple of 58^10. // In that case we'll use the bigRadix[n] lookup for the appropriate power. for t := b; len(t) > 0; { n := min(len(t), 10) total := uint64(0) for _, v := range t[:n] { if v > 255 { return []byte("") } tmp := b58[v] if tmp == 255 { return []byte("") } total = total*58 + uint64(tmp) } answer.Mul(answer, bigRadix[n]) scratch.SetUint64(total) answer.Add(answer, scratch) t = t[n:] } tmpval := answer.Bytes() numZeros := countNumZeros(b) flen := numZeros + len(tmpval) val := make([]byte, flen) copy(val[numZeros:], tmpval) return val } // Encode encodes a byte slice to a modified base58 string. func Encode(b []byte) string { x := new(big.Int) x.SetBytes(b) // maximum length of output is log58(2^(8*len(b))) == len(b) * 8 / log(58) maxlen := int(float64(len(b))*1.365658237309761) + 1 answer := make([]byte, 0, maxlen) mod := new(big.Int) for x.Sign() > 0 { // Calculating with big.Int is slow for each iteration. // x, mod = x / 58, x % 58 // // Instead we can try to do as much calculations on int64. // x, mod = x / 58^10, x % 58^10 // // Which will give us mod, which is 10 digit base58 number. // We'll loop that 10 times to convert to the answer. x.DivMod(x, bigRadix10, mod) if x.Sign() == 0 { // When x = 0, we need to ensure we don't add any extra zeros. m := mod.Int64() for m > 0 { answer = append(answer, alphabet[m%58]) m /= 58 } } else { m := mod.Int64() for range 10 { answer = append(answer, alphabet[m%58]) m /= 58 } } } // leading zero bytes for _, i := range b { if i != 0 { break } answer = append(answer, alphabetIdx0) } // reverse alen := len(answer) for i := range alen / 2 { answer[i], answer[alen-1-i] = answer[alen-1-i], answer[i] } return string(answer) } ================================================ FILE: modules/base58/base58_test.go ================================================ // Copyright (c) 2013-2017 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. package base58_test import ( "bytes" "encoding/hex" "testing" "github.com/antgroup/hugescm/modules/base58" ) var stringTests = []struct { in string out string }{ {"", ""}, {" ", "Z"}, {"-", "n"}, {"0", "q"}, {"1", "r"}, {"-1", "4SU"}, {"11", "4k8"}, {"abc", "ZiCa"}, {"1234598760", "3mJr7AoUXx2Wqd"}, {"abcdefghijklmnopqrstuvwxyz", "3yxU3u1igY8WkgtjK92fbJQCd4BZiiT1v25f"}, {"00000000000000000000000000000000000000000000000000000000000000", "3sN2THZeE9Eh9eYrwkvZqNstbHGvrxSAM7gXUXvyFQP8XvQLUqNCS27icwUeDT7ckHm4FUHM2mTVh1vbLmk7y"}, } var invalidStringTests = []struct { in string out string }{ {"0", ""}, {"O", ""}, {"I", ""}, {"l", ""}, {"3mJr0", ""}, {"O3yxU", ""}, {"3sNI", ""}, {"4kl8", ""}, {"0OIl", ""}, {"!@#$%^&*()-_=+~`", ""}, {"abcd\xd80", ""}, {"abcd\U000020BF", ""}, } var hexTests = []struct { in string out string }{ {"", ""}, {"61", "2g"}, {"626262", "a3gV"}, {"636363", "aPEr"}, {"73696d706c792061206c6f6e6720737472696e67", "2cFupjhnEsSn59qHXstmK2ffpLv2"}, {"00eb15231dfceb60925886b67d065299925915aeb172c06647", "1NS17iag9jJgTHD1VXjvLCEnZuQ3rJDE9L"}, {"516b6fcd0f", "ABnLTmg"}, {"bf4f89001e670274dd", "3SEo3LWLoPntC"}, {"572e4794", "3EFU7m"}, {"ecac89cad93923c02321", "EJDM8drfXA6uyA"}, {"10c8511e", "Rt5zm"}, {"00000000000000000000", "1111111111"}, {"000111d38e5fc9071ffcd20b4a763cc9ae4f252bb4e48fd66a835e252ada93ff480d6dd43dc62a641155a5", "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"}, {"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff", "1cWB5HCBdLjAuqGGReWE3R3CguuwSjw6RHn39s2yuDRTS5NsBgNiFpWgAnEx6VQi8csexkgYw3mdYrMHr8x9i7aEwP8kZ7vccXWqKDvGv3u1GxFKPuAkn8JCPPGDMf3vMMnbzm6Nh9zh1gcNsMvH3ZNLmP5fSG6DGbbi2tuwMWPthr4boWwCxf7ewSgNQeacyozhKDDQQ1qL5fQFUW52QKUZDZ5fw3KXNQJMcNTcaB723LchjeKun7MuGW5qyCBZYzA1KjofN1gYBV3NqyhQJ3Ns746GNuf9N2pQPmHz4xpnSrrfCvy6TVVz5d4PdrjeshsWQwpZsZGzvbdAdN8MKV5QsBDY"}, } func TestBase58(t *testing.T) { // Encode tests for x, test := range stringTests { tmp := []byte(test.in) if res := base58.Encode(tmp); res != test.out { t.Errorf("Encode test #%d failed: got: %s want: %s", x, res, test.out) continue } } // Decode tests for x, test := range hexTests { b, err := hex.DecodeString(test.in) if err != nil { t.Errorf("hex.DecodeString failed failed #%d: got: %s", x, test.in) continue } if res := base58.Decode(test.out); !bytes.Equal(res, b) { t.Errorf("Decode test #%d failed: got: %q want: %q", x, res, test.in) continue } } // Decode with invalid input for x, test := range invalidStringTests { if res := base58.Decode(test.in); string(res) != test.out { t.Errorf("Decode invalidString test #%d failed: got: %q want: %q", x, res, test.out) continue } } } ================================================ FILE: modules/base58/base58bench_test.go ================================================ // Copyright (c) 2013-2014 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. package base58_test import ( "bytes" "testing" "github.com/antgroup/hugescm/modules/base58" ) var ( raw5k = bytes.Repeat([]byte{0xff}, 5000) raw100k = bytes.Repeat([]byte{0xff}, 100*1000) encoded5k = base58.Encode(raw5k) encoded100k = base58.Encode(raw100k) ) func BenchmarkBase58Encode_5K(b *testing.B) { b.SetBytes(int64(len(raw5k))) for b.Loop() { base58.Encode(raw5k) } } func BenchmarkBase58Encode_100K(b *testing.B) { b.SetBytes(int64(len(raw100k))) for b.Loop() { base58.Encode(raw100k) } } func BenchmarkBase58Decode_5K(b *testing.B) { b.SetBytes(int64(len(encoded5k))) for b.Loop() { base58.Decode(encoded5k) } } func BenchmarkBase58Decode_100K(b *testing.B) { b.SetBytes(int64(len(encoded100k))) for b.Loop() { base58.Decode(encoded100k) } } ================================================ FILE: modules/base58/base58check.go ================================================ // Copyright (c) 2013-2014 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. package base58 import ( "crypto/sha256" "errors" ) // ErrChecksum indicates that the checksum of a check-encoded string does not verify against // the checksum. var ErrChecksum = errors.New("checksum error") // ErrInvalidFormat indicates that the check-encoded string has an invalid format. var ErrInvalidFormat = errors.New("invalid format: version and/or checksum bytes missing") // checksum: first four bytes of sha256^2 func checksum(input []byte) (cksum [4]byte) { h := sha256.Sum256(input) h2 := sha256.Sum256(h[:]) copy(cksum[:], h2[:4]) return } // CheckEncode prepends a version byte and appends a four byte checksum. func CheckEncode(input []byte, version byte) string { b := make([]byte, 0, 1+len(input)+4) b = append(b, version) b = append(b, input...) cksum := checksum(b) b = append(b, cksum[:]...) return Encode(b) } // CheckDecode decodes a string that was encoded with CheckEncode and verifies the checksum. func CheckDecode(input string) (result []byte, version byte, err error) { decoded := Decode(input) if len(decoded) < 5 { return nil, 0, ErrInvalidFormat } version = decoded[0] var cksum [4]byte copy(cksum[:], decoded[len(decoded)-4:]) if checksum(decoded[:len(decoded)-4]) != cksum { return nil, 0, ErrChecksum } payload := decoded[1 : len(decoded)-4] result = append(result, payload...) return } ================================================ FILE: modules/base58/base58check_test.go ================================================ // Copyright (c) 2013-2014 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. package base58_test import ( "errors" "strings" "testing" "github.com/antgroup/hugescm/modules/base58" ) var checkEncodingStringTests = []struct { version byte in string out string }{ {20, "", "3MNQE1X"}, {20, " ", "B2Kr6dBE"}, {20, "-", "B3jv1Aft"}, {20, "0", "B482yuaX"}, {20, "1", "B4CmeGAC"}, {20, "-1", "mM7eUf6kB"}, {20, "11", "mP7BMTDVH"}, {20, "abc", "4QiVtDjUdeq"}, {20, "1234598760", "ZmNb8uQn5zvnUohNCEPP"}, {20, "abcdefghijklmnopqrstuvwxyz", "K2RYDcKfupxwXdWhSAxQPCeiULntKm63UXyx5MvEH2"}, {20, "00000000000000000000000000000000000000000000000000000000000000", "bi1EWXwJay2udZVxLJozuTb8Meg4W9c6xnmJaRDjg6pri5MBAxb9XwrpQXbtnqEoRV5U2pixnFfwyXC8tRAVC8XxnjK"}, } func TestBase58Check(t *testing.T) { for x, test := range checkEncodingStringTests { // test encoding if res := base58.CheckEncode([]byte(test.in), test.version); res != test.out { t.Errorf("CheckEncode test #%d failed: got %s, want: %s", x, res, test.out) } // test decoding res, version, err := base58.CheckDecode(test.out) switch { case err != nil: t.Errorf("CheckDecode test #%d failed with err: %v", x, err) case version != test.version: t.Errorf("CheckDecode test #%d failed: got version: %d want: %d", x, version, test.version) case string(res) != test.in: t.Errorf("CheckDecode test #%d failed: got: %s want: %s", x, res, test.in) } } // test the two decoding failure cases // case 1: checksum error _, _, err := base58.CheckDecode("3MNQE1Y") if !errors.Is(err, base58.ErrChecksum) { t.Error("Checkdecode test failed, expected ErrChecksum") } // case 2: invalid formats (string lengths below 5 mean the version byte and/or the checksum // bytes are missing). var testString strings.Builder for range 4 { testString.WriteString("x") _, _, err = base58.CheckDecode(testString.String()) if !errors.Is(err, base58.ErrInvalidFormat) { t.Error("Checkdecode test failed, expected ErrInvalidFormat") } } } ================================================ FILE: modules/base58/cov_report.sh ================================================ #!/bin/sh # This script uses gocov to generate a test coverage report. # The gocov tool my be obtained with the following command: # go get github.com/axw/gocov/gocov # # It will be installed to $GOPATH/bin, so ensure that location is in your $PATH. # Check for gocov. type gocov >/dev/null 2>&1 if [ $? -ne 0 ]; then echo >&2 "This script requires the gocov tool." echo >&2 "You may obtain it with the following command:" echo >&2 "go get github.com/axw/gocov/gocov" exit 1 fi gocov test | gocov report ================================================ FILE: modules/base58/doc.go ================================================ // Copyright (c) 2014 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. /* Package base58 provides an API for working with modified base58 and Base58Check encodings. # Modified Base58 Encoding Standard base58 encoding is similar to standard base64 encoding except, as the name implies, it uses a 58 character alphabet which results in an alphanumeric string and allows some characters which are problematic for humans to be excluded. Due to this, there can be various base58 alphabets. The modified base58 alphabet used by Bitcoin, and hence this package, omits the 0, O, I, and l characters that look the same in many fonts and are therefore hard to humans to distinguish. # Base58Check Encoding Scheme The Base58Check encoding scheme is primarily used for Bitcoin addresses at the time of this writing, however it can be used to generically encode arbitrary byte arrays into human-readable strings along with a version byte that can be used to differentiate the same payload. For Bitcoin addresses, the extra version is used to differentiate the network of otherwise identical public keys which helps prevent using an address intended for one network on another. */ package base58 ================================================ FILE: modules/base58/example_test.go ================================================ // Copyright (c) 2014 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. package base58_test import ( "fmt" "github.com/antgroup/hugescm/modules/base58" ) // This example demonstrates how to decode modified base58 encoded data. func ExampleDecode() { // Decode example modified base58 encoded data. encoded := "25JnwSn7XKfNQ" decoded := base58.Decode(encoded) // Show the decoded data. fmt.Println("Decoded Data:", string(decoded)) // Output: // Decoded Data: Test data } // This example demonstrates how to encode data using the modified base58 // encoding scheme. func ExampleEncode() { // Encode example data with the modified base58 encoding scheme. data := []byte("Test data") encoded := base58.Encode(data) // Show the encoded data. fmt.Println("Encoded Data:", encoded) // Output: // Encoded Data: 25JnwSn7XKfNQ } // This example demonstrates how to decode Base58Check encoded data. func ExampleCheckDecode() { // Decode an example Base58Check encoded data. encoded := "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa" decoded, version, err := base58.CheckDecode(encoded) if err != nil { fmt.Println(err) return } // Show the decoded data. fmt.Printf("Decoded data: %x\n", decoded) fmt.Println("Version Byte:", version) // Output: // Decoded data: 62e907b15cbf27d5425399ebf6f0fb50ebb88f18 // Version Byte: 0 } // This example demonstrates how to encode data using the Base58Check encoding // scheme. func ExampleCheckEncode() { // Encode example data with the Base58Check encoding scheme. data := []byte("Test data") encoded := base58.CheckEncode(data, 0) // Show the encoded data. fmt.Println("Encoded Data:", encoded) // Output: // Encoded Data: 182iP79GRURMp7oMHDU } ================================================ FILE: modules/base58/genalphabet.go ================================================ // Copyright (c) 2015 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. //go:build ignore package main import ( "bytes" "io" "log" "os" "strconv" ) var ( start = []byte(`// Copyright (c) 2015 The btcsuite developers // Use of this source code is governed by an ISC // license that can be found in the LICENSE file. // AUTOGENERATED by genalphabet.go; do not edit. package base58 const ( // alphabet is the modified base58 alphabet used by Bitcoin. alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" alphabetIdx0 = '1' ) var b58 = [256]byte{`) end = []byte(`}`) alphabet = []byte("123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz") tab = []byte("\t") invalid = []byte("255") comma = []byte(",") space = []byte(" ") nl = []byte("\n") ) func write(w io.Writer, b []byte) { _, err := w.Write(b) if err != nil { log.Fatal(err) } } func main() { fi, err := os.Create("alphabet.go") if err != nil { log.Fatal(err) } defer fi.Close() // nolint write(fi, start) write(fi, nl) for i := range byte(32) { write(fi, tab) for j := range byte(8) { idx := bytes.IndexByte(alphabet, i*8+j) if idx == -1 { write(fi, invalid) } else { write(fi, strconv.AppendInt(nil, int64(idx), 10)) } write(fi, comma) if j != 7 { write(fi, space) } } write(fi, nl) } write(fi, end) write(fi, nl) } ================================================ FILE: modules/binary/read.go ================================================ // Package binary implements syntax-sugar functions on top of the standard // library binary package package binary import ( "bufio" "encoding/binary" "io" "github.com/antgroup/hugescm/modules/plumbing" ) // Read reads structured binary data from r into data. Bytes are read and // decoded in BigEndian order // https://golang.org/pkg/encoding/binary/#Read func Read(r io.Reader, data ...any) error { for _, v := range data { if err := binary.Read(r, binary.BigEndian, v); err != nil { return err } } return nil } // ReadUntil reads from r untin delim is found func ReadUntil(r io.Reader, delim byte) ([]byte, error) { if bufr, ok := r.(*bufio.Reader); ok { return ReadUntilFromBufioReader(bufr, delim) } var buf [1]byte value := make([]byte, 0, 16) for { if _, err := io.ReadFull(r, buf[:]); err != nil { if err == io.EOF { return nil, err } return nil, err } if buf[0] == delim { return value, nil } value = append(value, buf[0]) } } // ReadUntilFromBufioReader is like bufio.ReadBytes but drops the delimiter // from the result. func ReadUntilFromBufioReader(r *bufio.Reader, delim byte) ([]byte, error) { value, err := r.ReadBytes(delim) if err != nil || len(value) == 0 { return nil, err } return value[:len(value)-1], nil } // ReadVariableWidthInt reads and returns an int in Git VLQ special format: // // Ordinary VLQ has some redundancies, example: the number 358 can be // encoded as the 2-octet VLQ 0x8166 or the 3-octet VLQ 0x808166 or the // 4-octet VLQ 0x80808166 and so forth. // // To avoid these redundancies, the VLQ format used in Git removes this // prepending redundancy and extends the representable range of shorter // VLQs by adding an offset to VLQs of 2 or more octets in such a way // that the lowest possible value for such an (N+1)-octet VLQ becomes // exactly one more than the maximum possible value for an N-octet VLQ. // In particular, since a 1-octet VLQ can store a maximum value of 127, // the minimum 2-octet VLQ (0x8000) is assigned the value 128 instead of // 0. Conversely, the maximum value of such a 2-octet VLQ (0xff7f) is // 16511 instead of just 16383. Similarly, the minimum 3-octet VLQ // (0x808000) has a value of 16512 instead of zero, which means // that the maximum 3-octet VLQ (0xffff7f) is 2113663 instead of // just 2097151. And so forth. // // This is how the offset is saved in C: // // dheader[pos] = ofs & 127; // while (ofs >>= 7) // dheader[--pos] = 128 | (--ofs & 127); func ReadVariableWidthInt(r io.Reader) (int64, error) { var c byte if err := Read(r, &c); err != nil { return 0, err } var v = int64(c & maskLength) for c&maskContinue > 0 { v++ if err := Read(r, &c); err != nil { return 0, err } v = (v << lengthBits) + int64(c&maskLength) } return v, nil } const ( maskContinue = uint8(128) // 1000 000 maskLength = uint8(127) // 0111 1111 lengthBits = uint8(7) // subsequent bytes has 7 bits to store the length ) // ReadUint64 reads 8 bytes and returns them as a BigEndian uint32 func ReadUint64(r io.Reader) (uint64, error) { var v uint64 if err := binary.Read(r, binary.BigEndian, &v); err != nil { return 0, err } return v, nil } // ReadUint32 reads 4 bytes and returns them as a BigEndian uint32 func ReadUint32(r io.Reader) (uint32, error) { var v uint32 if err := binary.Read(r, binary.BigEndian, &v); err != nil { return 0, err } return v, nil } // ReadUint16 reads 2 bytes and returns them as a BigEndian uint16 func ReadUint16(r io.Reader) (uint16, error) { var v uint16 if err := binary.Read(r, binary.BigEndian, &v); err != nil { return 0, err } return v, nil } // ReadHash reads a plumbing.Hash from r func ReadHash(r io.Reader) (plumbing.Hash, error) { var h plumbing.Hash if err := binary.Read(r, binary.BigEndian, h[:]); err != nil { return plumbing.ZeroHash, err } return h, nil } const sniffLen = 8000 // IsBinary detects if data is a binary value based on: // http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198 func IsBinary(r io.Reader) (bool, error) { reader := bufio.NewReader(r) c := 0 for c < sniffLen { b, err := reader.ReadByte() if err == io.EOF { break } if err != nil { return false, err } if b == byte(0) { return true, nil } c++ } return false, nil } ================================================ FILE: modules/binary/write.go ================================================ package binary import ( "encoding/binary" "io" ) func Swap16(v uint16) []byte { bs := make([]byte, 2) binary.BigEndian.PutUint16(bs, v) return bs } func Swap32(v uint32) []byte { bs := make([]byte, 4) binary.BigEndian.PutUint32(bs, v) return bs } func Swap64(v uint64) []byte { bs := make([]byte, 8) binary.BigEndian.PutUint64(bs, v) return bs } // Write writes the binary representation of data into w, using BigEndian order // https://golang.org/pkg/encoding/binary/#Write func Write(w io.Writer, data ...any) error { for _, v := range data { if err := binary.Write(w, binary.BigEndian, v); err != nil { return err } } return nil } func WriteVariableWidthInt(w io.Writer, n int64) error { buf := []byte{byte(n & 0x7f)} n >>= 7 for n != 0 { n-- buf = append([]byte{0x80 | (byte(n & 0x7f))}, buf...) n >>= 7 } _, err := w.Write(buf) return err } // WriteUint64 writes the binary representation of a uint64 into w, in BigEndian // order func WriteUint64(w io.Writer, value uint64) error { return binary.Write(w, binary.BigEndian, value) } // WriteUint32 writes the binary representation of a uint32 into w, in BigEndian // order func WriteUint32(w io.Writer, value uint32) error { return binary.Write(w, binary.BigEndian, value) } // WriteUint16 writes the binary representation of a uint16 into w, in BigEndian // order func WriteUint16(w io.Writer, value uint16) error { return binary.Write(w, binary.BigEndian, value) } ================================================ FILE: modules/bitmap/LICENSE ================================================ The MIT License (MIT) Copyright (c) 2018 Miguel Molina Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/bitmap/bitmap.go ================================================ package bitmap import ( "bytes" "encoding/binary" "errors" "fmt" "io" "math" ) // Bitmap is an EWAH-encoded bitmap. // See: https://github.com/lemire/javaewah type Bitmap struct { // n is the number of bits in the bitmap n int64 // w is the list of words in the bitmap w []uint64 // stuff for writing efficiently lastrlw int // stuff for reading efficiently cursor int lastpos int64 acc int64 } // New creates a new empty bitmap. func New() *Bitmap { return &Bitmap{lastrlw: -1} } // FromReader creates a Bitmap from the given reader. func FromReader(r io.Reader, order binary.ByteOrder) (*Bitmap, error) { bits, err := readUint32(r, order) if err != nil { return nil, fmt.Errorf("bitmap: can't read uncompressed bit number: %w", err) } words, err := readUint32(r, order) if err != nil { return nil, fmt.Errorf("bitmap: can't read compressed word number: %w", err) } w := make([]uint64, int(words)) for i := 0; i < int(words); i++ { w[i], err = readUint64(r, order) if err != nil { return nil, fmt.Errorf("bitmap: can't read %dth word: %w", i+1, err) } } lastrlw, err := readUint32(r, order) if err != nil { return nil, fmt.Errorf("bitmap: can't read position of current RLW: %w", err) } return &Bitmap{ n: int64(bits), w: w, lastrlw: int(lastrlw), }, nil } // FromBytes creates a Bitmap from the given bytes. func FromBytes(b []byte, order binary.ByteOrder) (*Bitmap, error) { return FromReader(bytes.NewBuffer(b), order) } // Write will write the Bitmap to a writer with the following format: // https://github.com/git/git/blob/master/Documentation/technical/bitmap-format.txt#L92 func (b *Bitmap) Write(w io.Writer, order binary.ByteOrder) (n int64, err error) { if err := writeUint32(w, order, b.Bits()); err != nil { return 0, err } if err := writeUint32(w, order, uint32(len(b.w))); err != nil { return 0, err } for _, word := range b.w { if err := writeUint64(w, order, word); err != nil { return 0, err } } if err := writeUint32(w, order, uint32(b.lastrlw)); err != nil { return 0, err } return 4*3 + int64(len(b.w))*8, nil } func writeUint32(w io.Writer, bo binary.ByteOrder, num uint32) error { var b = make([]byte, 4) bo.PutUint32(b, num) n, err := w.Write(b) if err != nil { return err } if n != 4 { return fmt.Errorf("unable to write 4 bytes for uint32, wrote %d instead", n) } return nil } func writeUint64(w io.Writer, bo binary.ByteOrder, num uint64) error { var b = make([]byte, 8) bo.PutUint64(b, num) n, err := w.Write(b) if err != nil { return err } if n != 8 { return fmt.Errorf("unable to write 8 bytes for uint64, wrote %d instead", n) } return nil } func readUint32(r io.Reader, bo binary.ByteOrder) (uint32, error) { var buf = make([]byte, 4) _, err := io.ReadAtLeast(r, buf, 4) if err != nil { return 0, err } return bo.Uint32(buf), nil } func readUint64(r io.Reader, bo binary.ByteOrder) (uint64, error) { var buf = make([]byte, 8) _, err := io.ReadAtLeast(r, buf, 8) if err != nil { return 0, err } return bo.Uint64(buf), nil } // ErrInvalidBitSet is returned when there is an attempt to set a bit // before the last written bit. var ErrInvalidBitSet = errors.New("bitmap: attempted to set a bit before the last written bit") const allones = ^uint64(0) const maxUint31 = ^uint32(0) >> 1 // Set sets to 1 the bit at the given position. Take into account that bits // need to be set in ascending order. Setting the 4th bit will return an error // if you already set the 5th bit, for example. func (b *Bitmap) Set(pos int64) error { if b.n > pos { return ErrInvalidBitSet } if b.lastrlw < 0 { b.lastrlw = 0 b.w = append(b.w, uint64(newRlw(false, 0, 0))) } last := len(b.w) - 1 lastrlw := rlw(b.w[b.lastrlw]) idx := uint64(pos % 64) bn := b.size() // it's inside the last word if bn > pos { setbit(&b.w[last], idx) // all bits in this literal are 1s, so transform it into a rlw if b.w[last] == allones { // previous rlw has 1 literal (the one being transformed), so // remove the literal and increase k by 1 only if k does not overflow if lastrlw.b() && lastrlw.l() == 1 && lastrlw.k() < math.MaxUint32 { lastrlw.setk(lastrlw.k() + 1) lastrlw.setl(0) b.w[b.lastrlw] = uint64(lastrlw) b.w = b.w[:last] } else { lastrlw.setl(lastrlw.l() - 1) b.w[last] = uint64(newRlw(true, 1, 0)) b.w[b.lastrlw] = uint64(lastrlw) b.lastrlw = last } } } else { k := (pos - bn) / 64 var literal uint64 setbit(&literal, idx) // increment l only if l does not overflow if k == 0 && lastrlw.l()+1 <= maxUint31 { lastrlw.setl(lastrlw.l() + 1) b.w[b.lastrlw] = uint64(lastrlw) } else if k > 0 && int64(lastrlw.k())+k <= math.MaxUint32 && lastrlw.l() == 0 { // increment k only if k does not overflow lastrlw.setk(lastrlw.k() + uint32(k)) lastrlw.setl(lastrlw.l() + 1) b.w[b.lastrlw] = uint64(lastrlw) } else { b.w = append(b.w, uint64(newRlw(false, uint32(k-math.MaxUint32-1), 1))) b.lastrlw = len(b.w) - 1 } b.w = append(b.w, literal) } b.n = pos + 1 return nil } // Get returns the bit at the given position, being true 1 and false 0. func (b *Bitmap) Get(pos int64) bool { // quick path, if pos has never been written, it cannot be 1 if pos >= b.n { return false } if b.lastpos > pos { b.lastpos = -1 b.cursor = 0 b.acc = 0 } else if b.cursor >= len(b.w) { b.cursor = b.lastrlw } for ; b.cursor < len(b.w); b.cursor++ { acc := b.acc word := rlw(b.w[b.cursor]) kb := int64(word.k()) * 64 if pos < b.acc+kb { b.lastpos = pos return word.b() } acc += kb l := int64(word.l()) if l > 0 && pos < acc+l*64 { for j := 1; j <= int(word.l()); j++ { if pos < acc+64 { w := b.w[b.cursor+j] mask := uint64(1) << (63 - uint64(pos-acc)) return w&mask != 0 } acc += 64 } } else { acc += l * 64 } b.cursor += int(l) b.acc = acc } return false } // Bits returns the number of uncompressed bits in the bitmap. func (b *Bitmap) Bits() uint32 { return uint32(b.n) } // size returns the number of bits allocated, even if // they are not used yet. Result of size() will always be equal // or greater than n. func (b *Bitmap) size() int64 { bn := (b.n / 64) * 64 if b.n%64 != 0 { bn += 64 } return bn } // Bytes returns the number of bytes taken by the compressed bitmap. func (b *Bitmap) Bytes() int64 { return int64(len(b.w)*64) / 8 } // Reset clears the bitmap and sets everything to unused empty zeroes. func (b *Bitmap) Reset() { b.n = 0 b.w = nil b.lastrlw = -1 } // setbit sets to 1 the bit in the given idx. func setbit(word *uint64, idx uint64) { *word |= (uint64(1) << (64 - idx - 1)) } // rlw is a Running Length Word, which has 3 parts: // - (b) 1 bit that is repeated // - (k) 32 bits with the number of repetitions for the previous bit // - (l) 31 bits saying how many literal words follow this rlw type rlw uint64 // 100000000000000000000000000000000000000000000000000000000000000 const bmask = uint64(1) << 63 // 011111111111111111111111111111110000000000000000000000000000000 const kmask = ^uint64(0) >> 32 << 31 // 000000000000000000000000000000000111111111111111111111111111111 const lmask = ^uint64(0) >> 33 // newRlw creates a new rlw with the given bit, k and l. func newRlw(b bool, k, l uint32) rlw { var bit uint64 if b { bit = 1 } return rlw(bit<<63 | uint64(k)<<31 | uint64(l)) } // b returns the bit of this rlw, true for 1, false for 0. func (r rlw) b() bool { return (uint64(r)&bmask)>>63 != 0 } // k returns the number of word repetitions of b. func (r rlw) k() uint32 { return uint32(uint64(r) & kmask >> 31) } // l returns the number of literal words that follow this rlw. func (r rlw) l() uint32 { return uint32(uint64(r) & lmask) } // setk changes the k of this rlw. func (r *rlw) setk(k uint32) { *r = rlw((uint64(*r) & ^kmask) | uint64(k)<<31) } // setl changes the l of this rlw. func (r *rlw) setl(l uint32) { *r = rlw((uint64(*r) & ^lmask) | uint64(l)) } ================================================ FILE: modules/bitmap/bitmap_test.go ================================================ //go:build !386 package bitmap import ( "bytes" "encoding/binary" "errors" "fmt" "math" "os" "reflect" "strconv" "strings" "testing" ) func TestBitmapReadWrite(t *testing.T) { b := newBitmap() buf := bytes.NewBuffer(nil) _, err := b.Write(buf, binary.BigEndian) if err != nil { t.Fatalf("Write error: %v", err) } b2, err := FromBytes(buf.Bytes(), binary.BigEndian) if err != nil { t.Fatalf("FromBytes error: %v", err) } if !reflect.DeepEqual(b, b2) { t.Errorf("Expected %v, got %v", b, b2) } } func TestBitmapGet(t *testing.T) { b := newBitmap() if b.Get(math.MaxInt64) { t.Errorf("Expected false for bit %d", math.MaxInt64) } // check zeroes of the first word for i := range int64(5 * 64) { if b.Get(i) { t.Errorf("Expected false for bit %d", i) } } // check the second word one := int64(5*64 + (63 - 5)) for i := int64(5 * 64); i < 6*64; i++ { if i == one { if !b.Get(i) { t.Errorf("Expected true for bit %d -> %s", i, strconv.FormatUint(b.w[1], 2)) } } else { if b.Get(i) { t.Errorf("Expected false for bit %d", i-5*64) } } } // check third word one = int64(6*64 + (63 - 6)) for i := int64(6 * 64); i < 7*64; i++ { if i == one { if !b.Get(i) { t.Errorf("Expected true for bit %d -> %s", i, strconv.FormatUint(b.w[2], 2)) } } else { if b.Get(i) { t.Errorf("Expected false for bit %d", i-6*64) } } } // check fourth word for i := int64(7 * 64); i < 8*64; i++ { if !b.Get(i) { t.Errorf("Expected true for bit %d", i-(7*64)) } } // check fifth word offset := int64(8 * 64) for i := offset; i < 9*64; i++ { if i < offset+5 { if b.Get(i) { t.Errorf("Expected false for bit %d", i-offset) } } else { if !b.Get(i) { t.Errorf("Expected true for bit %d", i-offset) } } } // check sixth word for i := int64(9 * 64); i < 10*64; i++ { if !b.Get(i) { t.Errorf("Expected true for bit %d", i-9*64) } } } func TestBitmapSet(t *testing.T) { b := New() if err := b.Set(5*64 + (63 - 5)); err != nil { t.Fatalf("Set error: %v", err) } if err := b.Set(6*64 + (63 - 6)); err != nil { t.Fatalf("Set error: %v", err) } if err := b.Set(0); !errors.Is(err, ErrInvalidBitSet) { t.Errorf("Expected ErrInvalidBitSet, got %v", err) } for i := int64(7 * 64); i < 8*64; i++ { if err := b.Set(i); err != nil { t.Fatalf("Set error: %v", err) } } for i := int64(8*64) + 5; i < 9*64; i++ { if err := b.Set(i); err != nil { t.Fatalf("Set error: %v", err) } } for i := int64(9 * 64); i < 10*64; i++ { if err := b.Set(i); err != nil { t.Fatalf("Set error: %v", err) } } expected := newBitmap() if !reflect.DeepEqual(b, expected) { t.Errorf("Expected %v, got %v", expected, b) } } func TestBitmapSetOverflowL(t *testing.T) { if testing.Short() { t.Skip("not running this on short mode") return } if os.Getenv("TRAVIS") == "true" { t.Skip("uses too much memory to run on travis") return } b := New() b.w = make([]uint64, int(maxUint31)+2) b.w[0] = uint64(newRlw(false, 1, uint32(maxUint31))) //nolint:unconvert // rlw -> uint64 conversion is necessary b.n = (int64(maxUint31) + 1) * 64 b.lastrlw = 0 if err := b.Set(b.n + 63); err != nil { t.Fatalf("Set error: %v", err) } if len(b.w) != int(maxUint31)+4 { t.Errorf("Expected %d, got %d", int(maxUint31)+4, len(b.w)) } if b.lastrlw != len(b.w)-2 { t.Errorf("Expected %d, got %d", len(b.w)-2, b.lastrlw) } if b.w[0] != uint64(newRlw(false, 1, uint32(maxUint31))) { //nolint:unconvert // rlw -> uint64 conversion is necessary t.Errorf("Expected %v, got %v", newRlw(false, 1, uint32(maxUint31)), b.w[0]) //nolint:unconvert // rlw -> uint64 conversion is necessary } if b.w[len(b.w)-2] != uint64(newRlw(false, 0, 1)) { t.Errorf("Expected %v, got %v", uint64(newRlw(false, 0, 1)), b.w[len(b.w)-2]) } if b.w[len(b.w)-1] != uint64(1) { t.Errorf("Expected %v, got %v", uint64(1), b.w[len(b.w)-1]) } } func TestBitmapSetOverflowK(t *testing.T) { b := New() b.w = []uint64{uint64(newRlw(false, uint32(math.MaxUint32), 0))} b.n = int64(math.MaxUint32) * 64 b.lastrlw = 0 if err := b.Set(b.n + 127); err != nil { t.Fatalf("Set error: %v", err) } if len(b.w) != 3 { t.Errorf("Expected 3, got %d", len(b.w)) } if b.lastrlw != 1 { t.Errorf("Expected 1, got %d", b.lastrlw) } if b.w[0] != uint64(newRlw(false, uint32(math.MaxUint32), 0)) { t.Errorf("Expected %v, got %v", uint64(newRlw(false, uint32(math.MaxUint32), 0)), b.w[0]) } if b.w[1] != uint64(newRlw(false, 1, 1)) { t.Errorf("Expected %v, got %v", uint64(newRlw(false, 1, 1)), b.w[1]) } if b.w[2] != uint64(1) { t.Errorf("Expected %v, got %v", uint64(1), b.w[2]) } } func TestBitmapSetOverflowKAllOnes(t *testing.T) { b := New() b.w = []uint64{ uint64(newRlw(true, uint32(math.MaxUint32), 1)), ^uint64(0) >> 1 << 1, } b.n = int64(math.MaxUint32+1)*64 - 1 b.lastrlw = 0 if err := b.Set(b.n); err != nil { t.Fatalf("Set error: %v", err) } if len(b.w) != 2 { t.Errorf("Expected 2, got %d", len(b.w)) } if b.lastrlw != 1 { t.Errorf("Expected 1, got %d", b.lastrlw) } if b.w[0] != uint64(newRlw(true, uint32(math.MaxUint32), 0)) { t.Errorf("Expected %v, got %v", uint64(newRlw(true, uint32(math.MaxUint32), 0)), b.w[0]) } if b.w[1] != uint64(newRlw(true, 1, 0)) { t.Errorf("Expected %v, got %v", uint64(newRlw(true, 1, 0)), b.w[1]) } } func TestBitmapSetAllOnesPrevRlw(t *testing.T) { b := New() b.w = []uint64{ uint64(newRlw(true, 1, 1)), ^uint64(0) >> 1 << 1, } b.n = 2*64 - 1 b.lastrlw = 0 if err := b.Set(b.n); err != nil { t.Fatalf("Set error: %v", err) } if len(b.w) != 1 { t.Errorf("Expected 1, got %d", len(b.w)) } if b.lastrlw != 0 { t.Errorf("Expected 0, got %d", b.lastrlw) } if b.w[0] != uint64(newRlw(true, 2, 0)) { t.Errorf("Expected %v, got %v", uint64(newRlw(true, 2, 0)), b.w[0]) } } func TestRlwSetl(t *testing.T) { rlw := ^rlw(0) if rlw.l() != maxUint31 { t.Errorf("Expected %d, got %d", maxUint31, rlw.l()) } rlw.setl(5) if rlw.l() != uint32(5) { t.Errorf("Expected %d, got %d", uint32(5), rlw.l()) } } func TestRlwSetk(t *testing.T) { rlw := ^rlw(0) if rlw.k() != uint32(math.MaxUint32) { t.Errorf("Expected %d, got %d", uint32(math.MaxUint32), rlw.k()) } rlw.setk(10) if rlw.k() != uint32(10) { t.Errorf("Expected %d, got %d", uint32(10), rlw.k()) } } func TestSetBit(t *testing.T) { var n uint64 setbit(&n, 5) expected := strings.Repeat("0", 5) + "1" + strings.Repeat("0", 64-6) result := fmt.Sprintf("%064s", strconv.FormatUint(n, 2)) if result != expected { t.Errorf("Expected %s, got %s", expected, result) } } // see: https://github.com/erizocosmico/go-ewah/issues/1 func TestBug1(t *testing.T) { b := New() arr := []int64{1, 5, 8, 11, 15, 19, 23, 30, 128} for _, e := range arr { _ = b.Set(e) } for _, e := range arr { if !b.Get(e) { t.Errorf("expecting %d to be in bitmap", e) } } } func BenchmarkBitmapGet(b *testing.B) { bitmap := newBitmap() for i := 0; b.Loop(); i++ { _ = bitmap.Get(int64(i) % bitmap.n) } } func BenchmarkBitmapGetSequential(b *testing.B) { bitmap, err := newBigBitmap() if err != nil { b.Fatalf("newBigBitmap error: %v", err) } for b.Loop() { for i := int64(0); i < bitmap.n; i++ { _ = bitmap.Get(i) } } } func BenchmarkBitmapGetNotSequential(b *testing.B) { bitmap, err := newBigBitmap() if err != nil { b.Fatalf("newBigBitmap error: %v", err) } for b.Loop() { for i := bitmap.n; i >= 0; i-- { _ = bitmap.Get(i) } } } func BenchmarkBitmapWrite(b *testing.B) { bitmap := newBitmap() buf := bytes.NewBuffer(nil) for b.Loop() { buf.Reset() _, _ = bitmap.Write(buf, binary.BigEndian) } } func BenchmarkBitmapRead(b *testing.B) { bitmap := newBitmap() buf := bytes.NewBuffer(nil) _, err := bitmap.Write(buf, binary.BigEndian) if err != nil { b.Fatalf("Write error: %v", err) } bytes := buf.Bytes() for b.Loop() { _, err = FromBytes(bytes, binary.BigEndian) if err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) } } } func BenchmarkBitmapSet(b *testing.B) { bitmap := New() for i := 0; b.Loop(); i++ { _ = bitmap.Set(int64(i)) } } func newBitmap() *Bitmap { b := New() b.w = []uint64{ uint64(newRlw(false, 5, 2)), uint64(1) << 5, uint64(1) << 6, uint64(newRlw(true, 1, 1)), ^uint64(0) >> 5, uint64(newRlw(true, 1, 0)), } b.n = 10 * 64 b.lastrlw = 5 return b } func newBigBitmap() (*Bitmap, error) { b := New() for i := range int64(100000) { if i%2 == 0 { if err := b.Set(i); err != nil { return nil, err } } } return b, nil } ================================================ FILE: modules/chardet/2022.go ================================================ package chardet import ( "bytes" ) type recognizer2022 struct { charset string escapes [][]byte } func (r *recognizer2022) Match(input *recognizerInput) (output recognizerOutput) { return recognizerOutput{ Charset: r.charset, Confidence: r.matchConfidence(input.input), } } func (r *recognizer2022) matchConfidence(input []byte) int { var hits, misses, shifts int input: for i := 0; i < len(input); i++ { c := input[i] if c == 0x1B { for _, esc := range r.escapes { if bytes.HasPrefix(input[i+1:], esc) { hits++ i += len(esc) continue input } } misses++ } else if c == 0x0E || c == 0x0F { shifts++ } } if hits == 0 { return 0 } quality := (100*hits - 100*misses) / (hits + misses) if hits+shifts < 5 { quality -= (5 - (hits + shifts)) * 10 } if quality < 0 { quality = 0 } return quality } var escapeSequences_2022JP = [][]byte{ {0x24, 0x28, 0x43}, // KS X 1001:1992 {0x24, 0x28, 0x44}, // JIS X 212-1990 {0x24, 0x40}, // JIS C 6226-1978 {0x24, 0x41}, // GB 2312-80 {0x24, 0x42}, // JIS X 208-1983 {0x26, 0x40}, // JIS X 208 1990, 1997 {0x28, 0x42}, // ASCII {0x28, 0x48}, // JIS-Roman {0x28, 0x49}, // Half-width katakana {0x28, 0x4a}, // JIS-Roman {0x2e, 0x41}, // ISO 8859-1 {0x2e, 0x46}, // ISO 8859-7 } var escapeSequences_2022KR = [][]byte{ {0x24, 0x29, 0x43}, } var escapeSequences_2022CN = [][]byte{ {0x24, 0x29, 0x41}, // GB 2312-80 {0x24, 0x29, 0x47}, // CNS 11643-1992 Plane 1 {0x24, 0x2A, 0x48}, // CNS 11643-1992 Plane 2 {0x24, 0x29, 0x45}, // ISO-IR-165 {0x24, 0x2B, 0x49}, // CNS 11643-1992 Plane 3 {0x24, 0x2B, 0x4A}, // CNS 11643-1992 Plane 4 {0x24, 0x2B, 0x4B}, // CNS 11643-1992 Plane 5 {0x24, 0x2B, 0x4C}, // CNS 11643-1992 Plane 6 {0x24, 0x2B, 0x4D}, // CNS 11643-1992 Plane 7 {0x4e}, // SS2 {0x4f}, // SS3 } func newRecognizer_2022JP() *recognizer2022 { return &recognizer2022{ "ISO-2022-JP", escapeSequences_2022JP, } } func newRecognizer_2022KR() *recognizer2022 { return &recognizer2022{ "ISO-2022-KR", escapeSequences_2022KR, } } func newRecognizer_2022CN() *recognizer2022 { return &recognizer2022{ "ISO-2022-CN", escapeSequences_2022CN, } } ================================================ FILE: modules/chardet/LICENSE ================================================ Copyright (c) 2012 chardet Authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Partial of the Software is derived from ICU project. See icu-license.html for license of the derivative portions. ================================================ FILE: modules/chardet/VERSION ================================================ https://github.com/saintfish/chardet 5e3ef4b5456d970814525f09c1f176294f1751a9 ================================================ FILE: modules/chardet/detector.go ================================================ // Package chardet ports character set detection from ICU. package chardet import ( "errors" "sort" ) // Result contains all the information that charset detector gives. type Result struct { // IANA name of the detected charset. Charset string // IANA name of the detected language. It may be empty for some charsets. Language string // Confidence of the Result. Scale from 1 to 100. The bigger, the more confident. Confidence int } // Detector implements charset detection. type Detector struct { recognizers []recognizer stripTag bool } // List of charset recognizers var recognizers = []recognizer{ newRecognizer_utf8(), newRecognizer_utf16be(), newRecognizer_utf16le(), newRecognizer_utf32be(), newRecognizer_utf32le(), newRecognizer_8859_1_en(), newRecognizer_8859_1_da(), newRecognizer_8859_1_de(), newRecognizer_8859_1_es(), newRecognizer_8859_1_fr(), newRecognizer_8859_1_it(), newRecognizer_8859_1_nl(), newRecognizer_8859_1_no(), newRecognizer_8859_1_pt(), newRecognizer_8859_1_sv(), newRecognizer_8859_2_cs(), newRecognizer_8859_2_hu(), newRecognizer_8859_2_pl(), newRecognizer_8859_2_ro(), newRecognizer_8859_5_ru(), newRecognizer_8859_6_ar(), newRecognizer_8859_7_el(), newRecognizer_8859_8_I_he(), newRecognizer_8859_8_he(), newRecognizer_windows_1251(), newRecognizer_windows_1256(), newRecognizer_KOI8_R(), newRecognizer_8859_9_tr(), newRecognizer_sjis(), newRecognizer_gb_18030(), newRecognizer_euc_jp(), newRecognizer_euc_kr(), newRecognizer_big5(), newRecognizer_2022JP(), newRecognizer_2022KR(), newRecognizer_2022CN(), newRecognizer_IBM424_he_rtl(), newRecognizer_IBM424_he_ltr(), newRecognizer_IBM420_ar_rtl(), newRecognizer_IBM420_ar_ltr(), } // NewTextDetector creates a Detector for plain text. func NewTextDetector() *Detector { return &Detector{recognizers, false} } // NewHtmlDetector creates a Detector for Html. func NewHtmlDetector() *Detector { return &Detector{recognizers, true} } var ( ErrNotDetected = errors.New("charset not detected") ) // DetectBest returns the Result with highest Confidence. func (d *Detector) DetectBest(b []byte) (r *Result, err error) { input := newRecognizerInput(b, d.stripTag) outputChan := make(chan recognizerOutput) for _, r := range d.recognizers { go matchHelper(r, input, outputChan) } var output Result for i := 0; i < len(d.recognizers); i++ { o := <-outputChan if output.Confidence < o.Confidence { output = Result(o) } } if output.Confidence == 0 { return nil, ErrNotDetected } return &output, nil } // DetectAll returns all Results which have non-zero Confidence. The Results are sorted by Confidence in descending order. func (d *Detector) DetectAll(b []byte) ([]Result, error) { input := newRecognizerInput(b, d.stripTag) outputChan := make(chan recognizerOutput) for _, r := range d.recognizers { go matchHelper(r, input, outputChan) } outputs := make(recognizerOutputs, 0, len(d.recognizers)) for i := 0; i < len(d.recognizers); i++ { o := <-outputChan if o.Confidence > 0 { outputs = append(outputs, o) } } if len(outputs) == 0 { return nil, ErrNotDetected } sort.Sort(outputs) dedupOutputs := make([]Result, 0, len(outputs)) foundCharsets := make(map[string]struct{}, len(outputs)) for _, o := range outputs { if _, found := foundCharsets[o.Charset]; !found { dedupOutputs = append(dedupOutputs, Result(o)) foundCharsets[o.Charset] = struct{}{} } } if len(dedupOutputs) == 0 { return nil, ErrNotDetected } return dedupOutputs, nil } func matchHelper(r recognizer, input *recognizerInput, outputChan chan<- recognizerOutput) { outputChan <- r.Match(input) } type recognizerOutputs []recognizerOutput func (r recognizerOutputs) Len() int { return len(r) } func (r recognizerOutputs) Less(i, j int) bool { return r[i].Confidence > r[j].Confidence } func (r recognizerOutputs) Swap(i, j int) { r[i], r[j] = r[j], r[i] } ================================================ FILE: modules/chardet/encoding.go ================================================ package chardet import ( "fmt" "io" "strings" "golang.org/x/text/encoding" "golang.org/x/text/encoding/charmap" "golang.org/x/text/encoding/japanese" "golang.org/x/text/encoding/korean" "golang.org/x/text/encoding/simplifiedchinese" "golang.org/x/text/encoding/traditionalchinese" "golang.org/x/text/encoding/unicode" ) var encodings = map[string]encoding.Encoding{ "iso-8859-2": charmap.ISO8859_2, "iso-8859-3": charmap.ISO8859_3, "iso-8859-4": charmap.ISO8859_4, "iso-8859-5": charmap.ISO8859_5, "iso-8859-6": charmap.ISO8859_6, "iso-8859-7": charmap.ISO8859_7, "iso-8859-8": charmap.ISO8859_8, "iso-8859-8I": charmap.ISO8859_8I, "iso-8859-10": charmap.ISO8859_10, "iso-8859-13": charmap.ISO8859_13, "iso-8859-14": charmap.ISO8859_14, "iso-8859-15": charmap.ISO8859_15, "iso-8859-16": charmap.ISO8859_16, "koi8-r": charmap.KOI8R, "koi8-u": charmap.KOI8U, "windows-874": charmap.Windows874, "windows-1250": charmap.Windows1250, "windows-1251": charmap.Windows1251, "windows-1252": charmap.Windows1252, "windows-1253": charmap.Windows1253, "windows-1254": charmap.Windows1254, "windows-1255": charmap.Windows1255, "windows-1256": charmap.Windows1256, "windows-1257": charmap.Windows1257, "windows-1258": charmap.Windows1258, "gbk": simplifiedchinese.GBK, "gb18030": simplifiedchinese.GB18030, "big5": traditionalchinese.Big5, "euc-jp": japanese.EUCJP, "iso-2022-jp": japanese.ISO2022JP, "shift_jis": japanese.ShiftJIS, "euc-kr": korean.EUCKR, "utf-16be": unicode.UTF16(unicode.BigEndian, unicode.UseBOM), "utf-16le": unicode.UTF16(unicode.LittleEndian, unicode.UseBOM), } // NewReader: convert text from other encodings to UTF-8 func NewReader(r io.Reader, charset string) io.Reader { if e, ok := encodings[strings.ToLower(charset)]; ok { return e.NewDecoder().Reader(r) } return r } // NewWriter: convert UTF-8 encoding to other encodings func NewWriter(w io.Writer, charset string) io.Writer { if e, ok := encodings[strings.ToLower(charset)]; ok { return e.NewEncoder().Writer(w) } return w } // DecodeFromCharset decode input to utf8 func DecodeFromCharset(input []byte, charset string) ([]byte, error) { if enc, ok := encodings[strings.ToLower(charset)]; ok { return enc.NewDecoder().Bytes(input) } return nil, fmt.Errorf("unrecognized charset %s", charset) } // EncodeToCharset encode input to charset func EncodeToCharset(input []byte, charset string) ([]byte, error) { if e, ok := encodings[strings.ToLower(charset)]; ok { return e.NewEncoder().Bytes(input) } return nil, fmt.Errorf("unrecognized charset %s", charset) } ================================================ FILE: modules/chardet/icu-license.html ================================================ ICU License - ICU 1.8.1 and later

ICU License - ICU 1.8.1 and later

COPYRIGHT AND PERMISSION NOTICE

Copyright (c) 1995-2012 International Business Machines Corporation and others

All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, provided that the above copyright notice(s) and this permission notice appear in all copies of the Software and that both the above copyright notice(s) and this permission notice appear in supporting documentation.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization of the copyright holder.


All trademarks and registered trademarks mentioned herein are the property of their respective owners.

================================================ FILE: modules/chardet/multi_byte.go ================================================ package chardet import ( "errors" "math" ) type recognizerMultiByte struct { charset string language string decoder charDecoder commonChars []uint16 } type charDecoder interface { DecodeOneChar([]byte) (c uint16, remain []byte, err error) } func (r *recognizerMultiByte) Match(input *recognizerInput) (output recognizerOutput) { return recognizerOutput{ Charset: r.charset, Language: r.language, Confidence: r.matchConfidence(input), } } func (r *recognizerMultiByte) matchConfidence(input *recognizerInput) int { raw := input.raw var c uint16 var err error var totalCharCount, badCharCount, singleByteCharCount, doubleByteCharCount, commonCharCount int for c, raw, err = r.decoder.DecodeOneChar(raw); len(raw) > 0; c, raw, err = r.decoder.DecodeOneChar(raw) { totalCharCount++ if err != nil { badCharCount++ } else if c <= 0xFF { singleByteCharCount++ } else { doubleByteCharCount++ if r.commonChars != nil && binarySearch(r.commonChars, c) { commonCharCount++ } } if badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount { return 0 } } if doubleByteCharCount <= 10 && badCharCount == 0 { if doubleByteCharCount == 0 && totalCharCount < 10 { return 0 } else { return 10 } } if doubleByteCharCount < 20*badCharCount { return 0 } if r.commonChars == nil { confidence := min(30+doubleByteCharCount-20*badCharCount, 100) return confidence } maxVal := math.Log(float64(doubleByteCharCount) / 4) scaleFactor := 90 / maxVal confidence := max(min(int(math.Log(float64(commonCharCount)+1)*scaleFactor+10), 100), 0) return confidence } func binarySearch(l []uint16, c uint16) bool { start := 0 end := len(l) - 1 for start <= end { mid := (start + end) / 2 if c == l[mid] { return true } else if c < l[mid] { end = mid - 1 } else { start = mid + 1 } } return false } var ( ErrEndOfInputBuffer = errors.New("end of input buffer") ErrBadCharDecode = errors.New("decode a bad char") ) type charDecoder_sjis struct { } func (charDecoder_sjis) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { if len(input) == 0 { return 0, nil, ErrEndOfInputBuffer } first := input[0] c = uint16(first) remain = input[1:] if first <= 0x7F || (first > 0xA0 && first <= 0xDF) { return } if len(remain) == 0 { return c, remain, ErrBadCharDecode } second := remain[0] remain = remain[1:] c = c<<8 | uint16(second) if (second >= 0x40 && second <= 0x7F) || (second >= 0x80 && second <= 0xFE) { } else { err = ErrBadCharDecode } return } var commonChars_sjis = []uint16{ 0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0, 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5, 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc, 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341, 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389, 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa, } func newRecognizer_sjis() *recognizerMultiByte { return &recognizerMultiByte{ "Shift_JIS", "ja", charDecoder_sjis{}, commonChars_sjis, } } type charDecoder_euc struct { } func (charDecoder_euc) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { if len(input) == 0 { return 0, nil, ErrEndOfInputBuffer } first := input[0] remain = input[1:] c = uint16(first) if first <= 0x8D { return uint16(first), remain, nil } if len(remain) == 0 { return 0, nil, ErrEndOfInputBuffer } second := remain[0] remain = remain[1:] c = c<<8 | uint16(second) if first >= 0xA1 && first <= 0xFE { if second < 0xA1 { err = ErrBadCharDecode } return } if first == 0x8E { if second < 0xA1 { err = ErrBadCharDecode } return } if first == 0x8F { if len(remain) == 0 { return 0, nil, ErrEndOfInputBuffer } third := remain[0] remain = remain[1:] c = c<<0 | uint16(third) if third < 0xa1 { err = ErrBadCharDecode } } return } var commonChars_euc_jp = []uint16{ 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2, 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4, 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de, 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef, 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af, 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7, 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1, 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee, 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1, } var commonChars_euc_kr = []uint16{ 0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc, 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce, 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce, 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba, 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee, 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7, 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6, 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6, 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad, } func newRecognizer_euc_jp() *recognizerMultiByte { return &recognizerMultiByte{ "EUC-JP", "ja", charDecoder_euc{}, commonChars_euc_jp, } } func newRecognizer_euc_kr() *recognizerMultiByte { return &recognizerMultiByte{ "EUC-KR", "ko", charDecoder_euc{}, commonChars_euc_kr, } } type charDecoder_big5 struct { } func (charDecoder_big5) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { if len(input) == 0 { return 0, nil, ErrEndOfInputBuffer } first := input[0] remain = input[1:] c = uint16(first) if first <= 0x7F || first == 0xFF { return } if len(remain) == 0 { return c, nil, ErrEndOfInputBuffer } second := remain[0] remain = remain[1:] c = c<<8 | uint16(second) if second < 0x40 || second == 0x7F || second == 0xFF { err = ErrBadCharDecode } return } var commonChars_big5 = []uint16{ 0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446, 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3, 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548, 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8, 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da, 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3, 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59, 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c, 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44, 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f, } func newRecognizer_big5() *recognizerMultiByte { return &recognizerMultiByte{ "Big5", "zh", charDecoder_big5{}, commonChars_big5, } } type charDecoder_gb_18030 struct { } func (charDecoder_gb_18030) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { if len(input) == 0 { return 0, nil, ErrEndOfInputBuffer } first := input[0] remain = input[1:] c = uint16(first) if first <= 0x80 { return } if len(remain) == 0 { return 0, nil, ErrEndOfInputBuffer } second := remain[0] remain = remain[1:] c = c<<8 | uint16(second) if first >= 0x81 && first <= 0xFE { if (second >= 0x40 && second <= 0x7E) || (second >= 0x80 && second <= 0xFE) { return } if second >= 0x30 && second <= 0x39 { if len(remain) == 0 { return 0, nil, ErrEndOfInputBuffer } third := remain[0] remain = remain[1:] if third >= 0x81 && third <= 0xFE { if len(remain) == 0 { return 0, nil, ErrEndOfInputBuffer } fourth := remain[0] remain = remain[1:] if fourth >= 0x30 && fourth <= 0x39 { c = uint16(third)<<8 | uint16(fourth) return } } } err = ErrBadCharDecode } return } var commonChars_gb_18030 = []uint16{ 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac, 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4, 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4, 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6, 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6, 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7, 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7, 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5, 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2, 0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0, } func newRecognizer_gb_18030() *recognizerMultiByte { return &recognizerMultiByte{ "GB18030", "zh", charDecoder_gb_18030{}, commonChars_gb_18030, } } ================================================ FILE: modules/chardet/recognizer.go ================================================ package chardet type recognizer interface { Match(*recognizerInput) recognizerOutput } type recognizerOutput Result type recognizerInput struct { raw []byte input []byte tagStripped bool byteStats []int hasC1Bytes bool } func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput { input, stripped := mayStripInput(raw, stripTag) byteStats := computeByteStats(input) return &recognizerInput{ raw: raw, input: input, tagStripped: stripped, byteStats: byteStats, hasC1Bytes: computeHasC1Bytes(byteStats), } } func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) { const inputBufferSize = 8192 out = make([]byte, 0, inputBufferSize) var badTags, openTags int32 inMarkup := false stripped = false if stripTag { stripped = true for _, c := range raw { if c == '<' { if inMarkup { badTags += 1 } inMarkup = true openTags += 1 } if !inMarkup { out = append(out, c) if len(out) >= inputBufferSize { break } } if c == '>' { inMarkup = false } } } if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) { limit := min(len(raw), inputBufferSize) out = make([]byte, limit) copy(out, raw[:limit]) stripped = false } return } func computeByteStats(input []byte) []int { r := make([]int, 256) for _, c := range input { r[c] += 1 } return r } func computeHasC1Bytes(byteStats []int) bool { for _, count := range byteStats[0x80 : 0x9F+1] { if count > 0 { return true } } return false } ================================================ FILE: modules/chardet/single_byte.go ================================================ package chardet // Recognizer for single byte charset family type recognizerSingleByte struct { charset string hasC1ByteCharset string language string charMap *[256]byte ngram *[64]uint32 } func (r *recognizerSingleByte) Match(input *recognizerInput) recognizerOutput { charset := r.charset if input.hasC1Bytes && len(r.hasC1ByteCharset) > 0 { charset = r.hasC1ByteCharset } return recognizerOutput{ Charset: charset, Language: r.language, Confidence: r.parseNgram(input.input), } } type ngramState struct { ngram uint32 ignoreSpace bool ngramCount, ngramHit uint32 table *[64]uint32 } func newNgramState(table *[64]uint32) *ngramState { return &ngramState{ ngram: 0, ignoreSpace: false, ngramCount: 0, ngramHit: 0, table: table, } } func (s *ngramState) AddByte(b byte) { const ngramMask = 0xFFFFFF if b != 0x20 || !s.ignoreSpace { s.ngram = ((s.ngram << 8) | uint32(b)) & ngramMask s.ignoreSpace = (s.ngram == 0x20) s.ngramCount++ if s.lookup() { s.ngramHit++ } } s.ignoreSpace = (b == 0x20) } func (s *ngramState) HitRate() float32 { if s.ngramCount == 0 { return 0 } return float32(s.ngramHit) / float32(s.ngramCount) } func (s *ngramState) lookup() bool { var index int if s.table[index+32] <= s.ngram { index += 32 } if s.table[index+16] <= s.ngram { index += 16 } if s.table[index+8] <= s.ngram { index += 8 } if s.table[index+4] <= s.ngram { index += 4 } if s.table[index+2] <= s.ngram { index += 2 } if s.table[index+1] <= s.ngram { index += 1 } if s.table[index] > s.ngram { index -= 1 } if index < 0 || s.table[index] != s.ngram { return false } return true } func (r *recognizerSingleByte) parseNgram(input []byte) int { state := newNgramState(r.ngram) for _, inChar := range input { c := r.charMap[inChar] if c != 0 { state.AddByte(c) } } state.AddByte(0x20) rate := state.HitRate() if rate > 0.33 { return 98 } return int(rate * 300) } var charMap_8859_1 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, } var ngrams_8859_1_en = [64]uint32{ 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, } var ngrams_8859_1_da = [64]uint32{ 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, } var ngrams_8859_1_de = [64]uint32{ 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, } var ngrams_8859_1_es = [64]uint32{ 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, } var ngrams_8859_1_fr = [64]uint32{ 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220, } var ngrams_8859_1_it = [64]uint32{ 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073, 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, } var ngrams_8859_1_nl = [64]uint32{ 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, } var ngrams_8859_1_no = [64]uint32{ 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, } var ngrams_8859_1_pt = [64]uint32{ 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F, } var ngrams_8859_1_sv = [64]uint32{ 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, } func newRecognizer_8859_1(language string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: "ISO-8859-1", hasC1ByteCharset: "windows-1252", language: language, charMap: &charMap_8859_1, ngram: ngram, } } func newRecognizer_8859_1_en() *recognizerSingleByte { return newRecognizer_8859_1("en", &ngrams_8859_1_en) } func newRecognizer_8859_1_da() *recognizerSingleByte { return newRecognizer_8859_1("da", &ngrams_8859_1_da) } func newRecognizer_8859_1_de() *recognizerSingleByte { return newRecognizer_8859_1("de", &ngrams_8859_1_de) } func newRecognizer_8859_1_es() *recognizerSingleByte { return newRecognizer_8859_1("es", &ngrams_8859_1_es) } func newRecognizer_8859_1_fr() *recognizerSingleByte { return newRecognizer_8859_1("fr", &ngrams_8859_1_fr) } func newRecognizer_8859_1_it() *recognizerSingleByte { return newRecognizer_8859_1("it", &ngrams_8859_1_it) } func newRecognizer_8859_1_nl() *recognizerSingleByte { return newRecognizer_8859_1("nl", &ngrams_8859_1_nl) } func newRecognizer_8859_1_no() *recognizerSingleByte { return newRecognizer_8859_1("no", &ngrams_8859_1_no) } func newRecognizer_8859_1_pt() *recognizerSingleByte { return newRecognizer_8859_1("pt", &ngrams_8859_1_pt) } func newRecognizer_8859_1_sv() *recognizerSingleByte { return newRecognizer_8859_1("sv", &ngrams_8859_1_sv) } var charMap_8859_2 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, } var ngrams_8859_2_cs = [64]uint32{ 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, } var ngrams_8859_2_hu = [64]uint32{ 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, } var ngrams_8859_2_pl = [64]uint32{ 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, } var ngrams_8859_2_ro = [64]uint32{ 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69, 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20, } func newRecognizer_8859_2(language string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: "ISO-8859-2", hasC1ByteCharset: "windows-1250", language: language, charMap: &charMap_8859_2, ngram: ngram, } } func newRecognizer_8859_2_cs() *recognizerSingleByte { return newRecognizer_8859_2("cs", &ngrams_8859_2_cs) } func newRecognizer_8859_2_hu() *recognizerSingleByte { return newRecognizer_8859_2("hu", &ngrams_8859_2_hu) } func newRecognizer_8859_2_pl() *recognizerSingleByte { return newRecognizer_8859_2("pl", &ngrams_8859_2_pl) } func newRecognizer_8859_2_ro() *recognizerSingleByte { return newRecognizer_8859_2("ro", &ngrams_8859_2_ro) } var charMap_8859_5 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, } var ngrams_8859_5_ru = [64]uint32{ 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, } func newRecognizer_8859_5(language string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: "ISO-8859-5", language: language, charMap: &charMap_8859_5, ngram: ngram, } } func newRecognizer_8859_5_ru() *recognizerSingleByte { return newRecognizer_8859_5("ru", &ngrams_8859_5_ru) } var charMap_8859_6 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, } var ngrams_8859_6_ar = [64]uint32{ 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, } func newRecognizer_8859_6(language string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: "ISO-8859-6", language: language, charMap: &charMap_8859_6, ngram: ngram, } } func newRecognizer_8859_6_ar() *recognizerSingleByte { return newRecognizer_8859_6("ar", &ngrams_8859_6_ar) } var charMap_8859_7 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, } var ngrams_8859_7_el = [64]uint32{ 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, } func newRecognizer_8859_7(language string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: "ISO-8859-7", hasC1ByteCharset: "windows-1253", language: language, charMap: &charMap_8859_7, ngram: ngram, } } func newRecognizer_8859_7_el() *recognizerSingleByte { return newRecognizer_8859_7("el", &ngrams_8859_7_el) } var charMap_8859_8 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, } var ngrams_8859_8_I_he = [64]uint32{ 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, } var ngrams_8859_8_he = [64]uint32{ 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, } func newRecognizer_8859_8(language string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: "ISO-8859-8", hasC1ByteCharset: "windows-1255", language: language, charMap: &charMap_8859_8, ngram: ngram, } } func newRecognizer_8859_8_I_he() *recognizerSingleByte { r := newRecognizer_8859_8("he", &ngrams_8859_8_I_he) r.charset = "ISO-8859-8-I" return r } func newRecognizer_8859_8_he() *recognizerSingleByte { return newRecognizer_8859_8("he", &ngrams_8859_8_he) } var charMap_8859_9 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, } var ngrams_8859_9_tr = [64]uint32{ 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, } func newRecognizer_8859_9(language string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: "ISO-8859-9", hasC1ByteCharset: "windows-1254", language: language, charMap: &charMap_8859_9, ngram: ngram, } } func newRecognizer_8859_9_tr() *recognizerSingleByte { return newRecognizer_8859_9("tr", &ngrams_8859_9_tr) } var charMap_windows_1256 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, } var ngrams_windows_1256 = [64]uint32{ 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, } func newRecognizer_windows_1256() *recognizerSingleByte { return &recognizerSingleByte{ charset: "windows-1256", language: "ar", charMap: &charMap_windows_1256, ngram: &ngrams_windows_1256, } } var charMap_windows_1251 = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, } var ngrams_windows_1251 = [64]uint32{ 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, } func newRecognizer_windows_1251() *recognizerSingleByte { return &recognizerSingleByte{ charset: "windows-1251", language: "ar", charMap: &charMap_windows_1251, ngram: &ngrams_windows_1251, } } var charMap_KOI8_R = [256]byte{ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, } var ngrams_KOI8_R = [64]uint32{ 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, } func newRecognizer_KOI8_R() *recognizerSingleByte { return &recognizerSingleByte{ charset: "KOI8-R", language: "ru", charMap: &charMap_KOI8_R, ngram: &ngrams_KOI8_R, } } var charMap_IBM424_he = [256]byte{ /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 7- */ 0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, /* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, } var ngrams_IBM424_he_rtl = [64]uint32{ 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, } var ngrams_IBM424_he_ltr = [64]uint32{ 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, 0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940, 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651, } func newRecognizer_IBM424_he(charset string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: charset, language: "he", charMap: &charMap_IBM424_he, ngram: ngram, } } func newRecognizer_IBM424_he_rtl() *recognizerSingleByte { return newRecognizer_IBM424_he("IBM424_rtl", &ngrams_IBM424_he_rtl) } func newRecognizer_IBM424_he_ltr() *recognizerSingleByte { return newRecognizer_IBM424_he("IBM424_ltr", &ngrams_IBM424_he_ltr) } var charMap_IBM420_ar = [256]byte{ /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, /* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, } var ngrams_IBM420_ar_rtl = [64]uint32{ 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB, 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40, 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, } var ngrams_IBM420_ar_ltr = [64]uint32{ 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156, } func newRecognizer_IBM420_ar(charset string, ngram *[64]uint32) *recognizerSingleByte { return &recognizerSingleByte{ charset: charset, language: "ar", charMap: &charMap_IBM420_ar, ngram: ngram, } } func newRecognizer_IBM420_ar_rtl() *recognizerSingleByte { return newRecognizer_IBM420_ar("IBM420_rtl", &ngrams_IBM420_ar_rtl) } func newRecognizer_IBM420_ar_ltr() *recognizerSingleByte { return newRecognizer_IBM420_ar("IBM420_ltr", &ngrams_IBM420_ar_ltr) } ================================================ FILE: modules/chardet/unicode.go ================================================ package chardet import ( "bytes" ) var ( utf16beBom = []byte{0xFE, 0xFF} utf16leBom = []byte{0xFF, 0xFE} utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF} utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00} ) type recognizerUtf16be struct { } func newRecognizer_utf16be() *recognizerUtf16be { return &recognizerUtf16be{} } func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) { output = recognizerOutput{ Charset: "UTF-16BE", } if bytes.HasPrefix(input.raw, utf16beBom) { output.Confidence = 100 } return } type recognizerUtf16le struct { } func newRecognizer_utf16le() *recognizerUtf16le { return &recognizerUtf16le{} } func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) { output = recognizerOutput{ Charset: "UTF-16LE", } if bytes.HasPrefix(input.raw, utf16leBom) && !bytes.HasPrefix(input.raw, utf32leBom) { output.Confidence = 100 } return } type recognizerUtf32 struct { name string bom []byte decodeChar func(input []byte) uint32 } func decodeUtf32be(input []byte) uint32 { return uint32(input[0])<<24 | uint32(input[1])<<16 | uint32(input[2])<<8 | uint32(input[3]) } func decodeUtf32le(input []byte) uint32 { return uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0]) } func newRecognizer_utf32be() *recognizerUtf32 { return &recognizerUtf32{ "UTF-32BE", utf32beBom, decodeUtf32be, } } func newRecognizer_utf32le() *recognizerUtf32 { return &recognizerUtf32{ "UTF-32LE", utf32leBom, decodeUtf32le, } } func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) { output = recognizerOutput{ Charset: r.name, } hasBom := bytes.HasPrefix(input.raw, r.bom) var numValid, numInvalid uint32 for b := input.raw; len(b) >= 4; b = b[4:] { if c := r.decodeChar(b); c >= 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF) { numInvalid++ } else { numValid++ } } if hasBom && numInvalid == 0 { output.Confidence = 100 } else if hasBom && numValid > numInvalid*10 { output.Confidence = 80 } else if numValid > 3 && numInvalid == 0 { output.Confidence = 100 } else if numValid > 0 && numInvalid == 0 { output.Confidence = 80 } else if numValid > numInvalid*10 { output.Confidence = 25 } return } ================================================ FILE: modules/chardet/utf8.go ================================================ package chardet import ( "bytes" ) var utf8Bom = []byte{0xEF, 0xBB, 0xBF} type recognizerUtf8 struct { } func newRecognizer_utf8() *recognizerUtf8 { return &recognizerUtf8{} } func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) { output = recognizerOutput{ Charset: "UTF-8", } hasBom := bytes.HasPrefix(input.raw, utf8Bom) inputLen := len(input.raw) var numValid, numInvalid uint32 var trailBytes uint8 for i := 0; i < inputLen; i++ { c := input.raw[i] if c&0x80 == 0 { continue } if c&0xE0 == 0xC0 { trailBytes = 1 } else if c&0xF0 == 0xE0 { trailBytes = 2 } else if c&0xF8 == 0xF0 { trailBytes = 3 } else { numInvalid++ if numInvalid > 5 { break } trailBytes = 0 } for i++; i < inputLen; i++ { c = input.raw[i] if c&0xC0 != 0x80 { numInvalid++ break } if trailBytes--; trailBytes == 0 { numValid++ break } } } if hasBom && numInvalid == 0 { output.Confidence = 100 } else if hasBom && numValid > numInvalid*10 { output.Confidence = 80 } else if numValid > 3 && numInvalid == 0 { output.Confidence = 100 } else if numValid > 0 && numInvalid == 0 { output.Confidence = 80 } else if numValid == 0 && numInvalid == 0 { // Plain ASCII output.Confidence = 10 } else if numValid > numInvalid*10 { output.Confidence = 25 } return } ================================================ FILE: modules/command/command.go ================================================ package command import ( "bytes" "context" "errors" "io" "os" "os/exec" "strconv" "strings" "sync" "time" ) const ( STDERR_BUFFER_LIMIT = 8 * 1024 STDERR_BUFFER_GROUP = 512 ) type LimitStderr struct { *strings.Builder limit int } func NewStderr() *LimitStderr { b := &strings.Builder{} b.Grow(STDERR_BUFFER_GROUP) return &LimitStderr{Builder: b, limit: STDERR_BUFFER_LIMIT} } func (w *LimitStderr) Bytes() []byte { return []byte(w.String()) } func (w *LimitStderr) Write(p []byte) (int, error) { n := len(p) var err error if w.limit > 0 { if n > w.limit { p = p[:w.limit] } w.limit -= len(p) _, err = w.Builder.Write(p) } return n, err } type Command struct { rawCmd *exec.Cmd context context.Context startTime time.Time s *shepherd detached bool once sync.Once waitError error } func (c *Command) Start() error { c.startTime = time.Now() if c.rawCmd.Stderr == nil { c.rawCmd.Stderr = os.Stderr } if err := c.rawCmd.Start(); err != nil { return err } c.s.inc() return nil } func (c *Command) wait() { if err := c.rawCmd.Wait(); err != nil && c.context.Err() != context.DeadlineExceeded { c.waitError = err return } c.waitError = c.context.Err() } func (c *Command) Wait() error { c.once.Do(func() { if c.rawCmd == nil { return } c.wait() c.s.dec() }) return c.waitError } func (c *Command) UseTime() time.Duration { return time.Since(c.startTime) } func (c *Command) Run() error { if err := c.Start(); err != nil { return err } return c.Wait() } // prefixSuffixSaver is an io.Writer which retains the first N bytes // and the last N bytes written to it. The Bytes() methods reconstructs // it with a pretty error message. type prefixSuffixSaver struct { N int // max size of prefix or suffix prefix []byte suffix []byte // ring buffer once len(suffix) == N suffixOff int // offset to write into suffix skipped int64 // TODO(bradfitz): we could keep one large []byte and use part of it for // the prefix, reserve space for the '... Omitting N bytes ...' message, // then the ring buffer suffix, and just rearrange the ring buffer // suffix when Bytes() is called, but it doesn't seem worth it for // now just for error messages. It's only ~64KB anyway. } func (w *prefixSuffixSaver) Write(p []byte) (n int, err error) { lenp := len(p) p = w.fill(&w.prefix, p) // Only keep the last w.N bytes of suffix data. if overage := len(p) - w.N; overage > 0 { p = p[overage:] w.skipped += int64(overage) } p = w.fill(&w.suffix, p) // w.suffix is full now if p is non-empty. Overwrite it in a circle. for len(p) > 0 { // 0, 1, or 2 iterations. n := copy(w.suffix[w.suffixOff:], p) p = p[n:] w.skipped += int64(n) w.suffixOff += n if w.suffixOff == w.N { w.suffixOff = 0 } } return lenp, nil } // fill appends up to len(p) bytes of p to *dst, such that *dst does not // grow larger than w.N. It returns the un-appended suffix of p. func (w *prefixSuffixSaver) fill(dst *[]byte, p []byte) (pRemain []byte) { if remain := w.N - len(*dst); remain > 0 { add := minInt(len(p), remain) *dst = append(*dst, p[:add]...) p = p[add:] } return p } func (w *prefixSuffixSaver) Bytes() []byte { if w.suffix == nil { return w.prefix } if w.skipped == 0 { return append(w.prefix, w.suffix...) } var buf bytes.Buffer buf.Grow(len(w.prefix) + len(w.suffix) + 50) buf.Write(w.prefix) buf.WriteString("\n... omitting ") buf.WriteString(strconv.FormatInt(w.skipped, 10)) buf.WriteString(" bytes ...\n") buf.Write(w.suffix[w.suffixOff:]) buf.Write(w.suffix[:w.suffixOff]) return buf.Bytes() } func minInt(a, b int) int { if a < b { return a } return b } func (c *Command) Environ() []string { return c.rawCmd.Environ() } func (c *Command) StdoutPipe() (io.ReadCloser, error) { return c.rawCmd.StdoutPipe() } func (c *Command) StderrPipe() (io.ReadCloser, error) { return c.rawCmd.StderrPipe() } func (c *Command) StdinPipe() (io.WriteCloser, error) { return c.rawCmd.StdinPipe() } func (c *Command) Output() ([]byte, error) { if c.rawCmd.Stdout != nil { return nil, errors.New("exec: Stdout already set") } var stdout bytes.Buffer c.rawCmd.Stdout = &stdout captureErr := c.rawCmd.Stderr == nil if captureErr { c.rawCmd.Stderr = &prefixSuffixSaver{N: 32 << 10} } err := c.Run() if err != nil && captureErr { if ee, ok := errors.AsType[*exec.ExitError](err); ok { ee.Stderr = c.rawCmd.Stderr.(*prefixSuffixSaver).Bytes() } } return stdout.Bytes(), err } func (c *Command) OneLine() (string, error) { b, err := c.Output() if err != nil { return "", err } return strings.TrimSpace(string(b)), nil } func (c *Command) RunEx() error { captureErr := c.rawCmd.Stderr == nil if captureErr { c.rawCmd.Stderr = &prefixSuffixSaver{N: 32 << 10} } err := c.Run() if err != nil && captureErr { if ee, ok := errors.AsType[*exec.ExitError](err); ok { ee.Stderr = c.rawCmd.Stderr.(*prefixSuffixSaver).Bytes() } } return err } func (c *Command) String() string { b := new(strings.Builder) b.WriteString("[") b.WriteString(c.rawCmd.Dir) b.WriteString("] ") b.WriteString(c.rawCmd.Path) for _, a := range c.rawCmd.Args[1:] { b.WriteByte(' ') b.WriteString(a) } return b.String() } func (c *Command) Exit() error { cleanExit(c.rawCmd, c.detached) return c.Wait() } ================================================ FILE: modules/command/shepherd.go ================================================ package command import ( "context" "io" "os/exec" "sync/atomic" "github.com/antgroup/hugescm/modules/env" ) type RunOpts struct { Environ []string // As environ ExtraEnv []string // append to env RepoPath string // dir Stderr io.Writer // stderr Stdout io.Writer // stdout Stdin io.Reader // stdin Detached bool //Detached If true, the child process will not be terminated when the parent process ends NoSetpgid bool } type Shepherd interface { // NewFromOptions: Create command with options NewFromOptions(ctx context.Context, opt *RunOpts, name string, arg ...string) *Command // New: Create a process with environment variable isolation New(ctx context.Context, repoPath string, name string, arg ...string) *Command // ProcessesCount: Get the number of child processes ProcessesCount() int32 } type shepherd struct { env.Builder count int32 } func (s *shepherd) inc() int32 { return atomic.AddInt32(&s.count, 1) } func (s *shepherd) dec() int32 { return atomic.AddInt32(&s.count, -1) } func (s *shepherd) ProcessesCount() int32 { return atomic.LoadInt32(&s.count) } func NewShepherd(b env.Builder) Shepherd { return &shepherd{Builder: b} } // New new command: func (s *shepherd) New(ctx context.Context, repoPath string, name string, arg ...string) *Command { return s.NewFromOptions(ctx, &RunOpts{RepoPath: repoPath}, name, arg...) } func (s *shepherd) NewFromOptions(ctx context.Context, opt *RunOpts, name string, arg ...string) *Command { cmd := exec.CommandContext(ctx, name, arg...) cmd.Dir = opt.RepoPath if len(opt.Environ) == 0 { cmd.Env = append(cmd.Env, s.Environ()...) } else { cmd.Env = append(cmd.Env, opt.Environ...) } if len(opt.ExtraEnv) != 0 { cmd.Env = append(cmd.Env, opt.ExtraEnv...) } cmd.Stderr = opt.Stderr cmd.Stdout = opt.Stdout cmd.Stdin = opt.Stdin c := &Command{rawCmd: cmd, context: ctx, s: s, detached: opt.Detached} if !opt.NoSetpgid { setSysProcAttribute(cmd, c.detached) } return c } var ( sd = NewShepherd(env.NewBuilder()) ) // Create an isolated process based on shepherd func NewFromOptions(ctx context.Context, opt *RunOpts, name string, arg ...string) *Command { return sd.NewFromOptions(ctx, opt, name, arg...) } // Create an isolated process based on shepherd func New(ctx context.Context, repoPath string, name string, arg ...string) *Command { return sd.New(ctx, repoPath, name, arg...) } // ProcessesCount: Get the number of child processes of the default shepherd func ProcessesCount() int32 { return sd.ProcessesCount() } ================================================ FILE: modules/command/shepherd_linux.go ================================================ //go:build linux package command import ( "os/exec" "syscall" ) func setSysProcAttribute(c *exec.Cmd, detached bool) { c.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, } if !detached { c.SysProcAttr.Pdeathsig = syscall.SIGTERM } } func cleanExit(c *exec.Cmd, detached bool) { if c.Process == nil || c.Process.Pid <= 0 { return } if c.SysProcAttr != nil && c.SysProcAttr.Setpgid && !detached { _ = syscall.Kill(-c.Process.Pid, syscall.SIGTERM) return } _ = syscall.Kill(c.Process.Pid, syscall.SIGTERM) } ================================================ FILE: modules/command/shepherd_test.go ================================================ package command import ( "context" "fmt" "os" "strings" "testing" "time" ) func TestNewCommand(t *testing.T) { cmd := New(t.Context(), ".", "git", "version") line, err := cmd.OneLine() if err != nil { fmt.Fprintf(os.Stderr, "error: %v", err) return } fmt.Fprintf(os.Stderr, "%s\nCount: %d\n", line, ProcessesCount()) } func TestNewCommand2(t *testing.T) { var stdout strings.Builder cmd := NewFromOptions(t.Context(), &RunOpts{RepoPath: ".", Stdout: &stdout}, "git", "version") if err := cmd.Start(); err != nil { fmt.Fprintf(os.Stderr, "error: %v", err) return } fmt.Fprintf(os.Stderr, "[%s]\nCount: %d\n", stdout.String(), ProcessesCount()) if err := cmd.Wait(); err != nil { fmt.Fprintf(os.Stderr, "error: %v", err) return } fmt.Fprintf(os.Stderr, "[%s]\nCount: %d\n", stdout.String(), ProcessesCount()) } func TestNewCommand3(t *testing.T) { cmd := New(t.Context(), ".", "git", "version---") b, err := cmd.Output() if err != nil { fmt.Fprintf(os.Stderr, "error: %v\nCount: %d\n", FromError(err), ProcessesCount()) return } fmt.Fprintf(os.Stderr, "%s\nCount: %d\n", b, ProcessesCount()) } func TestNewCommand4(t *testing.T) { cmd := New(t.Context(), ".", "git", "help") b, err := cmd.Output() if err != nil { fmt.Fprintf(os.Stderr, "error: %v\nCount: %d\n", FromError(err), ProcessesCount()) return } fmt.Fprintf(os.Stderr, "%s\nCount: %d\nuse time: %v\n", b, ProcessesCount(), cmd.UseTime()) } func TestWaitTimeout(t *testing.T) { newCtx, cancelCtx := context.WithTimeout(t.Context(), time.Second*4) defer cancelCtx() cmd := NewFromOptions(newCtx, &RunOpts{ Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, }, "git", "upload-pack", "/tmp/ssh.git") if err := cmd.Run(); err != nil { fmt.Fprintf(os.Stderr, "error: %v\nCount: %d\n", FromError(err), ProcessesCount()) return } } func TestChildProcess(t *testing.T) { newCtx, cancelCtx := context.WithTimeout(t.Context(), time.Second*10) defer cancelCtx() cmd := NewFromOptions(newCtx, &RunOpts{ Stderr: os.Stderr, Stdout: os.Stdout, Stdin: os.Stdin, }, "sh", "-c", "git upload-pack /root/dev/batman/.git") if err := cmd.Run(); err != nil { fmt.Fprintf(os.Stderr, "error: %v\nCount: %d\n", FromError(err), ProcessesCount()) return } } ================================================ FILE: modules/command/shepherd_unix.go ================================================ //go:build !windows && !linux package command import ( "os/exec" "syscall" ) func setSysProcAttribute(c *exec.Cmd, _ bool) { c.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} } func cleanExit(c *exec.Cmd, detached bool) { if c.Process == nil || c.Process.Pid <= 0 { return } if c.SysProcAttr != nil && c.SysProcAttr.Setpgid && !detached { _ = syscall.Kill(-c.Process.Pid, syscall.SIGTERM) return } _ = syscall.Kill(c.Process.Pid, syscall.SIGTERM) } ================================================ FILE: modules/command/shepherd_win.go ================================================ //go:build windows package command import "os/exec" func setSysProcAttribute(c *exec.Cmd, detached bool) { // placeholders } func cleanExit(c *exec.Cmd, _ bool) { if c != nil && c.Process != nil { _ = c.Process.Kill() } } ================================================ FILE: modules/command/util.go ================================================ package command import ( "errors" "os/exec" "github.com/antgroup/hugescm/modules/strengthen" ) const ( NoDir = "" ) func FromError(err error) string { if err == nil { return "" } if e, ok := errors.AsType[*exec.ExitError](err); ok { if len(e.Stderr) > 0 { return strengthen.ByteCat([]byte(e.Error()), []byte(". stderr: "), e.Stderr) } return e.Error() } return err.Error() } func FromErrorCode(err error) int { if err == nil { return 0 } if e, ok := errors.AsType[*exec.ExitError](err); ok { return e.ExitCode() } return -1 } ================================================ FILE: modules/crc/reader.go ================================================ package crc import ( "bufio" "encoding/hex" "errors" "fmt" "hash" "hash/crc64" "io" "strings" ) type Crc64Writer struct { io.Writer Base io.Writer h hash.Hash } type Finisher interface { Finish() (string, error) } func NewCrc64Writer(w io.Writer) *Crc64Writer { h := crc64.New(crc64.MakeTable(crc64.ISO)) return &Crc64Writer{ Writer: io.MultiWriter(w, h), Base: w, h: h, } } func (cw *Crc64Writer) Finish() (string, error) { if cw.h == nil { return "", nil } checksum := hex.EncodeToString(cw.h.Sum(nil)) if _, err := cw.Write([]byte(checksum)); err != nil { return "", errors.New("write checksum error") } return checksum, nil } type Crc64Reader struct { br *bufio.Reader h hash.Hash } func (cr *Crc64Reader) Read(p []byte) (n int, err error) { n, err = cr.br.Read(p) if err == nil { cr.h.Write(p[:n]) } return } func NewCrc64Reader(r io.Reader) *Crc64Reader { return &Crc64Reader{br: bufio.NewReader(r), h: crc64.New(crc64.MakeTable(crc64.ISO))} } func (cr *Crc64Reader) Verify() error { var sum [16]byte if _, err := io.ReadFull(cr.br, sum[:]); err != nil { return err } want := string(sum[:]) got := hex.EncodeToString(cr.h.Sum(nil)) if strings.EqualFold(got, want) { return nil } return fmt.Errorf("unexpected crc64 checksum got '%s' want '%s'", got, want) } ================================================ FILE: modules/deflect/az.go ================================================ package deflect import ( "slices" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/strengthen" ) const ( // MaxLooseObjects is the threshold for "too many" loose objects (1000) MaxLooseObjects = 1000 // MaxPacks is the threshold for "too many" pack files (3) MaxPacks = 3 // MinPackSize is the minimum size (4GB) for considering packs as "small" in housekeeping MinPackSize = 4 * strengthen.GiByte ) // Pack represents a Git pack file with its name and size type Pack struct { Name string // Full path to pack file Size int64 // Size in bytes } // Result contains the results of repository housekeeping scan type Result struct { Size int64 // Total repository size in bytes LooseObjects int // Number of loose objects Packs []*Pack // List of pack files TmpPacks uint32 // Count of temporary pack files } // IsUntidy determines if the repository needs housekeeping/maintenance // Returns true if any of these conditions are met: // - Has temporary pack files // - Has too many loose objects (> 1000) // - Has many pack files (> 3) and at least one is small (< 4GB) func (r *Result) IsUntidy() bool { if r.TmpPacks > 0 { return true } if r.LooseObjects > MaxLooseObjects { return true } return len(r.Packs) > MaxPacks && slices.ContainsFunc(r.Packs, func(p *Pack) bool { return p.Size < MinPackSize }) } // HousekeepingScan performs a repository housekeeping analysis // Returns Result struct with repository statistics and maintenance status // This function is useful for determining if a repository needs git gc/repack func HousekeepingScan(repoPath string) (*Result, error) { shaFormat, err := git.HashFormatResult(repoPath) if err != nil { return nil, err } au := NewAuditor(repoPath, shaFormat, &Option{ Limit: strengthen.GiByte, QuarantineMode: false, OnOversized: func(oid string, size int64) error { return nil }, }) if err := au.Du(); err != nil { return nil, err } result := &Result{ Size: au.size, LooseObjects: int(au.counts), Packs: make([]*Pack, 0, len(au.packs)), TmpPacks: au.tmpPacks, } for _, p := range au.packs { result.Packs = append(result.Packs, &Pack{ Name: p.path, Size: p.size, }) } return result, nil } ================================================ FILE: modules/deflect/deflect.go ================================================ package deflect import ( "fmt" "os" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/strengthen" ) // Typical .git/config format: // [core] // repositoryformatversion = 1 // filemode = true // bare = false // logallrefupdates = true // ignorecase = true // precomposeunicode = true // [extensions] // objectformat = sha256 const ( // DefaultFileSizeLimit is the default file size threshold (50 MiB) for identifying large files DefaultFileSizeLimit = strengthen.MiByte * 50 // hugeSizeLimit defines the threshold (15 MiB) for considering files as "huge" for statistics hugeSizeLimit = strengthen.MiByte * 15 ) // Option configures the auditing behavior for repository analysis type Option struct { // Limit is the file size threshold in bytes. Files larger than this will be rejected. Limit int64 // OnOversized is a callback function called for each file that exceeds the limit. // Returns an error to stop processing, or nil to continue. OnOversized func(oid string, size int64) error // QuarantineMode enables analysis of incoming objects in Git quarantine mode. // When enabled, analyzes both the main repository and quarantine directory. QuarantineMode bool } // pack represents a Git pack file with its path and size type pack struct { path string // Full path to the .pack file size int64 // Size of the pack file in bytes } // Auditor is the main analyzer for Git repository large file detection type Auditor struct { *Option // Embedded auditing configuration repoPath string // Path to the Git repository root directory size int64 // Total size of all objects in bytes delta int64 // Size increment for quarantine mode analysis hugeSum int64 // Total size of files exceeding hugeSizeLimit rawsz int64 // Size of hash values (20 for SHA1, 32 for SHA256) counts uint32 // Total number of objects analyzed packs []pack // List of pack files to be analyzed tmpPacks uint32 // Count of temporary pack files (tmp_*.pack) } // NewAuditor creates a new Auditor instance for analyzing a Git repository // Parameters: // - repoPath: path to the Git repository directory // - shaFormat: the hash format (SHA1 or SHA256) used by the repository // - opts: optional filtering configuration (nil for defaults) func NewAuditor(repoPath string, shaFormat git.HashFormat, opts *Option) *Auditor { au := &Auditor{ repoPath: repoPath, rawsz: int64(shaFormat.RawSize()), } if opts == nil { au.Option = &Option{ Limit: DefaultFileSizeLimit, } return au } au.Option = &Option{ Limit: opts.Limit, OnOversized: opts.OnOversized, QuarantineMode: opts.QuarantineMode, } if au.Limit <= 0 { au.Limit = DefaultFileSizeLimit // avoid --> au.Limit <= 0 } return au } // HashLen returns the hash length in bytes (20 for SHA1, 32 for SHA256) func (a *Auditor) HashLen() int64 { return a.rawsz } // Counts returns the total number of objects analyzed func (a *Auditor) Counts() uint32 { return a.counts } // Size returns the total size of all objects in bytes func (a *Auditor) Size() int64 { return a.size } // Delta returns the size increment for quarantine mode analysis func (a *Auditor) Delta() int64 { return a.delta } // HugeSUM returns the total size of files exceeding hugeSizeLimit func (a *Auditor) HugeSUM() int64 { return a.hugeSum } // Execute performs the complete repository analysis: // 1. Analyzes disk usage of loose objects and pack files // 2. Calls the SizeReceiver callback with total size if provided // 3. Analyzes each pack file for large objects func (a *Auditor) Execute() error { if err := a.Du(); err != nil { return err } for _, p := range a.packs { if err := a.analyzePack(&p); err != nil { return err } } return nil } // onOversized handles rejected large files by calling the configured Rejector or printing to stderr func (a *Auditor) onOversized(oid string, size int64) error { if a.OnOversized == nil { fmt.Fprintf(os.Stderr, "blob: %s compressed size: %s\n", oid, strengthen.FormatSize(size)) return nil } return a.OnOversized(oid, size) } // Du is a convenience function that calculates the total disk usage of a Git repository // Returns the total size in bytes and any error encountered func Du(repoPath string) (int64, error) { shaFormat, err := git.HashFormatResult(repoPath) if err != nil { return 0, err } au := NewAuditor(repoPath, shaFormat, &Option{ Limit: strengthen.GiByte, QuarantineMode: false, OnOversized: func(oid string, size int64) error { return nil }, }) if err := au.Du(); err != nil { return 0, err } return au.Size(), nil } ================================================ FILE: modules/deflect/deflect_test.go ================================================ package deflect_test import ( "fmt" "os" "path/filepath" "runtime" "testing" "github.com/antgroup/hugescm/modules/deflect" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/strengthen" ) func TestDeflectFilter(t *testing.T) { _, filename, _, _ := runtime.Caller(0) repoPath := git.RevParseRepoPath(t.Context(), filepath.Dir(filename)) shaFormat, err := git.HashFormatResult(repoPath) if err != nil { fmt.Fprintf(os.Stderr, "repo: %v", err) return } au := deflect.NewAuditor(repoPath, shaFormat, nil) if err := au.Execute(); err != nil { return } fmt.Fprintf(os.Stderr, "RepoSize: %d, Q: %d hashLen: %d\n", au.Size(), au.Delta(), au.HashLen()) } // TestDeflectFilter2 tests quarantine mode behavior with an intentional edge case // // NOTE: This test intentionally sets GIT_QUARANTINE_PATH to the main objects directory // to verify the quarantine mode's handling of overlapping directories. This is NOT a // realistic Git quarantine scenario (which would use a separate temporary directory), // but serves as a stress test for the following behaviors: // // 1. The same directory being analyzed twice (once as main repo, once as quarantine) // 2. Verification that quarantine mode correctly accumulates delta statistics // 3. Edge case handling when quarantine path points to existing repository objects // // Expected behavior: // - RepoSize will be larger than TestDeflectFilter because objects are counted twice // - Delta (Q) will show the size increment from the quarantine analysis // - The test should complete without errors despite the overlapping directories // // In production, GIT_QUARANTINE_PATH should point to a separate temporary directory // used by Git during push operations to store incoming objects before they are // integrated into the main repository. func TestDeflectFilter2(t *testing.T) { _, filename, _, _ := runtime.Caller(0) repoPath := git.RevParseRepoPath(t.Context(), filepath.Dir(filename)) shaFormat, err := git.HashFormatResult(repoPath) if err != nil { fmt.Fprintf(os.Stderr, "repo: %v", err) return } t.Setenv(deflect.ENV_GIT_QUARANTINE_PATH, filepath.Join(repoPath, "objects")) fe := deflect.NewAuditor(repoPath, shaFormat, &deflect.Option{ Limit: 10 << 20, OnOversized: nil, QuarantineMode: true, }) if err := fe.Execute(); err != nil { return } fmt.Fprintf(os.Stderr, "RepoSize: %d, Q: %d hashLen: %d\n", fe.Size(), fe.Delta(), fe.HashLen()) } func TestRepoSize(t *testing.T) { _, filename, _, _ := runtime.Caller(0) repoPath := git.RevParseRepoPath(t.Context(), filepath.Dir(filename)) size, err := deflect.Du(repoPath) if err != nil { fmt.Fprintf(os.Stderr, "error %v", err) return } fmt.Fprintf(os.Stderr, "%s repo size: %s\n", repoPath, strengthen.FormatSize(size)) if size <= 0 { t.Errorf("Expected size > 0, got %d", size) } } func TestHousekeepingScan(t *testing.T) { _, filename, _, _ := runtime.Caller(0) repoPath := git.RevParseRepoPath(t.Context(), filepath.Dir(filename)) result, err := deflect.HousekeepingScan(repoPath) if err != nil { fmt.Fprintf(os.Stderr, "Error %v\n", err) return } fmt.Fprintf(os.Stderr, "repo %s needs maintenance: %v packs: %d loose objects: %d size: %s\n", repoPath, result.IsUntidy(), len(result.Packs), result.LooseObjects, strengthen.FormatSize(result.Size)) } // TestOnOversizedCallback tests the OnOversized callback functionality // This increases coverage of the onOversized method and verifies that oversized files are properly reported func TestOnOversizedCallback(t *testing.T) { _, filename, _, _ := runtime.Caller(0) repoPath := git.RevParseRepoPath(t.Context(), filepath.Dir(filename)) shaFormat, err := git.HashFormatResult(repoPath) if err != nil { fmt.Fprintf(os.Stderr, "repo: %v", err) return } var oversizedCount int var oversizedFiles []string fe := deflect.NewAuditor(repoPath, shaFormat, &deflect.Option{ Limit: 10 << 20, // 10MB limit OnOversized: func(oid string, size int64) error { oversizedCount++ oversizedFiles = append(oversizedFiles, oid) fmt.Fprintf(os.Stderr, "Found oversized file: %s size: %d\n", oid, size) return nil }, QuarantineMode: false, }) if err := fe.Execute(); err != nil { fmt.Fprintf(os.Stderr, "execute error: %v", err) return } fmt.Fprintf(os.Stderr, "Oversized files count: %d\n", oversizedCount) fmt.Fprintf(os.Stderr, "Total objects: %d\n", fe.Counts()) fmt.Fprintf(os.Stderr, "Huge SUM: %d\n", fe.HugeSUM()) } // TestDuWithLooseObjects tests disk usage analysis with loose objects // This increases coverage of duObject which handles loose Git objects func TestDuWithLooseObjects(t *testing.T) { // Use the test repository created with loose objects repoPath := "/tmp/test-repo-deflect" shaFormat, err := git.HashFormatResult(repoPath) if err != nil { fmt.Fprintf(os.Stderr, "repo: %v", err) return } fe := deflect.NewAuditor(repoPath, shaFormat, &deflect.Option{ Limit: 1 << 20, // 1MB limit QuarantineMode: false, }) if err := fe.Du(); err != nil { fmt.Fprintf(os.Stderr, "du error: %v", err) return } fmt.Fprintf(os.Stderr, "Loose objects count: %d\n", fe.Counts()) fmt.Fprintf(os.Stderr, "Total size: %s\n", strengthen.FormatSize(fe.Size())) fmt.Fprintf(os.Stderr, "Huge SUM: %s\n", strengthen.FormatSize(fe.HugeSUM())) // Verify we have loose objects if fe.Counts() == 0 { fmt.Fprintf(os.Stderr, "Warning: No loose objects found\n") } } // TestFilterAccessors tests all accessor methods of Filter // This increases coverage of Counts(), HugeSUM(), Delta(), HashLen(), Size() func TestFilterAccessors(t *testing.T) { _, filename, _, _ := runtime.Caller(0) repoPath := git.RevParseRepoPath(t.Context(), filepath.Dir(filename)) shaFormat, err := git.HashFormatResult(repoPath) if err != nil { fmt.Fprintf(os.Stderr, "repo: %v", err) return } fe := deflect.NewAuditor(repoPath, shaFormat, &deflect.Option{ Limit: 10 << 20, QuarantineMode: false, }) if err := fe.Execute(); err != nil { fmt.Fprintf(os.Stderr, "execute error: %v", err) return } // Test all accessor methods fmt.Fprintf(os.Stderr, "HashLen: %d\n", fe.HashLen()) fmt.Fprintf(os.Stderr, "Counts: %d\n", fe.Counts()) fmt.Fprintf(os.Stderr, "Size: %d\n", fe.Size()) fmt.Fprintf(os.Stderr, "Delta: %d\n", fe.Delta()) fmt.Fprintf(os.Stderr, "HugeSUM: %d\n", fe.HugeSUM()) // Verify basic invariants if fe.HashLen() != 20 && fe.HashLen() != 32 { fmt.Fprintf(os.Stderr, "Error: Invalid hash length: %d\n", fe.HashLen()) } if fe.Counts() == 0 { fmt.Fprintf(os.Stderr, "Warning: No objects counted\n") } } // TestOnOversizedCallbackNil tests onOversized when OnOversized callback is nil // This increases coverage of onOversized method with nil callback (prints to stderr) func TestOnOversizedCallbackNil(t *testing.T) { _, filename, _, _ := runtime.Caller(0) repoPath := git.RevParseRepoPath(t.Context(), filepath.Dir(filename)) shaFormat, err := git.HashFormatResult(repoPath) if err != nil { fmt.Fprintf(os.Stderr, "repo: %v", err) return } fe := deflect.NewAuditor(repoPath, shaFormat, &deflect.Option{ Limit: 1, // 1 byte limit - this should trigger oversized files OnOversized: nil, // No callback, should print to stderr QuarantineMode: false, }) if err := fe.Execute(); err != nil { fmt.Fprintf(os.Stderr, "execute error: %v", err) return } // The test passes if Execute completes without error // onOversized will print to stderr for oversized files fmt.Fprintf(os.Stderr, "Test completed with nil OnOversized callback\n") } ================================================ FILE: modules/deflect/du.go ================================================ package deflect import ( "os" "path/filepath" "strings" ) const ( // ENV_GIT_QUARANTINE_PATH is the environment variable used by Git for incoming objects ENV_GIT_QUARANTINE_PATH = "GIT_QUARANTINE_PATH" ) // ReadDir reads directory entries from the specified path // Returns a slice of directory entries or an error func ReadDir(name string) ([]os.DirEntry, error) { f, err := os.Open(name) if err != nil { return nil, err } defer f.Close() // nolint dirs, err := f.ReadDir(-1) return dirs, err } // duObject analyzes loose Git objects in a single hash prefix directory (e.g., objects/ab/) // Parameters: // - p: path to the hash prefix directory // - name: hash prefix (2 characters) for constructing object IDs // - hugeReject: whether to reject files exceeding size limit // - deltaSUM: whether to accumulate sizes for quarantine mode // // Note: This function silently skips directories that cannot be read because: // 1. Git objects directory may not contain all 256 hash prefix directories // 2. Some prefix directories may not exist or be temporarily inaccessible // 3. Partial statistics are preferable to complete failure in this context func (a *Auditor) duObject(p, name string, hugeReject, deltaSUM bool) error { ds, err := ReadDir(p) if err != nil { // Silently skip directories that cannot be read - this is intentional design return nil } for _, d := range ds { if d.IsDir() { continue } fi, err := d.Info() if err != nil { continue } a.counts++ size := fi.Size() a.size += size if deltaSUM { a.delta += size } if size > hugeSizeLimit { a.hugeSum += size } if hugeReject && size > a.Limit { if err := a.onOversized(name+d.Name(), size); err != nil { return err } } } return nil } func (a *Auditor) duPacks(packdir string, hugeReject, deltaSUM bool) error { ds, err := ReadDir(packdir) if err != nil { return err } for _, d := range ds { if d.IsDir() { continue } fi, err := d.Info() if err != nil { return err } size := fi.Size() a.size += size if deltaSUM { a.delta += size } dirName := fi.Name() if strings.HasPrefix(dirName, "tmp_") { a.tmpPacks++ } if filepath.Ext(dirName) != ".pack" { continue } if !hugeReject { continue } // quarantine environment mode optimization: skip small pack if a.QuarantineMode && size < a.Limit { continue } a.packs = append(a.packs, pack{path: filepath.Join(packdir, fi.Name()), size: size}) } return nil } // objects/ // |-00/ // | - hash // |-01 // |-pack // |- pack-$hash.pack // |- pack-$hash.idx // |- pack-$hash.bitmap // |-info // duInternal analyzes the Git objects directory structure // Parameters: // - objectsDir: path to the objects directory (main or quarantine) // - hugeReject: whether to analyze for large objects // - deltaSUM: whether to accumulate sizes (for quarantine mode) // // This function traverses both loose object directories (00-ff) and pack files func (a *Auditor) duInternal(objectsDir string, hugeReject, deltaSUM bool) error { ds, err := ReadDir(objectsDir) if err != nil { return err } for _, d := range ds { if !d.IsDir() { continue } name := d.Name() if len(name) == 2 { p := filepath.Join(objectsDir, name) if err := a.duObject(p, name, hugeReject, deltaSUM); err != nil { return err } continue } if name == "pack" { if err := a.duPacks(filepath.Join(objectsDir, "pack"), hugeReject, deltaSUM); err != nil { return err } } } return nil } // Du performs disk usage analysis of the Git repository // In quarantine mode, also analyzes incoming objects in GIT_QUARANTINE_PATH func (a *Auditor) Du() error { if err := a.duInternal(filepath.Join(a.repoPath, "objects"), !a.QuarantineMode, false); err != nil { return err } if !a.QuarantineMode { return nil } incomingPath := os.Getenv(ENV_GIT_QUARANTINE_PATH) if len(incomingPath) == 0 { return nil } if err := a.duInternal(incomingPath, true, true); err != nil { return err } return nil } ================================================ FILE: modules/deflect/pack.go ================================================ package deflect // We only support Git pack index file version 2 (SHA1/SHA256) // Reference: https://forcemz.net/git/2017/11/22/GitNativeHookDepthOptimization/ import ( "bufio" "bytes" "encoding/binary" "encoding/hex" "errors" "io" "os" "sort" "strings" ) var ( // ErrUnsupportedVersion is returned when the pack index file version is not supported ErrUnsupportedVersion = errors.New("idxfile: Unsupported version") // ErrMalformedIdxFile is returned when the pack index file is corrupted or invalid ErrMalformedIdxFile = errors.New("idxfile: Malformed IDX file") ) const ( // fanout is the number of fanout table entries (256 for SHA1/SHA256) fanout = 256 // VersionSupported is the only pack index version supported (v2) // Version 3 supports SHA1/SHA256 hybrid object storage but we only support v2 VersionSupported uint32 = 2 // isO64Mask is used to identify 64-bit offsets in the offset table isO64Mask = uint64(1) << 31 // offsetMask extracts the actual offset value from a 32-bit offset entry offsetMask = int(0x7fffffff) ) var ( // idxHeader is the magic header for Git pack index files: "\xfftOc" idxHeader = []byte{255, 't', 'O', 'c'} ) // validateHeader reads and validates the pack index file header func validateHeader(r io.Reader) error { var h = make([]byte, 4) if _, err := io.ReadFull(r, h); err != nil { return err } if !bytes.Equal(h, idxHeader) { return ErrMalformedIdxFile } return nil } // hashFromIndex extracts object hash from pack index file at the given index // Parameters: // - rs: ReadSeeker for the pack index file // - i: object index position // // Returns the hexadecimal encoded hash string func (a *Auditor) hashFromIndex(rs io.ReadSeeker, i int64) (string, error) { bin := make([]byte, a.rawsz) // Pack index file format v2 offset calculation: // - 4 bytes: magic header // - 4 bytes: version (2) // - 4 bytes: fanout count (256) // - 255*4 bytes: fanout table (256 entries, 4 bytes each) const ob int64 = 4 + 4 + 4 + 255*4 if _, err := rs.Seek(ob+i*a.rawsz, io.SeekStart); err != nil { return "", err } if _, err := io.ReadFull(rs, bin[0:a.rawsz]); err != nil { return "", err } return hex.EncodeToString(bin[0:a.rawsz]), nil } // analyzePack analyzes a single pack file to find large objects // Opens the corresponding .idx file and determines whether to use // 32-bit or 64-bit offset processing based on file size func (a *Auditor) analyzePack(p *pack) error { idx := strings.TrimSuffix(p.path, ".pack") + ".idx" fd, err := os.Open(idx) if err != nil { return err } defer fd.Close() // nolint fi, err := fd.Stat() if err != nil { return err } if err = validateHeader(fd); err != nil { return err } var v, nr uint32 if err := binary.Read(fd, binary.BigEndian, &v); err != nil { return err } if v != VersionSupported { return ErrUnsupportedVersion } if _, err := fd.Seek(255*4, io.SeekCurrent); err != nil { return err } /// number of entries in pack file if err := binary.Read(fd, binary.BigEndian, &nr); err != nil { return err } a.counts += nr /* * Minimum pack index file size calculation: * - 8 bytes of header (4 magic + 4 version) * - 256 fanout entries, 4 bytes each * - object ID entry * nr * - 4-byte crc entry * nr * - 4-byte offset entry * nr * - packfile hash * - file checksum * And after the 4-byte offset table there might be a * variable sized table containing 8-byte entries * for offsets larger than 2^31. */ // hash + offset + crc32 + magic + version + fanout minSize := (a.rawsz+4+4)*int64(nr) + 4 + 4 + 4*fanout + a.rawsz + a.rawsz if minSize < fi.Size() { return a.analyzePack64(fd, nr, p.size) } return a.analyzePack32(fd, nr, p.size) } // analyzePack32 processes pack files with 32-bit offsets (< 2GB) // Uses sorting algorithm to estimate object sizes by comparing consecutive offsets func (a *Auditor) analyzePack32(rs io.ReadSeeker, nr uint32, packsz int64) error { seekTo := int64(nr)*(a.rawsz+4) + 4 + 4 + fanout*4 if _, err := rs.Seek(seekTo, io.SeekStart); err != nil { return err } br := bufio.NewReader(rs) objs := make(object32s, nr) for i := range nr { objs[i].index = i var offset uint32 if err := binary.Read(br, binary.BigEndian, &offset); err != nil { return err } objs[i].offset = offset } sort.Sort(objs) pre := packsz - a.rawsz for _, o := range objs { sz := pre - int64(o.offset) //nolint:unconvert // uint32 -> int64 conversion for size calculation pre = int64(o.offset) //nolint:unconvert // uint32 -> int64 conversion for size calculation if sz > hugeSizeLimit { a.hugeSum += sz } if sz < a.Limit { continue } hs, err := a.hashFromIndex(rs, int64(o.index)) if err != nil { return err } if err := a.onOversized(hs, sz); err != nil { return err } } return nil } // analyzePack64 processes pack files with 64-bit offsets (>= 2GB) // Handles both 32-bit and 64-bit offset entries, using the 64-bit offset table // when the MSB (most significant bit) is set in the 32-bit offset field func (a *Auditor) analyzePack64(rs io.ReadSeeker, nr uint32, packsz int64) error { seekTo := int64(nr)*(a.rawsz+4) + 4 + 4 + fanout*4 if _, err := rs.Seek(seekTo, io.SeekStart); err != nil { return err } bindata := make([]byte, nr*4) if _, err := io.ReadFull(rs, bindata); err != nil { return err } objs := make(object64s, nr) for i := range nr { objs[i].index = i objs[i].offset = int64(binary.BigEndian.Uint32(bindata[i*4:])) // Check if this is a large offset (MSB set) if objs[i].offset&int64(isO64Mask) != 0 { off := objs[i].offset & int64(offsetMask) if _, err := rs.Seek(seekTo+int64(nr)*4+off*8, io.SeekStart); err != nil { return err } if err := binary.Read(rs, binary.BigEndian, &objs[i].offset); err != nil { return err } } } sort.Sort(objs) pre := packsz - a.rawsz for _, o := range objs { sz := pre - o.offset pre = o.offset if sz > hugeSizeLimit { a.hugeSum += sz } if sz < a.Limit { continue } hs, err := a.hashFromIndex(rs, int64(o.index)) if err != nil { return err } if err := a.onOversized(hs, sz); err != nil { return err } } return nil } ================================================ FILE: modules/deflect/struct.go ================================================ package deflect // object32 represents an object in pack files with 32-bit offsets (< 4GB) // The offset and index are used to sort objects by position in pack file type object32 struct { offset uint32 // Object offset in pack file (32-bit) index uint32 // Original object index in pack index file } // object64 represents an object in pack files with 64-bit offsets (>= 4GB) // Used for large pack files where 32-bit offsets are insufficient type object64 struct { offset int64 // Object offset in pack file (64-bit) index uint32 // Original object index in pack index file } // Object size calculation strategy: // Offsets are arranged in ascending order, then subtracted one by one // to estimate the rough size of each object in the pack file. // This provides size estimation without decompressing each object. type object32s []object32 // Len implements sort.Interface for object32s func (o object32s) Len() int { return len(o) } // Less implements sort.Interface for object32s // Descending order by offset (largest offset first) func (o object32s) Less(i, j int) bool { return o[i].offset > o[j].offset } // Swap implements sort.Interface for object32s func (o object32s) Swap(i, j int) { o[i], o[j] = o[j], o[i] } type object64s []object64 // Len implements sort.Interface for object64s func (o object64s) Len() int { return len(o) } // Less implements sort.Interface for object64s // Descending order by offset (largest offset first) func (o object64s) Less(i, j int) bool { return o[i].offset > o[j].offset } // Swap implements sort.Interface for object64s func (o object64s) Swap(i, j int) { o[i], o[j] = o[j], o[i] } ================================================ FILE: modules/diferenco/MERGE_PARALLEL.md ================================================ # MergeParallel 实现文档 > **本实现由 GLM-5 (智谱 AI) 生成** > > MergeParallel 和 HasConflictParallel 是基于 Diff3 论文的三路合并实现, > 由 GLM-5 大语言模型生成并经过全面测试验证和 GPT review 优化。 ## 项目概述 基于 Diff3 论文重新实现了三路合并功能,使用 Go 1.26+ 现代化代码风格,包含全面的测试覆盖和性能优化。 --- ## 文件清单 | 文件 | 行数 | 描述 | |------|------|------| | `merge_parallel.go` | ~420 | 核心三路合并实现(GLM-5 生成),包含 MergeParallel 和 HasConflictParallel | | `merge_parallel_test.go` | 850+ | 完整测试套件 | | `merge_parallel_bench_test.go` | ~140 | 性能基准测试 | --- ## 核心特性 ### 算法设计 ``` MergeParallel() └─> newMergeInternal() ├─> 并行计算两个 diff(O→A, O→B)← 核心优化 ├─> 区域划分算法(O(n log n) 排序 + O(n) 遍历) ├─> 冲突检测(使用实际索引列表,避免 range compression bug) └─> 生成输出(支持 3 种冲突样式) HasConflictParallel() └─> 并行计算 O→A 和 O→B 的 diff ├─> 使用 findMergeRegions 查找合并区域 ├─> 使用 slices.ContainsFunc 快速检测冲突 └─> 返回布尔值(不生成输出,更高效) ``` ### 数据结构 ```go // 使用实际索引列表,避免 range compression bug type mergeRegion struct { start, end int // 在 O 中的范围 changesAIndices []int // 实际的 change 索引列表 changesBIndices []int // 实际的 change 索引列表 isConflict bool } ``` --- ## 性能基准测试 ### MergeParallel vs Merge 性能对比 | 数据规模 | 函数 | 时间 | 内存分配 | 性能对比 | |---------|------|------|---------|---------| | 100 行 | MergeParallel | 63,915 ns/op | 1326 allocs | 基本持平 | | 100 行 | Merge | 60,000 ns/op | 1222 allocs | 基准 | | **1000 行** | **MergeParallel** | **3,974,403 ns/op** | 104,565 allocs | **快 22%** ✅ | | **1000 行** | Merge | 5,123,843 ns/op | 103,553 allocs | 基准 | **结论**: - ✅ **中等规模数据(1000 行)MergeParallel 快 22%** - ✅ 小规模数据两者性能基本持平 - ✅ 内存分配次数相当(MergeParallel 多约 1%) --- ## 已实现的优化 | 优化 | 描述 | 效果 | |------|------|------| | **并行 Diff** | 使用 `errgroup` 并行计算两个 diff | 中等规模快 **28%** | | **实际索引列表** | mergeRegion 使用索引列表而非范围 | 避免 range compression bug | | **零分配冲突处理** | writeConflictRegion 不分配额外切片 | 减少 GC 压力 | | **预分配容量** | 预分配 regions 和 allChanges | 避免切片扩容 | | **标准库优化** | 使用 `slices.ContainsFunc`、`slices.Equal`、`cmp.Compare` | 代码更简洁 | | **结构体内存布局** | 优化 mergeRegion 字段顺序 | 减少 padding,节省 8 bytes/region | --- ## GPT Review 修复的问题 ### 正确性问题 | 问题 | 描述 | 状态 | |------|------|------| | **first change 初始化** | 第一个 change 没有正确计入 region | ✅ 已修复 | | **range compression bug** | 使用 min/max 索引会包含不属于该 region 的 change | ✅ 已修复 | | **overlap 判断** | 使用 `<=` 导致相邻修改被错误合并 | ✅ 已修复为 `<` | | **插入操作 overlap** | 纯插入操作(Del=0)需要特殊处理 | ✅ 已修复 | ### 性能优化 | 问题 | 描述 | 状态 | |------|------|------| | **conflict slice 分配** | writeConflictRegion 每次分配两个切片 | ✅ 已优化为零分配 | | **slices.SortFunc 写法** | 使用 `cmp.Compare` 更简洁 | ✅ 已优化 | | **并行计算无 cancel** | 一个失败另一个继续运行 | ✅ 使用 errgroup | ### 代码质量 | 问题 | 描述 | 状态 | |------|------|------| | **参数命名不清晰** | `idx` 参数难以理解 | ✅ 已改为 `lineIndex` | | **未使用的参数** | findMergeRegions 参数签名简化 | ✅ 已修复 | --- ## 测试覆盖 | 测试套件 | 测试用例 | 通过率 | |---------|---------|--------| | `TestMergeParallelBasic` | 3 | 100% | | `TestMergeParallelVsMerge` | 10 | 100% | | `TestMergeParallelConflictStyles` | 3 | 100% | | `TestMergeParallelAlgorithms` | 5 | 100% | | `TestMergeParallelComplexConflicts` | 4 | 100% | | `TestMergeParallelEdgeModifications` | 6 | 100% | | `TestHasConflictParallel` | 16 | 100% | | **总计** | **62+** | **100%** | --- ## 行为差异说明 ### Merge vs MergeParallel **Overlap 判断差异**: | 情况 | Merge | MergeParallel | |------|-------|---------------| | 相邻删除 (line2 vs line3) | 冲突 | **不冲突** ✅ | | 相邻修改 (line2 vs line3) | 冲突 | **不冲突** ✅ | | 同位置插入不同内容 | 冲突 | 冲突 ✅ | | 同位置插入相同内容 | 冲突 | **不冲突** ✅ | MergeParallel 的行为更符合 diff3 标准:**相邻但不重叠的修改不应该冲突**。 --- ## 使用示例 ```go ctx := context.Background() opts := &MergeOptions{ TextO: "line1\nline2\nline3\n", TextA: "line1a\nline2\nline3\n", TextB: "line1b\nline2\nline3\n", Style: STYLE_DEFAULT, A: Histogram, } result, hasConflict, err := MergeParallel(ctx, opts) if err != nil { log.Fatal(err) } if hasConflict { log.Println("合并有冲突") } fmt.Println(result) ``` --- ## 文件目录 ``` modules/diferenco/ ├── merge.go # 原始 Merge 实现 ├── merge_parallel.go # MergeParallel 实现(GLM-5 生成,并行优化) ├── merge_parallel_test.go # 完整测试套件 ├── merge_parallel_bench_test.go # 性能基准测试 └── MERGE_PARALLEL.md # 本文档 ``` --- ## 最终评分 (GPT Review) | 方面 | 评分 | |------|------| | 算法正确性 | 9/10 ✅ | | 性能 | 8.5/10 ✅ | | 代码结构 | 9/10 ✅ | | Go idiomatic | 9/10 ✅ | | **综合评分** | **9/10** | > **GPT 评价**:这版已经是可以直接发布为库的 diff3 merge 实现了。 --- **完成日期**: 2026-03-16 **Go 版本**: 1.21+ **生成模型**: GLM-5 (智谱 AI) **Review**: GPT-4 (OpenAI) **审核**: CodeFuse AI Assistant ================================================ FILE: modules/diferenco/README.md ================================================ # Diferenco - Advanced Diff Algorithms [![Go Version](https://img.shields.io/badge/Go-1.22+-00ADD8?style=flat&logo=go)](https://golang.org) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](../../LICENSE) **Diferenco** is a comprehensive diff and merge library for Go that provides multiple algorithms for computing differences between sequences. It supports text, rune-level, and word-level diffing, along with three-way merge capabilities. **Diferenco** 是一个全面的 Go 语言 diff 和 merge 库,提供多种算法来计算序列之间的差异。支持文本、字符级和词级 diff,以及三路合并功能。 ## Features / 特性 - **Multiple Diff Algorithms / 多种 Diff 算法** - **Myers** - Classic O(ND) algorithm, good for general use / 经典 O(ND) 算法,适合通用场景 - **Histogram** - Fast and accurate, optimized for small files / 快速准确,针对小文件优化 - **ONP** - O(NP) algorithm, efficient for large files with few changes / O(NP) 算法,适合大文件少改动 - **Patience** - Unique-line based, best for code with reordering / 唯一行算法,适合代码重排序 - **Minimal** - Simple implementation for basic use cases / 简单实现,适合基础场景 - **SuffixArray** - LCS-based, efficient for text and binary data / 基于 LCS,适合文本和二进制数据 - **Multi-level Diffing / 多级 Diff** - Line-level diff / 行级 diff - Rune-level diff (character-based) / 字符级 diff - Word-level diff / 词级 diff - **Advanced Features / 高级特性** - Three-way merge (diff3) / 三路合并 - Unified diff output / 统一 diff 输出 - Multiple conflict styles / 多种冲突样式 - Context cancellation support / 支持上下文取消 - Character set detection / 字符集检测 ## Installation / 安装 ```bash go get github.com/antgroup/hugescm/modules/diferenco ``` ## Quick Start / 快速开始 ### Basic Line Diff / 基本行级 Diff ```go package main import ( "context" "fmt" "github.com/antgroup/hugescm/modules/diferenco" ) func main() { ctx := context.Background() before := []string{ "Hello, World!", "This is line 2", "This is line 3", } after := []string{ "Hello, World!", "This is modified line 2", "This is line 3", "This is new line 4", } // Compute diff using Histogram algorithm / 使用 Histogram 算法计算 diff changes, err := diferenco.DiffSlices(ctx, before, after, diferenco.Histogram) if err != nil { panic(err) } // Print changes / 打印变更 for _, change := range changes { if change.Del > 0 { fmt.Printf("Deleted %d lines at position %d\n", change.Del, change.P1) } if change.Ins > 0 { fmt.Printf("Inserted %d lines at position %d\n", change.Ins, change.P2) } } } ``` ## Algorithm Comparison / 算法对比 | Algorithm | Time Complexity | Space Complexity | Best For | |-----------|----------------|------------------|----------| | **Myers** | O(ND) | O(D) | General use, balanced performance / 通用场景,均衡性能 | | **Histogram** | O(N log N) | O(N) | Small files, high accuracy / 小文件,高精度 | | **ONP** | O(NP) | O(N) | Large files with few changes / 大文件少改动 | | **Patience** | O(N log N) | O(N) | Code with reordering, unique lines / 代码重排序,唯一行 | | **Minimal** | O(N²) | O(N) | Simple use cases / 简单场景 | | **SuffixArray** | O((N+M) log N) | O(N) | Text and binary data, LCS / 文本和二进制,LCS | > N = total length, D = edit distance, P = number of changes / N=总长度,D=编辑距离,P=改动数 ## Algorithm Details / 算法详解 ### Myers Algorithm / Myers 算法 **English:** The Myers algorithm, developed by Eugene Myers in 1986, is the classic diff algorithm used by Git. It finds the **shortest edit script (SES)** between two sequences. **Core Idea / 核心思想:** - Build an **edit graph** where each point (x,y) represents matching sequence1[0..x] with sequence2[0..y] - Find the **shortest path** from (0,0) to (N,M) - Diagonal moves (↘) are "free" (matching elements) - Horizontal (→) = deletion, Vertical (↓) = insertion **Implementation / 实现:** ``` sequence1 (N) ──────────────── │ . . . . . . . . │ . . . . . . . . sequence│ . . . . ────────► 2 (M) │ . . . .│ D │ │ . . . .│ │ ▼ . . . .└─────┘ (x,y) = endpoint ``` **Time Complexity / 时间复杂度:** O(ND) where D is the edit distance - Worst case: O(N×M) when sequences are completely different - Best case: O(N+M) when sequences are identical **Pros / 优点:** - Produces minimal edit scripts / 产生最小编辑脚本 - Well-tested, stable / 经过充分测试,稳定 **Cons / 缺点:** - Can be slow for large files with many changes / 大文件多改动时可能较慢 - May produce unstable diffs with moved blocks / 移动块可能产生不稳定 diff --- ### Histogram Algorithm / Histogram 算法 **English:** The Histogram algorithm is Git's default diff algorithm since 2010. It's based on the **patience diff** but uses **token frequency analysis** to find matches more intelligently. **Core Idea / 核心思想:** 1. Build a **histogram** of token occurrences in both sequences 2. Find the **least frequent token** (most unique) to start matching 3. Extend matches forward and backward to find longest common subsequences 4. Recursively process unmatched regions **Key Optimization / 关键优化:** ```go // Prefer longest match first, then lowest occurrences for stability // 优先最长匹配,长度相同时选择出现次数最少的(更稳定) if length > s.lcs.length || (length == s.lcs.length && occurrences < s.minOccurrences) { // select this match / 选择此匹配 } ``` **Time Complexity / 时间复杂度:** O(N log N) average case **Pros / 优点:** - Fast for most real-world cases / 大多数实际场景很快 - Produces clean, readable diffs / 产生清晰可读的 diff - Avoids cross-matches / 避免交叉匹配 **Cons / 缺点:** - Can degrade to O(N²) in worst case / 最坏情况可能退化为 O(N²) --- ### ONP Algorithm / ONP 算法 **English:** The ONP (O(NP) Sequence Comparison) algorithm, developed by Sun Wu, Udi Manber, and Gene Myers, optimizes for the case where sequences have **few differences**. **Core Idea / 核心思想:** - Similar to Myers but optimizes for **small P** (number of changes) - Uses a **greedy approach** with snake optimization - Performance scales with **edit distance**, not total size **Key Formula / 关键公式:** ``` Time = O((N+M) * D) where D is edit distance = O(NP) where P is min(N,M) for worst case ``` **Implementation / 实现:** ```go // Uses furthest reaching path in each diagonal // 使用每条对角线上最远可达路径 V[k] = furthest X value on diagonal k ``` **Pros / 优点:** - Extremely fast for similar sequences / 相似序列极快 - Memory efficient / 内存高效 **Cons / 缺点:** - Slow for completely different sequences / 完全不同序列较慢 --- ### Patience Algorithm / Patience 算法 **English:** The Patience algorithm, developed by Bram Cohen (creator of BitTorrent), focuses on finding **unique lines** as "anchors" and uses **LIS (Longest Increasing Subsequence)** to maintain order. **Core Idea / 核心思想:** 1. Find lines that appear **exactly once** in both sequences (unique lines) 2. Match unique lines between sequences 3. Use **LIS** to find the longest sequence of matches that preserve order 4. Recursively diff the regions between anchors **Why "Patience"? / 为什么叫 "Patience"?** Named after the card game "Patience" (Solitaire), as the algorithm resembles sorting cards. **Implementation / 实现:** ```go // 1. Find unique lines / 找出唯一行 for i, e := range a { if count[e] == 1 { // unique element / 唯一元素 } } // 2. LIS using binary search (O(N log N)) // 2. 使用二分查找的 LIS 算法 (O(N log N)) tails := make([]int, 0) for _, p := range pairs { // binary search / 二分查找 lo, hi := 0, len(tails) for lo < hi { mid := (lo + hi) / 2 if pairs[tails[mid]].j < p.j { lo = mid + 1 } else { hi = mid } } } ``` **Time Complexity / 时间复杂度:** - LIS: O(N log N) (optimized) / 优化后 - Overall: O(N log N) average case **Pros / 优点:** - Excellent for code with moved blocks / 适合移动块的代码 - Stable diffs, avoids jitter / 稳定的 diff,避免抖动 - Good for merge operations / 适合合并操作 **Cons / 缺点:** - May miss non-unique matches / 可能错过非唯一匹配 - Requires enough unique lines / 需要足够多的唯一行 --- ### Minimal Algorithm / Minimal 算法 **English:** A simple implementation focused on correctness and ease of understanding. Uses a straightforward dynamic programming approach. **Core Idea / 核心思想:** - Build a **DP table** where `dp[i][j]` = LCS length for seq1[0..i] and seq2[0..j] - Backtrack to find the actual changes **Implementation / 实现:** ```go // DP table / DP 表 for i := 1; i <= len(a); i++ { for j := 1; j <= len(b); j++ { if a[i-1] == b[j-1] { dp[i][j] = dp[i-1][j-1] + 1 } else { dp[i][j] = max(dp[i-1][j], dp[i][j-1]) } } } ``` **Time Complexity / 时间复杂度:** O(N×M) **Pros / 优点:** - Simple, easy to understand / 简单易懂 - Good for learning / 适合学习 **Cons / 缺点:** - Slow for large inputs / 大输入较慢 - O(N×M) memory / O(N×M) 内存 --- ### SuffixArray Algorithm / SuffixArray 算法 **English:** The SuffixArray algorithm uses a **suffix array** data structure to find the **longest common substring (LCS)** between sequences. This is different from LCS (Longest Common Subsequence). **Core Idea / 核心思想:** 1. Build a **suffix array** for the first sequence 2. For each position in the second sequence, find the longest match in the suffix array 3. Recursively process unmatched regions **Suffix Array / 后缀数组:** ``` Text: "banana" Suffixes: Sorted Suffixes: banana [0] a [5] anana [1] ana [3] nana [2] anana [1] ana [3] banana [0] na [4] na [4] a [5] nana [2] Suffix Array: [5, 3, 1, 0, 4, 2] ``` **Implementation / 实现:** ```go // Build suffix array using comparison sort // 使用比较排序构建后缀数组 slices.SortFunc(indices, func(i, j int) int { return cmp.Compare(s[i], s[j]) }) // Find longest match using binary search // 使用二分查找找最长匹配 slices.BinarySearchFunc(sa, target, func(idx int, target E) int { return cmp.Compare(data[idx], target) }) ``` **Time Complexity / 时间复杂度:** O((N+M) log N) - Suffix array construction: O(N log N) - Finding matches: O(M log N) **Pros / 优点:** - Efficient for text and binary data / 文本和二进制数据高效 - Good for finding repeated patterns / 适合查找重复模式 - Works with comparable types / 适用于可比较类型 **Cons / 缺点:** - Requires `cmp.Ordered` types (int, string, etc.) / 需要 cmp.Ordered 类型 - Falls back to ONP for unsupported types / 不支持类型回退到 ONP --- ## Algorithm Selection Guide / 算法选择指南 ### By Use Case / 按场景选择 | Use Case / 场景 | Recommended Algorithm / 推荐算法 | |-----------------|-------------------------------| | General purpose / 通用 | Myers, Histogram | | Large files, few changes / 大文件少改动 | ONP | | Code review, moved blocks / 代码审查,移动块 | Patience | | Binary data / 二进制数据 | SuffixArray | | Text with repeated patterns / 重复模式文本 | SuffixArray, Histogram | | Small files / 小文件 | Histogram | | Learning/Debugging / 学习/调试 | Minimal | ### By Performance / 按性能选择 ``` Few Changes (D small) / 少改动: ONP > Histogram ≈ Patience > Myers > SuffixArray > Minimal Many Changes (D large) / 多改动: Histogram > Patience > SuffixArray > Myers > ONP > Minimal Large Files (N large) / 大文件: ONP > SuffixArray > Histogram > Patience > Myers > Minimal ``` ## Advanced Usage / 高级用法 ### Unified Diff Output / 统一 Diff 输出 ```go opts := &diferenco.Options{ From: &diferenco.File{ Name: "old.txt", Hash: "abc123", Mode: 0644, }, To: &diferenco.File{ Name: "new.txt", Hash: "def456", Mode: 0644, }, S1: "old file content", S2: "new file content", A: diferenco.Histogram, } unified, err := diferenco.Unified(ctx, opts) if err != nil { panic(err) } fmt.Println(unified.String()) ``` ### Character-level Diff / 字符级 Diff ```go ctx := context.Background() a := "The quick brown fox jumps over the lazy dog" b := "The quick brown dog leaps over the lazy cat" diffs, err := diferenco.DiffRunes(ctx, a, b, diferenco.Histogram) if err != nil { panic(err) } for _, diff := range diffs { switch diff.Type { case diferenco.Equal: fmt.Print(diff.Text) case diferenco.Insert: fmt.Printf("\x1b[32m%s\x1b[0m", diff.Text) // Green / 绿色 case diferenco.Delete: fmt.Printf("\x1b[31m%s\x1b[0m", diff.Text) // Red / 红色 } } ``` ### Three-way Merge / 三路合并 ```go opts := &diferenco.MergeOptions{ TextO: "Base content", // Original / 原始 TextA: "Branch A content", // Your changes / 你的改动 TextB: "Branch B content", // Their changes / 他人的改动 LabelO: "base", LabelA: "yours", LabelB: "theirs", A: diferenco.Histogram, } // Using classic merge / 使用经典合并 result, hasConflicts, err := diferenco.Merge(ctx, opts) if err != nil { panic(err) } if hasConflicts { fmt.Println("Merge conflicts detected! / 检测到合并冲突!") } else { fmt.Println("Merge successful! / 合并成功!") } fmt.Println(result) ``` ### Modern Three-way Merge (Recommended) / 现代三路合并(推荐) ```go // MergeParallel uses Go 1.26+ modern code style with better readability // MergeParallel 使用 Go 1.26+ 现代代码风格,可读性更好 result, hasConflicts, err := diferenco.MergeParallel(ctx, opts) ``` ### Fast Conflict Detection / 快速冲突检测 ```go // Only check for conflicts without generating merged result // 仅检查冲突,不生成合并结果(更高效) hasConflicts, err := diferenco.HasConflictParallel(ctx, textO, textA, textB) if err != nil { panic(err) } if hasConflicts { fmt.Println("Conflicts detected! / 检测到冲突!") } ``` ### Context Cancellation / 上下文取消 ```go ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() changes, err := diferenco.DiffSlices(ctx, largeBefore, largeAfter, diferenco.Myers) if err == context.DeadlineExceeded { fmt.Println("Diff operation timed out / Diff 操作超时") } ``` ## Performance Tips / 性能建议 1. **Choose the right algorithm / 选择正确的算法** - Histogram for small files (< 5000 lines) / 小文件 (< 5000 行) - ONP for large files with few changes / 大文件少改动 - Patience for code with reordering / 代码重排序 - SuffixArray for text/binary data / 文本/二进制数据 2. **Pre-process when possible / 预处理** - Remove trailing whitespace / 移除尾部空白 - Normalize line endings / 规范化行结束符 - Filter out comments if appropriate / 适当过滤注释 3. **Use context with timeout / 使用带超时的上下文** - Prevent long-running operations / 防止长时间运行 - Handle cancellation gracefully / 优雅处理取消 ## Testing / 测试 ```bash # Run all tests / 运行所有测试 go test ./... # Run with race detector / 运行竞态检测 go test -race ./... # Run benchmarks / 运行基准测试 go test -bench=. -benchmem ``` ## API Reference / API 参考 ### Diff Functions / Diff 函数 ```go // Generic slice diff (recommended) / 泛型切片 diff(推荐) func DiffSlices[E comparable](ctx context.Context, a, b []E, algo Algorithm) ([]Change, error) // Rune-level diff / 字符级 diff func DiffRunes(ctx context.Context, a, b string, algo Algorithm) ([]StringDiff, error) // Word-level diff / 词级 diff func DiffWords(ctx context.Context, a, b string, algo Algorithm, splitFunc func(string) []string) ([]StringDiff, error) // Unified diff output / 统一 diff 输出 func Unified(ctx context.Context, opts *Options) (*Patch, error) // Get file statistics / 获取文件统计 func Stat(ctx context.Context, opts *Options) (*FileStat, error) ``` ### Merge Functions / 合并函数 ```go // Classic three-way merge / 经典三路合并 func Merge(ctx context.Context, opts *MergeOptions) (string, bool, error) // GLM three-way merge (Go 1.26+) / GLM 三路合并 func MergeParallel(ctx context.Context, opts *MergeOptions) (string, bool, error) // Fast conflict detection / 快速冲突检测 func HasConflictParallel(ctx context.Context, textO, textA, textB string) (bool, error) ``` ### Algorithm Selection / 算法选择 ```go // Parse algorithm name / 解析算法名称 func AlgorithmFromName(s string) (Algorithm, error) // Available algorithms / 可用算法 const ( Unspecified Algorithm = iota // Auto-select / 自动选择 Histogram // Default for small files / 小文件默认 ONP // Large files, few changes / 大文件少改动 Myers // Classic algorithm / 经典算法 Minimal // Simple implementation / 简单实现 Patience // Code with reordering / 代码重排序 SuffixArray // Text and binary / 文本和二进制 ) ``` ## Project Structure / 项目结构 ``` modules/diferenco/ ├── diferenco.go # Core functionality and public API / 核心功能和公共 API ├── myers.go # Myers algorithm / Myers 算法 ├── histogram.go # Histogram algorithm / Histogram 算法 ├── onp.go # ONP algorithm / ONP 算法 ├── patience.go # Patience algorithm / Patience 算法 ├── minimal.go # Minimal algorithm / Minimal 算法 ├── suffixarray.go # SuffixArray algorithm / SuffixArray 算法 ├── merge.go # Classic three-way merge / 经典三路合并 ├── merge_parallel.go # Modern three-way merge with parallel diff / 现代三路合并(并行计算) ├── sink.go # Line parsing and indexing / 行解析和索引 ├── text.go # Text processing and charset detection / 文本处理和字符集检测 ├── unified.go # Unified diff output / 统一 diff 输出 ├── unified_encoder.go # Unified diff encoder / 统一 diff 编码器 ├── unicode.go # Unicode utilities (CJK/Emoji) / Unicode 工具 ├── color/ # Color output utilities / 颜色输出工具 │ └── color.go └── lcs/ # LCS implementation / LCS 实现 ├── common.go ├── labels.go ├── old.go └── sequence.go ``` ## License / 许可证 Apache License 2.0 - see [LICENSE](../../LICENSE) for details. Apache License 2.0 - 详见 [LICENSE](../../LICENSE)。 ## Acknowledgments / 致谢 - Myers algorithm inspired by [Microsoft VSCode](https://github.com/microsoft/vscode) - Histogram algorithm based on [imara-diff](https://github.com/pascalkuthe/imara-diff) - ONP algorithm from [hattya/go.diff](https://github.com/hattya/go.diff) - Patience algorithm based on [Peter Evans' implementation](https://github.com/peter-evans/patience) - SuffixArray algorithm inspired by [diff-match-patch](https://github.com/google/diff-match-patch) ================================================ FILE: modules/diferenco/algorithms_bench_test.go ================================================ package diferenco import ( "context" "fmt" "math/rand" "strings" "testing" ) // Benchmark helpers to generate test data func generateSequence(size int, changeRate float64) []string { seq := make([]string, size) for i := range size { if rand.Float64() < changeRate { seq[i] = fmt.Sprintf("item_%d_variant", i) } else { seq[i] = fmt.Sprintf("item_%d", i) } } return seq } func generateModifiedSequence(base []string, changeRate float64) []string { modified := make([]string, len(base)) copy(modified, base) for i := range modified { if rand.Float64() < changeRate { modified[i] = fmt.Sprintf("modified_%d", i) } } return modified } // BenchmarkMyersAlgorithm benchmarks the Myers algorithm func BenchmarkMyersAlgorithm(b *testing.B) { ctx := context.Background() algos := []struct { name string algo Algorithm size int change float64 }{ {"small_10pct_change", Myers, 100, 0.1}, {"small_50pct_change", Myers, 100, 0.5}, {"medium_10pct_change", Myers, 1000, 0.1}, {"medium_50pct_change", Myers, 1000, 0.5}, {"large_10pct_change", Myers, 5000, 0.1}, {"large_50pct_change", Myers, 5000, 0.5}, } for _, tt := range algos { b.Run(tt.name, func(b *testing.B) { before := generateSequence(tt.size, 0) after := generateModifiedSequence(before, tt.change) b.ResetTimer() for range b.N { _, err := DiffSlices(ctx, before, after, tt.algo) if err != nil { b.Fatalf("DiffSlices() error = %v", err) } } }) } } // BenchmarkHistogramAlgorithm benchmarks the Histogram algorithm func BenchmarkHistogramAlgorithm(b *testing.B) { ctx := context.Background() algos := []struct { name string algo Algorithm size int change float64 }{ {"small_10pct_change", Histogram, 100, 0.1}, {"small_50pct_change", Histogram, 100, 0.5}, {"medium_10pct_change", Histogram, 1000, 0.1}, {"medium_50pct_change", Histogram, 1000, 0.5}, {"large_10pct_change", Histogram, 5000, 0.1}, {"large_50pct_change", Histogram, 5000, 0.5}, } for _, tt := range algos { b.Run(tt.name, func(b *testing.B) { before := generateSequence(tt.size, 0) after := generateModifiedSequence(before, tt.change) b.ResetTimer() for range b.N { _, err := DiffSlices(ctx, before, after, tt.algo) if err != nil { b.Fatalf("DiffSlices() error = %v", err) } } }) } } // BenchmarkONPAlgorithm benchmarks the ONP algorithm func BenchmarkONPAlgorithm(b *testing.B) { ctx := context.Background() algos := []struct { name string algo Algorithm size int change float64 }{ {"small_10pct_change", ONP, 100, 0.1}, {"small_50pct_change", ONP, 100, 0.5}, {"medium_10pct_change", ONP, 1000, 0.1}, {"medium_50pct_change", ONP, 1000, 0.5}, {"large_10pct_change", ONP, 5000, 0.1}, {"large_50pct_change", ONP, 5000, 0.5}, } for _, tt := range algos { b.Run(tt.name, func(b *testing.B) { before := generateSequence(tt.size, 0) after := generateModifiedSequence(before, tt.change) b.ResetTimer() for range b.N { _, err := DiffSlices(ctx, before, after, tt.algo) if err != nil { b.Fatalf("DiffSlices() error = %v", err) } } }) } } // BenchmarkPatienceAlgorithm benchmarks the Patience algorithm func BenchmarkPatienceAlgorithm(b *testing.B) { ctx := context.Background() algos := []struct { name string algo Algorithm size int change float64 }{ {"small_10pct_change", Patience, 100, 0.1}, {"small_50pct_change", Patience, 100, 0.5}, {"medium_10pct_change", Patience, 1000, 0.1}, {"medium_50pct_change", Patience, 1000, 0.5}, {"large_10pct_change", Patience, 5000, 0.1}, {"large_50pct_change", Patience, 5000, 0.5}, } for _, tt := range algos { b.Run(tt.name, func(b *testing.B) { before := generateSequence(tt.size, 0) after := generateModifiedSequence(before, tt.change) b.ResetTimer() for range b.N { _, err := DiffSlices(ctx, before, after, tt.algo) if err != nil { b.Fatalf("DiffSlices() error = %v", err) } } }) } } // BenchmarkMinimalAlgorithm benchmarks the Minimal algorithm func BenchmarkMinimalAlgorithm(b *testing.B) { ctx := context.Background() algos := []struct { name string algo Algorithm size int change float64 }{ {"small_10pct_change", Minimal, 100, 0.1}, {"small_50pct_change", Minimal, 100, 0.5}, {"medium_10pct_change", Minimal, 1000, 0.1}, {"medium_50pct_change", Minimal, 1000, 0.5}, {"large_10pct_change", Minimal, 5000, 0.1}, {"large_50pct_change", Minimal, 5000, 0.5}, } for _, tt := range algos { b.Run(tt.name, func(b *testing.B) { before := generateSequence(tt.size, 0) after := generateModifiedSequence(before, tt.change) b.ResetTimer() for range b.N { _, err := DiffSlices(ctx, before, after, tt.algo) if err != nil { b.Fatalf("DiffSlices() error = %v", err) } } }) } } // BenchmarkSuffixArrayAlgorithm benchmarks the SuffixArray algorithm func BenchmarkSuffixArrayAlgorithm(b *testing.B) { ctx := context.Background() algos := []struct { name string algo Algorithm size int change float64 }{ {"small_10pct_change", SuffixArray, 100, 0.1}, {"small_50pct_change", SuffixArray, 100, 0.5}, {"medium_10pct_change", SuffixArray, 1000, 0.1}, {"medium_50pct_change", SuffixArray, 1000, 0.5}, {"large_10pct_change", SuffixArray, 5000, 0.1}, {"large_50pct_change", SuffixArray, 5000, 0.5}, } for _, tt := range algos { b.Run(tt.name, func(b *testing.B) { before := generateSequence(tt.size, 0) after := generateModifiedSequence(before, tt.change) b.ResetTimer() for range b.N { _, err := DiffSlices(ctx, before, after, tt.algo) if err != nil { b.Fatalf("DiffSlices() error = %v", err) } } }) } } // BenchmarkAlgorithmComparison compares all algorithms with the same input func BenchmarkAlgorithmComparison(b *testing.B) { ctx := context.Background() sizes := []int{100, 1000, 5000} changeRates := []float64{0.1, 0.5} for _, size := range sizes { for _, changeRate := range changeRates { before := generateSequence(size, 0) after := generateModifiedSequence(before, changeRate) name := fmt.Sprintf("size_%d_change_%.0f", size, changeRate*100) b.Run(name+"_myers", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, before, after, Myers) } }) b.Run(name+"_histogram", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, before, after, Histogram) } }) b.Run(name+"_onp", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, before, after, ONP) } }) b.Run(name+"_patience", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, before, after, Patience) } }) b.Run(name+"_suffixarray", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, before, after, SuffixArray) } }) } } } // BenchmarkSpecialCases benchmarks special edge cases func BenchmarkSpecialCases(b *testing.B) { ctx := context.Background() // Benchmark identical inputs b.Run("identical", func(b *testing.B) { input := generateSequence(1000, 0) b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, input, input, Myers) } }) // Benchmark completely different inputs b.Run("completely_different", func(b *testing.B) { before := generateSequence(1000, 0) after := generateSequence(1000, 1) b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, before, after, Myers) } }) // Benchmark single insertion b.Run("single_insertion", func(b *testing.B) { before := generateSequence(1000, 0) after := make([]string, len(before)+1) copy(after[:500], before[:500]) after[500] = "inserted_line" copy(after[501:], before[500:]) b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, before, after, Myers) } }) // Benchmark single deletion b.Run("single_deletion", func(b *testing.B) { before := generateSequence(1000, 0) after := make([]string, len(before)-1) copy(after[:500], before[:500]) copy(after[500:], before[501:]) b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, before, after, Myers) } }) } // BenchmarkDiffRunes benchmarks rune-level diff func BenchmarkDiffRunes(b *testing.B) { ctx := context.Background() tests := []struct { name string algo Algorithm a string b string }{ {"small_myers", Myers, "Hello World", "Hello There"}, {"small_histogram", Histogram, "Hello World", "Hello There"}, {"medium_myers", Myers, strings.Repeat("Hello World ", 100), strings.Repeat("Hello There ", 100)}, {"medium_histogram", Histogram, strings.Repeat("Hello World ", 100), strings.Repeat("Hello There ", 100)}, {"large_myers", Myers, strings.Repeat("Hello World ", 1000), strings.Repeat("Hello There ", 1000)}, {"large_histogram", Histogram, strings.Repeat("Hello World ", 1000), strings.Repeat("Hello There ", 1000)}, } for _, tt := range tests { b.Run(tt.name, func(b *testing.B) { b.ResetTimer() for range b.N { _, err := DiffRunes(ctx, tt.a, tt.b, tt.algo) if err != nil { b.Fatalf("DiffRunes() error = %v", err) } } }) } } // BenchmarkDiffWords benchmarks word-level diff func BenchmarkDiffWords(b *testing.B) { ctx := context.Background() tests := []struct { name string algo Algorithm a string b string }{ {"small_myers", Myers, "The quick brown fox", "The quick brown dog"}, {"small_histogram", Histogram, "The quick brown fox", "The quick brown dog"}, {"medium_myers", Myers, strings.Repeat("The quick brown fox jumps ", 50), strings.Repeat("The quick brown dog jumps ", 50)}, {"medium_histogram", Histogram, strings.Repeat("The quick brown fox jumps ", 50), strings.Repeat("The quick brown dog jumps ", 50)}, {"large_myers", Myers, strings.Repeat("The quick brown fox jumps ", 500), strings.Repeat("The quick brown dog jumps ", 500)}, {"large_histogram", Histogram, strings.Repeat("The quick brown fox jumps ", 500), strings.Repeat("The quick brown dog jumps ", 500)}, } for _, tt := range tests { b.Run(tt.name, func(b *testing.B) { b.ResetTimer() for range b.N { _, err := DiffWords(ctx, tt.a, tt.b, tt.algo, nil) if err != nil { b.Fatalf("DiffWords() error = %v", err) } } }) } } // BenchmarkHelperFunctions benchmarks helper functions func BenchmarkHelperFunctions(b *testing.B) { // Benchmark commonPrefixLength b.Run("commonPrefixLength", func(b *testing.B) { a := generateSequence(1000, 0) b_ := generateSequence(1000, 0.1) b.ResetTimer() for range b.N { _ = commonPrefixLength(a, b_) } }) // Benchmark commonSuffixLength b.Run("commonSuffixLength", func(b *testing.B) { a := generateSequence(1000, 0) b_ := generateSequence(1000, 0.1) b.ResetTimer() for range b.N { _ = commonSuffixLength(a, b_) } }) } // BenchmarkWithRealWorldData simulates real-world diff scenarios func BenchmarkWithRealWorldData(b *testing.B) { ctx := context.Background() // Simulate code file with function changes codeBefore := ` package main import "fmt" func main() { fmt.Println("Hello, World!") greet("Alice") greet("Bob") process(100) } func greet(name string) { fmt.Printf("Hello, %s!\n", name) } func process(n int) { for range n { fmt.Println("processed") } } ` codeAfter := ` package main import "fmt" func main() { fmt.Println("Hello, World!") greet("Alice") greet("Charlie") process(1000) cleanup() } func greet(name string) { fmt.Printf("Greetings, %s!\n", name) } func process(n int) { for i := range n { fmt.Printf("Processing: %d\n", i) } } func cleanup() { fmt.Println("Cleaning up...") } ` b.Run("code_diff_myers", func(b *testing.B) { beforeLines := splitLines(codeBefore) afterLines := splitLines(codeAfter) b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, beforeLines, afterLines, Myers) } }) b.Run("code_diff_histogram", func(b *testing.B) { beforeLines := splitLines(codeBefore) afterLines := splitLines(codeAfter) b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, beforeLines, afterLines, Histogram) } }) // Simulate text document changes textBefore := strings.Repeat("This is a sample document with some content. ", 100) textAfter := strings.Replace(textBefore, "sample", "detailed", 10) textAfter = strings.Replace(textAfter, "content", "information", 15) b.Run("text_diff_runes", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffRunes(ctx, textBefore, textAfter, Histogram) } }) b.Run("text_diff_words", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffWords(ctx, textBefore, textAfter, Histogram, nil) } }) } // BenchmarkMemoryAllocation benchmarks memory allocation patterns func BenchmarkMemoryAllocation(b *testing.B) { ctx := context.Background() algos := []Algorithm{Myers, Histogram, ONP, Patience, SuffixArray} for _, algo := range algos { b.Run(algo.String(), func(b *testing.B) { before := generateSequence(1000, 0) after := generateModifiedSequence(before, 0.3) b.ReportAllocs() b.ResetTimer() for range b.N { _, err := DiffSlices(ctx, before, after, algo) if err != nil { b.Fatalf("DiffSlices() error = %v", err) } } }) } } // BenchmarkParallel benchmarks parallel execution func BenchmarkParallel(b *testing.B) { ctx := context.Background() before := generateSequence(1000, 0) after := generateModifiedSequence(before, 0.3) b.RunParallel(func(pb *testing.PB) { for pb.Next() { _, err := DiffSlices(ctx, before, after, Myers) if err != nil { b.Fatal(err) } } }) } // Helper function to split text into lines func splitLines(text string) []string { lines := make([]string, 0) start := 0 for i, r := range text { if r == '\n' { lines = append(lines, text[start:i]) start = i + 1 } } if start < len(text) { lines = append(lines, text[start:]) } return lines } // In Go 1.20+, the random generator is automatically seeded ================================================ FILE: modules/diferenco/color/color.go ================================================ package color // TODO read colors from a github.com/go-git/go-git/plumbing/format/config.Config struct // TODO implement color parsing, see https://github.com/git/git/blob/v2.47.1/color.c import "maps" // Colors. See https://github.com/git/git/blob/v2.47.1/color.h#L25-L66. const ( Normal = "" Reset = "\033[0m" Bold = "\033[1m" Black = "\033[30m" Red = "\033[31m" Green = "\033[32m" Yellow = "\033[33m" Blue = "\033[34m" Magenta = "\033[35m" Cyan = "\033[36m" White = "\033[37m" Default = "\033[39m" BoldBlack = "\033[1;30m" BoldRed = "\033[1;31m" BoldGreen = "\033[1;32m" BoldYellow = "\033[1;33m" BoldBlue = "\033[1;34m" BoldMagenta = "\033[1;35m" BoldCyan = "\033[1;36m" BoldWhite = "\033[1;37m" BoldDefault = "\033[1;39m" FaintBlack = "\033[2;30m" FaintRed = "\033[2;31m" FaintGreen = "\033[2;32m" FaintYellow = "\033[2;33m" FaintBlue = "\033[2;34m" FaintMagenta = "\033[2;35m" FaintCyan = "\033[2;36m" FaintWhite = "\033[2;37m" FaintDefault = "\033[2;39m" BgBlack = "\033[40m" BgRed = "\033[41m" BgGreen = "\033[42m" BgYellow = "\033[43m" BgBlue = "\033[44m" BgMagenta = "\033[45m" BgCyan = "\033[46m" BgWhite = "\033[47m" BgDefault = "\033[49m" Faint = "\033[2m" FaintItalic = "\033[2;3m" Reverse = "\033[7m" ) // A ColorKey is a key into a ColorConfig map and also equal to the key in the // diff.color subsection of the config. See // https://github.com/git/git/blob/v2.26.2/diff.c#L83-L106. type ColorKey string // ColorKeys. const ( Context ColorKey = "context" Meta ColorKey = "meta" Frag ColorKey = "frag" Old ColorKey = "old" New ColorKey = "new" Commit ColorKey = "commit" Whitespace ColorKey = "whitespace" Func ColorKey = "func" OldMoved ColorKey = "oldMoved" OldMovedAlternative ColorKey = "oldMovedAlternative" OldMovedDimmed ColorKey = "oldMovedDimmed" OldMovedAlternativeDimmed ColorKey = "oldMovedAlternativeDimmed" NewMoved ColorKey = "newMoved" NewMovedAlternative ColorKey = "newMovedAlternative" NewMovedDimmed ColorKey = "newMovedDimmed" NewMovedAlternativeDimmed ColorKey = "newMovedAlternativeDimmed" ContextDimmed ColorKey = "contextDimmed" OldDimmed ColorKey = "oldDimmed" NewDimmed ColorKey = "newDimmed" ContextBold ColorKey = "contextBold" OldBold ColorKey = "oldBold" NewBold ColorKey = "newBold" ) // A ColorConfig is a color configuration. A nil or empty ColorConfig // corresponds to no color. type ColorConfig map[ColorKey]string // A ColorConfigOption sets an option on a ColorConfig. type ColorConfigOption func(ColorConfig) // WithColor sets the color for key. func WithColor(key ColorKey, color string) ColorConfigOption { return func(cc ColorConfig) { cc[key] = color } } // defaultColorConfig is the default color configuration. See // https://github.com/git/git/blob/v2.26.2/diff.c#L57-L81. var defaultColorConfig = ColorConfig{ Context: Normal, Meta: Bold, Frag: Cyan, Old: Red, New: Green, Commit: Yellow, Whitespace: BgRed, Func: Normal, OldMoved: BoldMagenta, OldMovedAlternative: BoldBlue, OldMovedDimmed: Faint, OldMovedAlternativeDimmed: FaintItalic, NewMoved: BoldCyan, NewMovedAlternative: BoldYellow, NewMovedDimmed: Faint, NewMovedAlternativeDimmed: FaintItalic, ContextDimmed: Faint, OldDimmed: FaintRed, NewDimmed: FaintGreen, ContextBold: Bold, OldBold: BoldRed, NewBold: BoldGreen, } // NewColorConfig returns a new ColorConfig. func NewColorConfig(options ...ColorConfigOption) ColorConfig { cc := make(ColorConfig) maps.Copy(cc, defaultColorConfig) for _, option := range options { option(cc) } return cc } // Reset returns the ANSI escape sequence to reset the color with key set from // cc. If no color was set then no reset is needed so it returns the empty // string. func (cc ColorConfig) Reset(key ColorKey) string { if cc[key] == "" { return "" } return Reset } ================================================ FILE: modules/diferenco/diferenco.go ================================================ package diferenco import ( "context" "errors" "fmt" "io" "slices" "strings" ) // https://github.com/Wilfred/difftastic/wiki/Line-Based-Diffs // https://neil.fraser.name/writing/diff/ // https://prettydiff.com/2/guide/unrelated_diff.xhtml // https://blog.robertelder.org/diff-algorithm/ // https://news.ycombinator.com/item?id=33417466 // Operation defines the operation of a diff item. type Operation int8 const ( // Delete item represents a delete hunk. Delete Operation = -1 // Insert item represents an insert hunk. Insert Operation = 1 // Equal item represents an equal hunk. Equal Operation = 0 ) type Algorithm int const ( Unspecified Algorithm = iota Histogram ONP Myers Minimal Patience SuffixArray ) var ( // ErrUnknownAlgorithm is returned when an unknown algorithm name or value is specified ErrUnknownAlgorithm = errors.New("unknown algorithm") ) var ( algorithmValueMap = map[string]Algorithm{ "histogram": Histogram, "onp": ONP, "myers": Myers, "patience": Patience, "minimal": Minimal, "suffixarray": SuffixArray, } algorithmNameMap = map[Algorithm]string{ Unspecified: "unspecified", Histogram: "histogram", ONP: "onp", Myers: "myers", Minimal: "minimal", Patience: "patience", SuffixArray: "suffixarray", } ) func (a Algorithm) String() string { n, ok := algorithmNameMap[a] if ok { return n } return "unspecified" } func AlgorithmFromName(s string) (Algorithm, error) { s = strings.TrimSpace(strings.ToLower(s)) if a, ok := algorithmValueMap[s]; ok { return a, nil } // Provide helpful error message with available options var options []string for name := range algorithmValueMap { options = append(options, name) } slices.Sort(options) return Unspecified, fmt.Errorf("%w: '%s' (available options: %s)", ErrUnknownAlgorithm, s, strings.Join(options, ", ")) } // commonPrefixLength returns the length of the common prefix of two T slices. func commonPrefixLength[E comparable](a, b []E) int { n := min(len(a), len(b)) i := 0 for i < n && a[i] == b[i] { i++ } return i } // commonSuffixLength returns the length of the common suffix of two rune slices. func commonSuffixLength[E comparable](a, b []E) int { i1, i2 := len(a), len(b) n := min(i1, i2) i := 0 for i < n && a[i1-1-i] == b[i2-1-i] { i++ } return i } type Change struct { P1 int // before: position in before P2 int // after: position in after Del int // number of elements that deleted from a Ins int // number of elements that inserted into b } // StringDiff represents one diff operation type StringDiff struct { Type Operation Text string } type FileStat struct { Addition, Deletion, Hunks int Name string } type Options struct { From, To *File S1, S2 string R1, R2 io.Reader A Algorithm // algorithm } // Name returns the filename from To or From. func (o *Options) Name() string { if o.To != nil && o.To.Name != "" { return o.To.Name } if o.From != nil && o.From.Name != "" { return o.From.Name } return "" } // DiffSlices computes the differences between two slices using the specified algorithm. // For Unspecified algorithm, it automatically selects Histogram for small inputs (< 5000 elements) // or ONP for larger inputs. func DiffSlices[E comparable](ctx context.Context, L1, L2 []E, algo Algorithm) ([]Change, error) { // Check context before starting select { case <-ctx.Done(): return nil, ctx.Err() default: } // Select algorithm based on input size switch algo { case Unspecified: // Automatically select best algorithm based on input size if len(L1) < 5000 && len(L2) < 5000 { return histogram(ctx, L1, L2) } return onp(ctx, L1, L2) case Histogram: return histogram(ctx, L1, L2) case ONP: return onp(ctx, L1, L2) case Myers: return myers(ctx, L1, L2) case Minimal: return minimal(ctx, L1, L2) case Patience: return patience(ctx, L1, L2) case SuffixArray: return suffixArray(ctx, L1, L2) default: return nil, fmt.Errorf("%w: %s", ErrUnknownAlgorithm, algo.String()) } } func Stat(ctx context.Context, opts *Options) (*FileStat, error) { sink := &Sink{ Index: make(map[string]int), } a, err := sink.parseLines(opts.R1, opts.S1) if err != nil { return nil, err } b, err := sink.parseLines(opts.R2, opts.S2) if err != nil { return nil, err } changes, err := DiffSlices(ctx, a, b, opts.A) if err != nil { return nil, err } stats := &FileStat{ Hunks: len(changes), Name: opts.Name(), } for _, ch := range changes { stats.Addition += ch.Ins stats.Deletion += ch.Del } return stats, nil } func DiffRunes(ctx context.Context, a, b string, algo Algorithm) ([]StringDiff, error) { runesA := []rune(a) runesB := []rune(b) changes, err := DiffSlices(ctx, runesA, runesB, algo) if err != nil { return nil, err } diffs := make([]StringDiff, 0, 10) i := 0 for _, c := range changes { if i < c.P1 { diffs = append(diffs, StringDiff{Type: Equal, Text: string(runesA[i:c.P1])}) } if c.Del != 0 { diffs = append(diffs, StringDiff{Type: Delete, Text: string(runesA[c.P1 : c.P1+c.Del])}) } if c.Ins != 0 { diffs = append(diffs, StringDiff{Type: Insert, Text: string(runesB[c.P2 : c.P2+c.Ins])}) } i = c.P1 + c.Del } if i < len(runesA) { diffs = append(diffs, StringDiff{Type: Equal, Text: string(runesA[i:])}) } return diffs, nil } func DiffWords(ctx context.Context, a, b string, algo Algorithm, splitFunc func(string) []string) ([]StringDiff, error) { if splitFunc == nil { splitFunc = SplitWords } wordsA := splitFunc(a) wordsB := splitFunc(b) changes, err := DiffSlices(ctx, wordsA, wordsB, algo) if err != nil { return nil, err } diffs := make([]StringDiff, 0, 10) i := 0 for _, c := range changes { if i < c.P1 { diffs = append(diffs, StringDiff{Type: Equal, Text: strings.Join(wordsA[i:c.P1], "")}) } if c.Del != 0 { diffs = append(diffs, StringDiff{Type: Delete, Text: strings.Join(wordsA[c.P1:c.P1+c.Del], "")}) } if c.Ins != 0 { diffs = append(diffs, StringDiff{Type: Insert, Text: strings.Join(wordsB[c.P2:c.P2+c.Ins], "")}) } i = c.P1 + c.Del } if i < len(wordsA) { diffs = append(diffs, StringDiff{Type: Equal, Text: strings.Join(wordsA[i:], "")}) } return diffs, nil } ================================================ FILE: modules/diferenco/diferenco_test.go ================================================ package diferenco import ( "fmt" "os" "path/filepath" "runtime" "testing" "time" "github.com/antgroup/hugescm/modules/diferenco/color" ) func TestDiff(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) aa := []Algorithm{Histogram, Myers, ONP, Patience} for _, a := range aa { now := time.Now() u, err := Unified(t.Context(), &Options{ From: &File{ Name: "a.txt", }, To: nil, S1: textA, S2: textB, A: a, }) if err != nil { return } fmt.Fprintf(os.Stderr, "\x1b[32m%s --> use time: %v\x1b[0m\n%s\n", a, time.Since(now), u) } } func TestPatchFD(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) fd, err := os.Open(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } defer fd.Close() // nolint bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) u, err := Unified(t.Context(), &Options{ From: &File{ Name: "a.txt", Hash: "4789568", Mode: 0o10644, }, To: &File{ Name: "b.txt", Hash: "6547898", Mode: 0o10644, }, R1: fd, S2: textB, }) if err != nil { return } e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestPatch(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) u, err := Unified(t.Context(), &Options{ From: &File{ Name: "a.txt", Hash: "4789568", Mode: 0o10644, }, To: &File{ Name: "b.txt", Hash: "6547898", Mode: 0o10644, }, S1: textA, S2: textB, }) if err != nil { return } e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestPatchNew(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) u, err := Unified(t.Context(), &Options{ From: nil, To: &File{ Name: "a.txt", Hash: "6547898", Mode: 0o10644, }, S1: "", S2: textB, }) if err != nil { return } e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestPatchDelete(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) u, err := Unified(t.Context(), &Options{ From: &File{ Name: "a.txt", Hash: "6547898", Mode: 0o10644, }, To: nil, S1: textA, S2: "", }) if err != nil { return } e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestDiff2(t *testing.T) { textA := `hello world foo c07e640b246c7885cbc3d5c627acbcb2d2ab9c95` textB := `hello novel world foo bar 31df1778815171897c907daf454c4419cfaa46f9` u, err := Unified(t.Context(), &Options{ From: &File{ Name: "a.txt", Hash: "6547898", Mode: 0o10644, }, To: nil, S1: textA, S2: textB, }) if err != nil { return } e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestPatchScss(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/simple_1.scss")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/simple_2.scss")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) u, err := Unified(t.Context(), &Options{ From: &File{ Name: "a.txt", Hash: "4789568", Mode: 0o10644, }, To: &File{ Name: "b.txt", Hash: "6547898", Mode: 0o10644, }, S1: textA, S2: textB, }) if err != nil { return } e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestPatchCss(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/css_1.css")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/css_2.css")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) u, err := Unified(t.Context(), &Options{ From: &File{ Name: "a.txt", Hash: "4789568", Mode: 0o10644, }, To: &File{ Name: "b.txt", Hash: "6547898", Mode: 0o10644, }, S1: textA, S2: textB, }) if err != nil { return } e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestShowPatch(t *testing.T) { patch := []*Patch{ { From: &File{ Name: "docs/a.png", Hash: "1ab12893fc666524ed79caae503e12c20a748e2f92db7730c8be09d981970f96", Mode: 33188, }, IsBinary: true, }, { To: &File{ Name: "images/windows7.iso", Hash: "adba50d9794b9ef3f7ec8cbc680f7f1fa3fbf9df0ac8d1f9b9ccab6d941bc11b", Mode: 33188, }, IsFragments: true, }, } e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode(patch) } func TestDiffRunes(t *testing.T) { a := "The quick brown fox jumps over the lazy dog" b := "The quick brown dog leaps over the lazy cat" sd, err := DiffRunes(t.Context(), a, b, ONP) if err != nil { fmt.Fprintf(os.Stderr, "diff error: %v\n", err) return } for _, d := range sd { switch d.Type { case Equal: fmt.Fprintf(os.Stderr, "%s", d.Text) case Insert: fmt.Fprintf(os.Stderr, "\x1b[32m%s\x1b[0m", d.Text) case Delete: fmt.Fprintf(os.Stderr, "\x1b[31m%s\x1b[0m", d.Text) } } fmt.Fprintf(os.Stderr, "\n") } func TestDiffWords(t *testing.T) { a := "The quick brown fox jumps over the lazy dog" b := "The quick brown dog leaps over the lazy cat" sd, err := DiffWords(t.Context(), a, b, Histogram, nil) if err != nil { fmt.Fprintf(os.Stderr, "diff error: %v\n", err) return } for _, d := range sd { switch d.Type { case Equal: fmt.Fprintf(os.Stderr, "%s", d.Text) case Insert: fmt.Fprintf(os.Stderr, "\x1b[32m%s\x1b[0m", d.Text) case Delete: fmt.Fprintf(os.Stderr, "\x1b[31m%s\x1b[0m", d.Text) } } fmt.Fprintf(os.Stderr, "\n") } func TestDiffWords2(t *testing.T) { a := "The quick 你好brown fox jumps over the lazy dog" b := "The quick 你好 brown dog leaps over the lazy cat" sd, err := DiffWords(t.Context(), a, b, Histogram, nil) if err != nil { fmt.Fprintf(os.Stderr, "diff error: %v\n", err) return } for _, d := range sd { switch d.Type { case Equal: fmt.Fprintf(os.Stderr, "%s", d.Text) case Insert: fmt.Fprintf(os.Stderr, "\x1b[32m%s\x1b[0m", d.Text) case Delete: fmt.Fprintf(os.Stderr, "\x1b[31m%s\x1b[0m", d.Text) } } fmt.Fprintf(os.Stderr, "\n") } ================================================ FILE: modules/diferenco/gen_unicode.go ================================================ //go:build ignore package main import ( "bufio" "bytes" "fmt" "io" "net/http" "os" "sort" "strconv" "strings" ) const ( eastAsianWidthURL = "https://unicode.org/Public/UNIDATA/EastAsianWidth.txt" emojiDataURL = "https://unicode.org/Public/UNIDATA/emoji/emoji-data.txt" outputFile = "unicode_data.go" outputPackage = "diferenco" ) type interval struct { first rune last rune } func main() { if err := run(); err != nil { fmt.Fprintf(os.Stderr, "gen_unicode: %v\n", err) os.Exit(1) } } func run() error { cjkRanges, err := fetchRanges(eastAsianWidthURL, func(prop string) bool { return prop == "W" || prop == "F" }) if err != nil { return fmt.Errorf("load EastAsianWidth data: %w", err) } emojiRanges, err := fetchRanges(emojiDataURL, func(prop string) bool { return prop == "Extended_Pictographic" || prop == "Emoji" || prop == "Emoji_Component" }) if err != nil { return fmt.Errorf("load emoji data: %w", err) } var buf bytes.Buffer writeHeader(&buf) writeIntervals(&buf, "cjkRanges", cjkRanges) writeIntervals(&buf, "emojiRanges", emojiRanges) if err := writeFileAtomically(outputFile, buf.Bytes(), 0o644); err != nil { return fmt.Errorf("write %s: %w", outputFile, err) } return nil } func fetchRanges(url string, wantProperty func(string) bool) ([]interval, error) { resp, err := http.Get(url) if err != nil { return nil, fmt.Errorf("GET %s: %w", url, err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(io.LimitReader(resp.Body, 4<<10)) return nil, fmt.Errorf("GET %s: status %s: %s", url, resp.Status, strings.TrimSpace(string(body))) } var ranges []interval scanner := bufio.NewScanner(resp.Body) for lineNo := 1; scanner.Scan(); lineNo++ { line := stripComment(scanner.Text()) if line == "" { continue } codePointPart, propertyPart, ok := strings.Cut(line, ";") if !ok { continue } property := strings.TrimSpace(propertyPart) if !wantProperty(property) { continue } r, err := parseInterval(strings.TrimSpace(codePointPart)) if err != nil { return nil, fmt.Errorf("%s:%d: %w", url, lineNo, err) } ranges = append(ranges, r) } if err := scanner.Err(); err != nil { return nil, fmt.Errorf("scan %s: %w", url, err) } return mergeIntervals(ranges), nil } func stripComment(s string) string { s, _, _ = strings.Cut(s, "#") return strings.TrimSpace(s) } func parseInterval(s string) (interval, error) { if start, end, ok := strings.Cut(s, ".."); ok { first, err := parseHexRune(start) if err != nil { return interval{}, fmt.Errorf("invalid range start %q: %w", start, err) } last, err := parseHexRune(end) if err != nil { return interval{}, fmt.Errorf("invalid range end %q: %w", end, err) } if first > last { return interval{}, fmt.Errorf("invalid range %q: start > end", s) } return interval{first: first, last: last}, nil } r, err := parseHexRune(s) if err != nil { return interval{}, fmt.Errorf("invalid code point %q: %w", s, err) } return interval{first: r, last: r}, nil } func parseHexRune(s string) (rune, error) { v, err := strconv.ParseUint(strings.TrimSpace(s), 16, 32) if err != nil { return 0, err } return rune(v), nil } func mergeIntervals(ranges []interval) []interval { if len(ranges) == 0 { return nil } sort.Slice(ranges, func(i, j int) bool { if ranges[i].first != ranges[j].first { return ranges[i].first < ranges[j].first } return ranges[i].last < ranges[j].last }) out := make([]interval, 0, len(ranges)) out = append(out, ranges[0]) for _, r := range ranges[1:] { last := &out[len(out)-1] if r.first <= last.last+1 { if r.last > last.last { last.last = r.last } continue } out = append(out, r) } return out } func writeHeader(w io.Writer) { fmt.Fprintln(w, "// Code generated by gen_unicode.go. DO NOT EDIT.") fmt.Fprintln(w) fmt.Fprintf(w, "package %s\n\n", outputPackage) } func writeIntervals(w io.Writer, name string, ranges []interval) { fmt.Fprintf(w, "var %s = []interval{\n", name) for _, r := range ranges { fmt.Fprintf(w, "\t{0x%04X, 0x%04X},\n", r.first, r.last) } fmt.Fprintln(w, "}") fmt.Fprintln(w) } func writeFileAtomically(name string, data []byte, perm os.FileMode) error { tmp := name + ".tmp" if err := os.WriteFile(tmp, data, perm); err != nil { return err } if err := os.Rename(tmp, name); err != nil { _ = os.Remove(tmp) return err } return nil } ================================================ FILE: modules/diferenco/histogram.go ================================================ // Refer to https://github.com/pascalkuthe/imara-diff reimplemented in Golang. package diferenco import "context" // https://stackoverflow.com/questions/32365271/whats-the-difference-between-git-diff-patience-and-git-diff-histogram/32367597#32367597 // https://arxiv.org/abs/1902.02467 const MaxChainLen = 63 type histogramIndex[E comparable] struct { tokenOccurrences map[E][]int } func (h *histogramIndex[E]) populate(a []E) { for i, e := range a { if p, ok := h.tokenOccurrences[e]; ok { h.tokenOccurrences[e] = append(p, i) continue } h.tokenOccurrences[e] = []int{i} } } func (h *histogramIndex[E]) numTokenOccurrences(e E) int { if p, ok := h.tokenOccurrences[e]; ok { return len(p) } return 0 } func (h *histogramIndex[E]) clear() { // runtime: clear() is slow for maps with big capacity and small number of items // https://github.com/golang/go/issues/70617 h.tokenOccurrences = make(map[E][]int) } type lcsMatch struct { beforeStart int afterStart int length int } type lcsFinder[E comparable] struct { lcs lcsMatch minOccurrences int foundCS bool } func (s *lcsFinder[E]) run(before, after []E, h *histogramIndex[E]) { pos := 0 for pos < len(after) { e := after[pos] if num := h.numTokenOccurrences(e); num != 0 { s.foundCS = true if num <= s.minOccurrences { pos = s.updateLcs(before, after, pos, e, h) continue } } pos++ } h.clear() } func (s *lcsFinder[E]) updateLcs(before, after []E, afterPos int, token E, h *histogramIndex[E]) int { nextTokenIndex2 := afterPos + 1 tokenOccurrences := h.tokenOccurrences[token] tokenIndex1 := tokenOccurrences[0] pos := 1 occurrencesIter: for { occurrences := h.numTokenOccurrences(token) s1, s2 := tokenIndex1, afterPos for s1 != 0 && s2 != 0 { t1, t2 := before[s1-1], after[s2-1] if t1 != t2 { break } s1-- s2-- occurrences = min(h.numTokenOccurrences(t1), occurrences) } e1, e2 := tokenIndex1+1, afterPos+1 for e1 < len(before) && e2 < len(after) { t1, t2 := before[e1], after[e2] if t1 != t2 { break } occurrences = min(h.numTokenOccurrences(t1), occurrences) e1++ e2++ } if nextTokenIndex2 < e2 { nextTokenIndex2 = e2 } length := e2 - s2 // Heuristic: prefer longest match first, then lowest occurrences for stability if length > s.lcs.length || (length == s.lcs.length && occurrences < s.minOccurrences) { s.minOccurrences = occurrences s.lcs = lcsMatch{ beforeStart: s1, afterStart: s2, length: length, } } for { if pos >= len(tokenOccurrences) { break occurrencesIter } nextTokenIndex := tokenOccurrences[pos] pos++ if nextTokenIndex > e2 { tokenIndex1 = nextTokenIndex break } } } return nextTokenIndex2 } func (s *lcsFinder[E]) ok() bool { return !s.foundCS || s.minOccurrences <= MaxChainLen } func findLcs[E comparable](before, after []E, index *histogramIndex[E]) *lcsMatch { s := lcsFinder[E]{ minOccurrences: MaxChainLen + 1, } s.run(before, after, index) if s.ok() { return &s.lcs } return nil } type changesOut struct { changes []Change } func (h *histogramIndex[E]) run(ctx context.Context, before []E, beforePos int, after []E, afterPos int, o *changesOut) error { for { select { case <-ctx.Done(): return ctx.Err() default: } if len(before) == 0 { if len(after) != 0 { o.changes = append(o.changes, Change{P1: beforePos, P2: afterPos, Ins: len(after)}) } return nil } if len(after) == 0 { o.changes = append(o.changes, Change{P1: beforePos, P2: afterPos, Del: len(before)}) return nil } h.populate(before) lcs := findLcs(before, after, h) if lcs == nil { changes, err := onpCompute(ctx, before, beforePos, after, afterPos) if err != nil { return err } o.changes = append(o.changes, changes...) return nil } if lcs.length == 0 { o.changes = append(o.changes, Change{P1: beforePos, P2: afterPos, Del: len(before), Ins: len(after)}) return nil } if err := h.run(ctx, before[:lcs.beforeStart], beforePos, after[:lcs.afterStart], afterPos, o); err != nil { return err } e1 := lcs.beforeStart + lcs.length before = before[e1:] beforePos += e1 e2 := lcs.afterStart + lcs.length after = after[e2:] afterPos += e2 } } // histogram: calculates the difference using the histogram algorithm func histogram[E comparable](ctx context.Context, L1, L2 []E) ([]Change, error) { prefix := commonPrefixLength(L1, L2) L1 = L1[prefix:] L2 = L2[prefix:] suffix := commonSuffixLength(L1, L2) L1 = L1[:len(L1)-suffix] L2 = L2[:len(L2)-suffix] h := &histogramIndex[E]{ tokenOccurrences: make(map[E][]int, len(L1)), } o := &changesOut{changes: make([]Change, 0, 100)} if err := h.run(ctx, L1, prefix, L2, prefix, o); err != nil { return nil, err } return o.changes, nil } ================================================ FILE: modules/diferenco/histogram_test.go ================================================ package diferenco import ( "fmt" "os" "path/filepath" "runtime" "testing" "github.com/antgroup/hugescm/modules/diferenco/color" ) func TestHistogram(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestHistogramGit(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig()), WithVCS("git")) _ = e.Encode([]*Patch{u}) } func TestHistogram2(t *testing.T) { lines1 := `A x A A A x A A A` lines2 := `A x A Z A x A A A` sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(lines1) b := sink.SplitLines(lines2) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestHistogram3(t *testing.T) { lines1 := `a b c a b c` lines2 := `x b z a b c` sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(lines1) b := sink.SplitLines(lines2) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } func TestHistogram4(t *testing.T) { lines1 := `a b c a b c a b c` lines2 := `a b c a1 a2 a3 b c1 a b c` sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(lines1) b := sink.SplitLines(lines2) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } // TestHistogramHeuristic demonstrates the improved heuristic effect func TestHistogramHeuristic(t *testing.T) { // Case 1: Multiple potential anchors - should pick the most unique one t.Log("\n=== Case 1: Prefer unique anchor over common lines ===") t.Log("Before optimization: might pick any matching line") t.Log("After optimization: picks the most unique (lowest occurrences) line") { text1 := `start unique_anchor middle common common end` text2 := `start unique_anchor middle common end` sink := &Sink{Index: make(map[string]int)} a := sink.SplitLines(text1) b := sink.SplitLines(text2) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) // Verify: should have 1 delete totalDel := 0 for _, c := range changes { totalDel += c.Del } t.Logf("Result: %d changes, %d deletions (expected: 1 deletion)", len(changes), totalDel) } // Case 2: Longer match vs more unique match - prefer longer t.Log("\n=== Case 2: Prefer longer match over more unique ===") t.Log("Before optimization: might pick shorter unique match") t.Log("After optimization: picks the longest common substring") { text1 := `header block_start line1 line2 line3 block_end trailer` text2 := `header block_start line1 line2 line3 block_end new_trailer` sink := &Sink{Index: make(map[string]int)} a := sink.SplitLines(text1) b := sink.SplitLines(text2) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) totalDel, totalIns := 0, 0 for _, c := range changes { totalDel += c.Del totalIns += c.Ins } t.Logf("Result: %d changes, %d deletions, %d insertions", len(changes), totalDel, totalIns) t.Logf("Expected: 1 delete (trailer) + 1 insert (new_trailer)") } // Case 3: Cross-match scenario - classic diff problem t.Log("\n=== Case 3: Cross-match avoidance ===") t.Log("Without heuristic: might match wrong braces") t.Log("With heuristic: matches unique function signatures correctly") { text1 := `func foo() { return 1; } func bar() { return 2; }` text2 := `func foo() { return 1; } func bar() { return 99; }` sink := &Sink{Index: make(map[string]int)} a := sink.SplitLines(text1) b := sink.SplitLines(text2) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) totalDel, totalIns := 0, 0 for _, c := range changes { totalDel += c.Del totalIns += c.Ins } t.Logf("Result: %d deletions, %d insertions (expected: 1 del + 1 ins)", totalDel, totalIns) } // Case 4: Identical repeated blocks - stability test t.Log("\n=== Case 4: Repeated blocks stability ===") t.Log("Multiple identical blocks should be matched correctly") { text1 := `block { a b } block { a b }` text2 := `block { a X } block { a Y }` sink := &Sink{Index: make(map[string]int)} a := sink.SplitLines(text1) b := sink.SplitLines(text2) changes, _ := DiffSlices(t.Context(), a, b, Histogram) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) t.Logf("Result: %d changes (expected: 2 changes - one per block)", len(changes)) } } ================================================ FILE: modules/diferenco/lcs/LICENSE ================================================ Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: modules/diferenco/lcs/common.go ================================================ // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs import ( "log" "sort" ) // lcs is a longest common sequence type lcs []diag // A diag is a piece of the edit graph where A[X+i] == B[Y+i], for 0<=i l[j].Len }) return l } // validate that the elements of the lcs do not overlap // (can only happen when the two-sided algorithm ends early) // expects the lcs to be sorted func (l lcs) valid() bool { for i := 1; i < len(l); i++ { if l[i-1].X+l[i-1].Len > l[i].X { return false } if l[i-1].Y+l[i-1].Len > l[i].Y { return false } } return true } // repair overlapping lcs // only called if two-sided stops early func (l lcs) fix() lcs { // from the set of diagonals in l, find a maximal non-conflicting set // this problem may be NP-complete, but we use a greedy heuristic, // which is quadratic, but with a better data structure, could be D log D. // independent is not enough: {0,3,1} and {3,0,2} can't both occur in an lcs // which has to have monotone x and y if len(l) == 0 { return nil } sort.Slice(l, func(i, j int) bool { return l[i].Len > l[j].Len }) tmp := make(lcs, 0, len(l)) tmp = append(tmp, l[0]) for i := 1; i < len(l); i++ { var dir direction nxt := l[i] for _, in := range tmp { if dir, nxt = overlap(in, nxt); dir == empty || dir == bad { break } } if nxt.Len > 0 && dir != bad { tmp = append(tmp, nxt) } } tmp.sort() if false && !tmp.valid() { // debug checking log.Fatalf("here %d", len(tmp)) } return tmp } type direction int const ( empty direction = iota // diag is empty (so not in lcs) leftdown // proposed acceptably to the left and below rightup // proposed diag is acceptably to the right and above bad // proposed diag is inconsistent with the lcs so far ) // overlap trims the proposed diag prop so it doesn't overlap with // the existing diag that has already been added to the lcs. func overlap(exist, prop diag) (direction, diag) { if prop.X <= exist.X && exist.X < prop.X+prop.Len { // remove the end of prop where it overlaps with the X end of exist delta := prop.X + prop.Len - exist.X prop.Len -= delta if prop.Len <= 0 { return empty, prop } } if exist.X <= prop.X && prop.X < exist.X+exist.Len { // remove the beginning of prop where overlaps with exist delta := exist.X + exist.Len - prop.X prop.Len -= delta if prop.Len <= 0 { return empty, prop } prop.X += delta prop.Y += delta } if prop.Y <= exist.Y && exist.Y < prop.Y+prop.Len { // remove the end of prop that overlaps (in Y) with exist delta := prop.Y + prop.Len - exist.Y prop.Len -= delta if prop.Len <= 0 { return empty, prop } } if exist.Y <= prop.Y && prop.Y < exist.Y+exist.Len { // remove the beginning of peop that overlaps with exist delta := exist.Y + exist.Len - prop.Y prop.Len -= delta if prop.Len <= 0 { return empty, prop } prop.X += delta // no test reaches this code prop.Y += delta } if prop.X+prop.Len <= exist.X && prop.Y+prop.Len <= exist.Y { return leftdown, prop } if exist.X+exist.Len <= prop.X && exist.Y+exist.Len <= prop.Y { return rightup, prop } // prop can't be in an lcs that contains exist return bad, prop } // manipulating Diag and lcs // prepend a diagonal (x,y)-(x+1,y+1) segment either to an empty lcs // or to its first Diag. prepend is only called to extend diagonals // the backward direction. func (lcs lcs) prepend(x, y int) lcs { if len(lcs) > 0 { d := &lcs[0] if d.X == x+1 && d.Y == y+1 { // extend the diagonal down and to the left d.X, d.Y = x, y d.Len++ return lcs } } r := diag{X: x, Y: y, Len: 1} lcs = append([]diag{r}, lcs...) return lcs } // append appends a diagonal, or extends the existing one. // by adding the edge (x,y)-(x+1.y+1). append is only called // to extend diagonals in the forward direction. func (lcs lcs) append(x, y int) lcs { if len(lcs) > 0 { last := &lcs[len(lcs)-1] // Expand last element if adjoining. if last.X+last.Len == x && last.Y+last.Len == y { last.Len++ return lcs } } return append(lcs, diag{X: x, Y: y, Len: 1}) } // enforce constraint on d, k func ok(d, k int) bool { return d >= 0 && -d <= k && k <= d } ================================================ FILE: modules/diferenco/lcs/common_test.go ================================================ // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs import ( "log" "math/rand/v2" "slices" "strings" "testing" ) type Btest struct { a, b string lcs []string } var Btests = []Btest{ {"aaabab", "abaab", []string{"abab", "aaab"}}, {"aabbba", "baaba", []string{"aaba"}}, {"cabbx", "cbabx", []string{"cabx", "cbbx"}}, {"c", "cb", []string{"c"}}, {"aaba", "bbb", []string{"b"}}, {"bbaabb", "b", []string{"b"}}, {"baaabb", "bbaba", []string{"bbb", "baa", "bab"}}, {"baaabb", "abbab", []string{"abb", "bab", "aab"}}, {"baaba", "aaabba", []string{"aaba"}}, {"ca", "cba", []string{"ca"}}, {"ccbcbc", "abba", []string{"bb"}}, {"ccbcbc", "aabba", []string{"bb"}}, {"ccb", "cba", []string{"cb"}}, {"caef", "axe", []string{"ae"}}, {"bbaabb", "baabb", []string{"baabb"}}, // Example from Myers: {"abcabba", "cbabac", []string{"caba", "baba", "cbba"}}, {"3456aaa", "aaa", []string{"aaa"}}, {"aaa", "aaa123", []string{"aaa"}}, {"aabaa", "aacaa", []string{"aaaa"}}, {"1a", "a", []string{"a"}}, {"abab", "bb", []string{"bb"}}, {"123", "ab", []string{""}}, {"a", "b", []string{""}}, {"abc", "123", []string{""}}, {"aa", "aa", []string{"aa"}}, {"abcde", "12345", []string{""}}, {"aaa3456", "aaa", []string{"aaa"}}, {"abcde", "12345a", []string{"a"}}, {"ab", "123", []string{""}}, {"1a2", "a", []string{"a"}}, // for two-sided {"babaab", "cccaba", []string{"aba"}}, {"aabbab", "cbcabc", []string{"bab"}}, {"abaabb", "bcacab", []string{"baab"}}, {"abaabb", "abaaaa", []string{"abaa"}}, {"bababb", "baaabb", []string{"baabb"}}, {"abbbaa", "cabacc", []string{"aba"}}, {"aabbaa", "aacaba", []string{"aaaa", "aaba"}}, } func init() { log.SetFlags(log.Lshortfile) } func check(t *testing.T, str string, lcs lcs, want []string) { t.Helper() if !lcs.valid() { t.Errorf("bad lcs %v", lcs) } var got strings.Builder for _, dd := range lcs { got.WriteString(str[dd.X : dd.X+dd.Len]) } ans := got.String() if slices.Contains(want, ans) { return } t.Fatalf("str=%q lcs=%v want=%q got=%q", str, lcs, want, ans) } func checkDiffs(t *testing.T, before string, diffs []Diff, after string) { t.Helper() var ans strings.Builder sofar := 0 // index of position in before for _, d := range diffs { if sofar < d.Start { ans.WriteString(before[sofar:d.Start]) } ans.WriteString(after[d.ReplStart:d.ReplEnd]) sofar = d.End } ans.WriteString(before[sofar:]) if ans.String() != after { t.Fatalf("diff %v took %q to %q, not to %q", diffs, before, ans.String(), after) } } func lcslen(l lcs) int { ans := 0 for _, d := range l { ans += d.Len } return ans } // return a random string of length n made of characters from s func randstr(s string, n int) string { src := []rune(s) x := make([]rune, n) for i := range n { x[i] = src[rand.IntN(len(src))] } return string(x) } func TestLcsFix(t *testing.T) { tests := []struct{ before, after lcs }{ {lcs{diag{0, 0, 3}, diag{2, 2, 5}, diag{3, 4, 5}, diag{8, 9, 4}}, lcs{diag{0, 0, 2}, diag{2, 2, 1}, diag{3, 4, 5}, diag{8, 9, 4}}}, {lcs{diag{1, 1, 6}, diag{6, 12, 3}}, lcs{diag{1, 1, 5}, diag{6, 12, 3}}}, {lcs{diag{0, 0, 4}, diag{3, 5, 4}}, lcs{diag{0, 0, 3}, diag{3, 5, 4}}}, {lcs{diag{0, 20, 1}, diag{0, 0, 3}, diag{1, 20, 4}}, lcs{diag{0, 0, 3}, diag{3, 22, 2}}}, {lcs{diag{0, 0, 4}, diag{1, 1, 2}}, lcs{diag{0, 0, 4}}}, {lcs{diag{0, 0, 4}}, lcs{diag{0, 0, 4}}}, {lcs{}, lcs{}}, {lcs{diag{0, 0, 4}, diag{1, 1, 6}, diag{3, 3, 2}}, lcs{diag{0, 0, 1}, diag{1, 1, 6}}}, } for n, x := range tests { got := x.before.fix() if len(got) != len(x.after) { t.Errorf("got %v, expected %v, for %v", got, x.after, x.before) } olen := lcslen(x.after) glen := lcslen(got) if olen != glen { t.Errorf("%d: lens(%d,%d) differ, %v, %v, %v", n, glen, olen, got, x.after, x.before) } } } ================================================ FILE: modules/diferenco/lcs/doc.go ================================================ // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // package lcs contains code to find longest-common-subsequences // (and diffs) package lcs /* Compute longest-common-subsequences of two slices A, B using algorithms from Myers' paper. A longest-common-subsequence (LCS from now on) of A and B is a maximal set of lexically increasing pairs of subscripts (x,y) with A[x]==B[y]. There may be many LCS, but they all have the same length. An LCS determines a sequence of edits that changes A into B. The key concept is the edit graph of A and B. If A has length N and B has length M, then the edit graph has vertices v[i][j] for 0 <= i <= N, 0 <= j <= M. There is a horizontal edge from v[i][j] to v[i+1][j] whenever both are in the graph, and a vertical edge from v[i][j] to f[i][j+1] similarly. When A[i] == B[j] there is a diagonal edge from v[i][j] to v[i+1][j+1]. A path between in the graph between (0,0) and (N,M) determines a sequence of edits converting A into B: each horizontal edge corresponds to removing an element of A, and each vertical edge corresponds to inserting an element of B. A vertex (x,y) is on (forward) diagonal k if x-y=k. A path in the graph is of length D if it has D non-diagonal edges. The algorithms generate forward paths (in which at least one of x,y increases at each edge), or backward paths (in which at least one of x,y decreases at each edge), or a combination. (Note that the orientation is the traditional mathematical one, with the origin in the lower-left corner.) Here is the edit graph for A:"aabbaa", B:"aacaba". (I know the diagonals look weird.) ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ b | | | ___/‾‾‾ | ___/‾‾‾ | | | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ c | | | | | | | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a a b b a a The algorithm labels a vertex (x,y) with D,k if it is on diagonal k and at the end of a maximal path of length D. (Because x-y=k it suffices to remember only the x coordinate of the vertex.) The forward algorithm: Find the longest diagonal starting at (0,0) and label its end with D=0,k=0. From that vertex take a vertical step and then follow the longest diagonal (up and to the right), and label that vertex with D=1,k=-1. From the D=0,k=0 point take a horizontal step and the follow the longest diagonal (up and to the right) and label that vertex D=1,k=1. In the same way, having labelled all the D vertices, from a vertex labelled D,k find two vertices tentatively labelled D+1,k-1 and D+1,k+1. There may be two on the same diagonal, in which case take the one with the larger x. Eventually the path gets to (N,M), and the diagonals on it are the LCS. Here is the edit graph with the ends of D-paths labelled. (So, for instance, 0/2,2 indicates that x=2,y=2 is labelled with 0, as it should be, since the first step is to go up the longest diagonal from (0,0).) A:"aabbaa", B:"aacaba" ⊙ ------- ⊙ ------- ⊙ -------(3/3,6)------- ⊙ -------(3/5,6)-------(4/6,6) a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ -------(2/3,5)------- ⊙ ------- ⊙ ------- ⊙ b | | | ___/‾‾‾ | ___/‾‾‾ | | | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ -------(3/5,4)------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ -------(1/2,3)-------(2/3,3)------- ⊙ ------- ⊙ ------- ⊙ c | | | | | | | ⊙ ------- ⊙ -------(0/2,2)-------(1/3,2)-------(2/4,2)-------(3/5,2)-------(4/6,2) a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a a b b a a The 4-path is reconstructed starting at (4/6,6), horizontal to (3/5,6), diagonal to (3,4), vertical to (2/3,3), horizontal to (1/2,3), vertical to (0/2,2), and diagonal to (0,0). As expected, there are 4 non-diagonal steps, and the diagonals form an LCS. There is a symmetric backward algorithm, which gives (backwards labels are prefixed with a colon): A:"aabbaa", B:"aacaba" ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ --------(:0/5,5)-------- ⊙ b | | | ____/‾‾‾ | ____/‾‾‾ | | | ⊙ -------- ⊙ -------- ⊙ --------(:1/3,4)-------- ⊙ -------- ⊙ -------- ⊙ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | (:3/0,3)--------(:2/1,3)-------- ⊙ --------(:2/3,3)--------(:1/4,3)-------- ⊙ -------- ⊙ c | | | | | | | ⊙ -------- ⊙ -------- ⊙ --------(:3/3,2)--------(:2/4,2)-------- ⊙ -------- ⊙ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | (:3/0,1)-------- ⊙ -------- ⊙ -------- ⊙ --------(:3/4,1)-------- ⊙ -------- ⊙ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | (:4/0,0)-------- ⊙ -------- ⊙ -------- ⊙ --------(:4/4,0)-------- ⊙ -------- ⊙ a a b b a a Neither of these is ideal for use in an editor, where it is undesirable to send very long diffs to the front end. It's tricky to decide exactly what 'very long diffs' means, as "replace A by B" is very short. We want to control how big D can be, by stopping when it gets too large. The forward algorithm then privileges common prefixes, and the backward algorithm privileges common suffixes. Either is an undesirable asymmetry. Fortunately there is a two-sided algorithm, implied by results in Myers' paper. Here's what the labels in the edit graph look like. A:"aabbaa", B:"aacaba" ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | ⊙ --------- ⊙ --------- ⊙ --------- (2/3,5) --------- ⊙ --------- (:0/5,5)--------- ⊙ b | | | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ⊙ --------- ⊙ --------- ⊙ --------- (:1/3,4)--------- ⊙ --------- ⊙ --------- ⊙ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | ⊙ --------- (:2/1,3)--------- (1/2,3) ---------(2:2/3,3)--------- (:1/4,3)--------- ⊙ --------- ⊙ c | | | | | | | ⊙ --------- ⊙ --------- (0/2,2) --------- (1/3,2) ---------(2:2/4,2)--------- ⊙ --------- ⊙ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ a a b b a a The algorithm stopped when it saw the backwards 2-path ending at (1,3) and the forwards 2-path ending at (3,5). The criterion is a backwards path ending at (u,v) and a forward path ending at (x,y), where u <= x and the two points are on the same diagonal. (Here the edgegraph has a diagonal, but the criterion is x-y=u-v.) Myers proves there is a forward 2-path from (0,0) to (1,3), and that together with the backwards 2-path ending at (1,3) gives the expected 4-path. Unfortunately the forward path has to be constructed by another run of the forward algorithm; it can't be found from the computed labels. That is the worst case. Had the code noticed (x,y)=(u,v)=(3,3) the whole path could be reconstructed from the edgegraph. The implementation looks for a number of special cases to try to avoid computing an extra forward path. If the two-sided algorithm has stop early (because D has become too large) it will have found a forward LCS and a backwards LCS. Ideally these go with disjoint prefixes and suffixes of A and B, but disjointedness may fail and the two computed LCS may conflict. (An easy example is where A is a suffix of B, and shares a short prefix. The backwards LCS is all of A, and the forward LCS is a prefix of A.) The algorithm combines the two to form a best-effort LCS. In the worst case the forward partial LCS may have to be recomputed. */ /* Eugene Myers paper is titled "An O(ND) Difference Algorithm and Its Variations" and can be found at http://www.xmailserver.org/diff2.pdf (There is a generic implementation of the algorithm the repository with git hash b9ad7e4ade3a686d608e44475390ad428e60e7fc) */ ================================================ FILE: modules/diferenco/lcs/git.sh ================================================ #!/bin/bash # # Copyright 2022 The Go Authors. All rights reserved. # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. # # Creates a zip file containing all numbered versions # of the commit history of a large source file, for use # as input data for the tests of the diff algorithm. # # Run script from root of the x/tools repo. set -eu # WARNING: This script will install the latest version of $file # The largest real source file in the x/tools repo. # file=internal/golang/completion/completion.go # file=internal/golang/diagnostics.go file=internal/protocol/tsprotocol.go tmp=$(mktemp -d) git log $file | awk '/^commit / {print $2}' | nl -ba -nrz | while read n hash; do git checkout --quiet $hash $file cp -f $file $tmp/$n done (cd $tmp && zip -q - *) > testdata.zip rm -fr $tmp git restore --staged $file git restore $file echo "Created testdata.zip" ================================================ FILE: modules/diferenco/lcs/labels.go ================================================ // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs import ( "fmt" ) // For each D, vec[D] has length D+1, // and the label for (D, k) is stored in vec[D][(D+k)/2]. type label struct { vec [][]int } // Temporary checking DO NOT COMMIT true TO PRODUCTION CODE const debug = false // debugging. check that the (d,k) pair is valid // (that is, -d<=k<=d and d+k even) func checkDK(D, k int) { if k >= -D && k <= D && (D+k)%2 == 0 { return } panic(fmt.Sprintf("out of range, d=%d,k=%d", D, k)) } func (t *label) set(D, k, x int) { if debug { checkDK(D, k) } for len(t.vec) <= D { t.vec = append(t.vec, nil) } if t.vec[D] == nil { t.vec[D] = make([]int, D+1) } t.vec[D][(D+k)/2] = x // known that D+k is even } func (t *label) get(d, k int) int { if debug { checkDK(d, k) } return t.vec[d][(d+k)/2] } func newtriang(limit int) label { if limit < 100 { // Preallocate if limit is not large. return label{vec: make([][]int, limit)} } return label{} } ================================================ FILE: modules/diferenco/lcs/old.go ================================================ // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs // TODO(adonovan): remove unclear references to "old" in this package. import ( "fmt" ) // A Diff is a replacement of a portion of A by a portion of B. type Diff struct { Start, End int // offsets of portion to delete in A ReplStart, ReplEnd int // offset of replacement text in B } // DiffStrings returns the differences between two strings. // It does not respect rune boundaries. func DiffStrings(a, b string) []Diff { return diff(stringSeqs{a, b}) } // DiffBytes returns the differences between two byte sequences. // It does not respect rune boundaries. func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) } // DiffRunes returns the differences between two rune sequences. func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) } func diff(seqs sequences) []Diff { // A limit on how deeply the LCS algorithm should search. The value is just a guess. const maxDiffs = 100 diff, _ := compute(seqs, twosided, maxDiffs/2) return diff } func DiffSlices[E comparable](a, b []E) []Diff { return diff(comparableSeqs[E]{a, b}) } // compute computes the list of differences between two sequences, // along with the LCS. It is exercised directly by tests. // The algorithm is one of {forward, backward, twosided}. func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) { if limit <= 0 { limit = 1 << 25 // effectively infinity } alen, blen := seqs.lengths() g := &editGraph{ seqs: seqs, vf: newtriang(limit), vb: newtriang(limit), limit: limit, ux: alen, uy: blen, delta: alen - blen, } lcs := algo(g) diffs := lcs.toDiffs(alen, blen) return diffs, lcs } // editGraph carries the information for computing the lcs of two sequences. type editGraph struct { seqs sequences vf, vb label // forward and backward labels limit int // maximal value of D // the bounding rectangle of the current edit graph lx, ly, ux, uy int delta int // common subexpression: (ux-lx)-(uy-ly) } // toDiffs converts an LCS to a list of edits. func (lcs lcs) toDiffs(alen, blen int) []Diff { var diffs []Diff var pa, pb int // offsets in a, b for _, l := range lcs { if pa < l.X || pb < l.Y { diffs = append(diffs, Diff{pa, l.X, pb, l.Y}) } pa = l.X + l.Len pb = l.Y + l.Len } if pa < alen || pb < blen { diffs = append(diffs, Diff{pa, alen, pb, blen}) } return diffs } // --- FORWARD --- // fdone decides if the forward path has reached the upper right // corner of the rectangle. If so, it also returns the computed lcs. func (e *editGraph) fdone(D, k int) (bool, lcs) { // x, y, k are relative to the rectangle x := e.vf.get(D, k) y := x - k if x == e.ux && y == e.uy { return true, e.forwardlcs(D, k) } return false, nil } // run the forward algorithm, until success or up to the limit on D. func forward(e *editGraph) lcs { e.setForward(0, 0, e.lx) if ok, ans := e.fdone(0, 0); ok { return ans } // from D to D+1 for D := range e.limit { e.setForward(D+1, -(D + 1), e.getForward(D, -D)) if ok, ans := e.fdone(D+1, -(D + 1)); ok { return ans } e.setForward(D+1, D+1, e.getForward(D, D)+1) if ok, ans := e.fdone(D+1, D+1); ok { return ans } for k := -D + 1; k <= D-1; k += 2 { // these are tricky and easy to get backwards lookv := e.lookForward(k, e.getForward(D, k-1)+1) lookh := e.lookForward(k, e.getForward(D, k+1)) if lookv > lookh { e.setForward(D+1, k, lookv) } else { e.setForward(D+1, k, lookh) } if ok, ans := e.fdone(D+1, k); ok { return ans } } } // D is too large // find the D path with maximal x+y inside the rectangle and // use that to compute the found part of the lcs kmax := -e.limit - 1 diagmax := -1 for k := -e.limit; k <= e.limit; k += 2 { x := e.getForward(e.limit, k) y := x - k if x+y > diagmax && x <= e.ux && y <= e.uy { diagmax, kmax = x+y, k } } return e.forwardlcs(e.limit, kmax) } // recover the lcs by backtracking from the farthest point reached func (e *editGraph) forwardlcs(D, k int) lcs { var ans lcs for x := e.getForward(D, k); x != 0 || x-k != 0; { if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) { // if (x-1,y) is labelled D-1, x--,D--,k--,continue D, k, x = D-1, k-1, x-1 continue } else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) { // if (x,y-1) is labelled D-1, x, D--,k++, continue D, k = D-1, k+1 continue } // if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue y := x - k ans = ans.prepend(x+e.lx-1, y+e.ly-1) x-- } return ans } // start at (x,y), go up the diagonal as far as possible, // and label the result with d func (e *editGraph) lookForward(k, relx int) int { rely := relx - k x, y := relx+e.lx, rely+e.ly if x < e.ux && y < e.uy { x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy) } return x } func (e *editGraph) setForward(d, k, relx int) { x := e.lookForward(k, relx) e.vf.set(d, k, x-e.lx) } func (e *editGraph) getForward(d, k int) int { x := e.vf.get(d, k) return x } // --- BACKWARD --- // bdone decides if the backward path has reached the lower left corner func (e *editGraph) bdone(D, k int) (bool, lcs) { // x, y, k are relative to the rectangle x := e.vb.get(D, k) y := x - (k + e.delta) if x == 0 && y == 0 { return true, e.backwardlcs(D, k) } return false, nil } // run the backward algorithm, until success or up to the limit on D. // (used only by tests) func backward(e *editGraph) lcs { e.setBackward(0, 0, e.ux) if ok, ans := e.bdone(0, 0); ok { return ans } // from D to D+1 for D := range e.limit { e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1) if ok, ans := e.bdone(D+1, -(D + 1)); ok { return ans } e.setBackward(D+1, D+1, e.getBackward(D, D)) if ok, ans := e.bdone(D+1, D+1); ok { return ans } for k := -D + 1; k <= D-1; k += 2 { // these are tricky and easy to get wrong lookv := e.lookBackward(k, e.getBackward(D, k-1)) lookh := e.lookBackward(k, e.getBackward(D, k+1)-1) if lookv < lookh { e.setBackward(D+1, k, lookv) } else { e.setBackward(D+1, k, lookh) } if ok, ans := e.bdone(D+1, k); ok { return ans } } } // D is too large // find the D path with minimal x+y inside the rectangle and // use that to compute the part of the lcs found kmax := -e.limit - 1 diagmin := 1 << 25 for k := -e.limit; k <= e.limit; k += 2 { x := e.getBackward(e.limit, k) y := x - (k + e.delta) if x+y < diagmin && x >= 0 && y >= 0 { diagmin, kmax = x+y, k } } if kmax < -e.limit { panic(fmt.Sprintf("no paths when limit=%d?", e.limit)) } return e.backwardlcs(e.limit, kmax) } // recover the lcs by backtracking func (e *editGraph) backwardlcs(D, k int) lcs { var ans lcs for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; { if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) { // D--, k--, x unchanged D, k = D-1, k-1 continue } else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) { // D--, k++, x++ D, k, x = D-1, k+1, x+1 continue } y := x - (k + e.delta) ans = ans.append(x+e.lx, y+e.ly) x++ } return ans } // start at (x,y), go down the diagonal as far as possible, func (e *editGraph) lookBackward(k, relx int) int { rely := relx - (k + e.delta) // forward k = k + e.delta x, y := relx+e.lx, rely+e.ly if x > 0 && y > 0 { x -= e.seqs.commonSuffixLen(0, x, 0, y) } return x } // convert to rectangle, and label the result with d func (e *editGraph) setBackward(d, k, relx int) { x := e.lookBackward(k, relx) e.vb.set(d, k, x-e.lx) } func (e *editGraph) getBackward(d, k int) int { x := e.vb.get(d, k) return x } // -- TWOSIDED --- func twosided(e *editGraph) lcs { // The termination condition could be improved, as either the forward // or backward pass could succeed before Myers' Lemma applies. // Aside from questions of efficiency (is the extra testing cost-effective) // this is more likely to matter when e.limit is reached. e.setForward(0, 0, e.lx) e.setBackward(0, 0, e.ux) // from D to D+1 for D := range e.limit { // just finished a backwards pass, so check if got, ok := e.twoDone(D, D); ok { return e.twolcs(D, D, got) } // do a forwards pass (D to D+1) e.setForward(D+1, -(D + 1), e.getForward(D, -D)) e.setForward(D+1, D+1, e.getForward(D, D)+1) for k := -D + 1; k <= D-1; k += 2 { // these are tricky and easy to get backwards lookv := e.lookForward(k, e.getForward(D, k-1)+1) lookh := e.lookForward(k, e.getForward(D, k+1)) if lookv > lookh { e.setForward(D+1, k, lookv) } else { e.setForward(D+1, k, lookh) } } // just did a forward pass, so check if got, ok := e.twoDone(D+1, D); ok { return e.twolcs(D+1, D, got) } // do a backward pass, D to D+1 e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1) e.setBackward(D+1, D+1, e.getBackward(D, D)) for k := -D + 1; k <= D-1; k += 2 { // these are tricky and easy to get wrong lookv := e.lookBackward(k, e.getBackward(D, k-1)) lookh := e.lookBackward(k, e.getBackward(D, k+1)-1) if lookv < lookh { e.setBackward(D+1, k, lookv) } else { e.setBackward(D+1, k, lookh) } } } // D too large. combine a forward and backward partial lcs // first, a forward one kmax := -e.limit - 1 diagmax := -1 for k := -e.limit; k <= e.limit; k += 2 { x := e.getForward(e.limit, k) y := x - k if x+y > diagmax && x <= e.ux && y <= e.uy { diagmax, kmax = x+y, k } } if kmax < -e.limit { panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit)) } lcs := e.forwardlcs(e.limit, kmax) // now a backward one // find the D path with minimal x+y inside the rectangle and // use that to compute the lcs diagmin := 1 << 25 // infinity for k := -e.limit; k <= e.limit; k += 2 { x := e.getBackward(e.limit, k) y := x - (k + e.delta) if x+y < diagmin && x >= 0 && y >= 0 { diagmin, kmax = x+y, k } } if kmax < -e.limit { panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit)) } lcs = append(lcs, e.backwardlcs(e.limit, kmax)...) // These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs) ans := lcs.fix() return ans } // Does Myers' Lemma apply? func (e *editGraph) twoDone(df, db int) (int, bool) { if (df+db+e.delta)%2 != 0 { return 0, false // diagonals cannot overlap } kmin := max(-df, -db+e.delta) kmax := min(df, db+e.delta) for k := kmin; k <= kmax; k += 2 { x := e.vf.get(df, k) u := e.vb.get(db, k-e.delta) if u <= x { // is it worth looking at all the other k? for l := k; l <= kmax; l += 2 { x := e.vf.get(df, l) y := x - l u := e.vb.get(db, l-e.delta) v := u - l if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux { return l, true } } return k, true } } return 0, false } func (e *editGraph) twolcs(df, db, kf int) lcs { // db==df || db+1==df x := e.vf.get(df, kf) y := x - kf kb := kf - e.delta u := e.vb.get(db, kb) v := u - kf // Myers proved there is a df-path from (0,0) to (u,v) // and a db-path from (x,y) to (N,M). // In the first case the overall path is the forward path // to (u,v) followed by the backward path to (N,M). // In the second case the path is the backward path to (x,y) // followed by the forward path to (x,y) from (0,0). // Look for some special cases to avoid computing either of these paths. if x == u { // "babaab" "cccaba" // already patched together lcs := e.forwardlcs(df, kf) lcs = append(lcs, e.backwardlcs(db, kb)...) return lcs.sort() } // is (u-1,v) or (u,v-1) labelled df-1? // if so, that forward df-1-path plus a horizontal or vertical edge // is the df-path to (u,v), then plus the db-path to (N,M) if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 { // "aabbab" "cbcabc" lcs := e.forwardlcs(df-1, u-1-v) lcs = append(lcs, e.backwardlcs(db, kb)...) return lcs.sort() } if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u { // "abaabb" "bcacab" lcs := e.forwardlcs(df-1, u-(v-1)) lcs = append(lcs, e.backwardlcs(db, kb)...) return lcs.sort() } // The path can't possibly contribute to the lcs because it // is all horizontal or vertical edges if u == 0 || v == 0 || x == e.ux || y == e.uy { // "abaabb" "abaaaa" if u == 0 || v == 0 { return e.backwardlcs(db, kb) } return e.forwardlcs(df, kf) } // is (x+1,y) or (x,y+1) labelled db-1? if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 { // "bababb" "baaabb" lcs := e.backwardlcs(db-1, kb+1) lcs = append(lcs, e.forwardlcs(df, kf)...) return lcs.sort() } if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x { // "abbbaa" "cabacc" lcs := e.backwardlcs(db-1, kb-1) lcs = append(lcs, e.forwardlcs(df, kf)...) return lcs.sort() } // need to compute another path // "aabbaa" "aacaba" lcs := e.backwardlcs(db, kb) oldx, oldy := e.ux, e.uy e.ux = u e.uy = v lcs = append(lcs, forward(e)...) e.ux, e.uy = oldx, oldy return lcs.sort() } ================================================ FILE: modules/diferenco/lcs/old_test.go ================================================ // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs import ( "fmt" "log" "math/rand/v2" "os" "strings" "testing" ) func TestAlgosOld(t *testing.T) { for i, algo := range []func(*editGraph) lcs{forward, backward, twosided} { t.Run(strings.Fields("forward backward twosided")[i], func(t *testing.T) { for _, tx := range Btests { lim := len(tx.a) + len(tx.b) diffs, lcs := compute(stringSeqs{tx.a, tx.b}, algo, lim) check(t, tx.a, lcs, tx.lcs) checkDiffs(t, tx.a, diffs, tx.b) diffs, lcs = compute(stringSeqs{tx.b, tx.a}, algo, lim) check(t, tx.b, lcs, tx.lcs) checkDiffs(t, tx.b, diffs, tx.a) } }) } } func TestIntOld(t *testing.T) { // need to avoid any characters in btests lfill, rfill := "AAAAAAAAAAAA", "BBBBBBBBBBBB" for _, tx := range Btests { if len(tx.a) < 2 || len(tx.b) < 2 { continue } left := tx.a + lfill right := tx.b + rfill lim := len(tx.a) + len(tx.b) diffs, lcs := compute(stringSeqs{left, right}, twosided, lim) check(t, left, lcs, tx.lcs) checkDiffs(t, left, diffs, right) diffs, lcs = compute(stringSeqs{right, left}, twosided, lim) check(t, right, lcs, tx.lcs) checkDiffs(t, right, diffs, left) left = lfill + tx.a right = rfill + tx.b diffs, lcs = compute(stringSeqs{left, right}, twosided, lim) check(t, left, lcs, tx.lcs) checkDiffs(t, left, diffs, right) diffs, lcs = compute(stringSeqs{right, left}, twosided, lim) check(t, right, lcs, tx.lcs) checkDiffs(t, right, diffs, left) } } func TestSpecialOld(t *testing.T) { // exercises lcs.fix a := "golang.org/x/tools/intern" b := "github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/intern" diffs, lcs := compute(stringSeqs{a, b}, twosided, 4) if !lcs.valid() { t.Errorf("%d,%v", len(diffs), lcs) } } func TestRegressionOld001(t *testing.T) { a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" for i := 1; i < len(b); i++ { diffs, lcs := compute(stringSeqs{a, b}, twosided, i) // 14 from gopls if !lcs.valid() { t.Errorf("%d,%v", len(diffs), lcs) } checkDiffs(t, a, diffs, b) } } func TestRegressionOld002(t *testing.T) { a := "n\"\n)\n" b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n" for i := 1; i <= len(b); i++ { diffs, lcs := compute(stringSeqs{a, b}, twosided, i) if !lcs.valid() { t.Errorf("%d,%v", len(diffs), lcs) } checkDiffs(t, a, diffs, b) } } func TestRegressionOld003(t *testing.T) { a := "golang.org/x/hello v1.0.0\nrequire golang.org/x/unused v1" b := "golang.org/x/hello v1" for i := 1; i <= len(a); i++ { diffs, lcs := compute(stringSeqs{a, b}, twosided, i) if !lcs.valid() { t.Errorf("%d,%v", len(diffs), lcs) } checkDiffs(t, a, diffs, b) } } func TestRandOld(t *testing.T) { for i := range 1000 { // TODO(adonovan): use ASCII and bytesSeqs here? The use of // non-ASCII isn't relevant to the property exercised by the test. a := []rune(randstr("abω", 16)) b := []rune(randstr("abωc", 16)) seq := runesSeqs{a, b} const lim = 24 // large enough to get true lcs _, forw := compute(seq, forward, lim) _, back := compute(seq, backward, lim) _, two := compute(seq, twosided, lim) if lcslen(two) != lcslen(forw) || lcslen(forw) != lcslen(back) { t.Logf("\n%v\n%v\n%v", forw, back, two) t.Fatalf("%d forw:%d back:%d two:%d", i, lcslen(forw), lcslen(back), lcslen(two)) } if !two.valid() || !forw.valid() || !back.valid() { t.Errorf("check failure") } } } // TestDiffAPI tests the public API functions (Diff{Bytes,Strings,Runes}) // to ensure at least minimal parity of the three representations. func TestDiffAPI(t *testing.T) { for _, test := range []struct { a, b string wantStrings, wantBytes, wantRunes string }{ {"abcXdef", "abcxdef", "[{3 4 3 4}]", "[{3 4 3 4}]", "[{3 4 3 4}]"}, // ASCII {"abcωdef", "abcΩdef", "[{3 5 3 5}]", "[{3 5 3 5}]", "[{3 4 3 4}]"}, // non-ASCII } { gotStrings := fmt.Sprint(DiffStrings(test.a, test.b)) if gotStrings != test.wantStrings { t.Errorf("DiffStrings(%q, %q) = %v, want %v", test.a, test.b, gotStrings, test.wantStrings) } gotBytes := fmt.Sprint(DiffBytes([]byte(test.a), []byte(test.b))) if gotBytes != test.wantBytes { t.Errorf("DiffBytes(%q, %q) = %v, want %v", test.a, test.b, gotBytes, test.wantBytes) } gotRunes := fmt.Sprint(DiffRunes([]rune(test.a), []rune(test.b))) if gotRunes != test.wantRunes { t.Errorf("DiffRunes(%q, %q) = %v, want %v", test.a, test.b, gotRunes, test.wantRunes) } } } func BenchmarkTwoOld(b *testing.B) { tests := genBench("abc", 96) for range b.N { for _, tt := range tests { _, two := compute(stringSeqs{tt.before, tt.after}, twosided, 100) if !two.valid() { b.Error("check failed") } } } } func BenchmarkForwOld(b *testing.B) { tests := genBench("abc", 96) for range b.N { for _, tt := range tests { _, two := compute(stringSeqs{tt.before, tt.after}, forward, 100) if !two.valid() { b.Error("check failed") } } } } func genBench(set string, n int) []struct{ before, after string } { // before and after for benchmarks. 24 strings of length n with // before and after differing at least once, and about 5% var ans []struct{ before, after string } for range 24 { // maybe b should have an approximately known number of diffs a := randstr(set, n) cnt := 0 bb := make([]rune, 0, n) for _, r := range a { if rand.Float64() < .05 { cnt++ r = 'N' } bb = append(bb, r) } if cnt == 0 { // avoid == shortcut bb[n/2] = 'N' } ans = append(ans, struct{ before, after string }{a, string(bb)}) } return ans } // This benchmark represents a common case for a diff command: // large file with a single relatively small diff in the middle. // (It's not clear whether this is representative of gopls workloads // or whether it is important to gopls diff performance.) // // TODO(adonovan) opt: it could be much faster. For example, // comparing a file against itself is about 10x faster than with the // small deletion in the middle. Strangely, comparing a file against // itself minus the last byte is faster still; I don't know why. // There is much low-hanging fruit here for further improvement. func BenchmarkLargeFileSmallDiff(b *testing.B) { data, err := os.ReadFile("old.go") // large file if err != nil { log.Fatal(err) } n := len(data) src := string(data) dst := src[:n*49/100] + src[n*51/100:] // remove 2% from the middle b.Run("string", func(b *testing.B) { for range b.N { compute(stringSeqs{src, dst}, twosided, len(src)+len(dst)) } }) srcBytes := []byte(src) dstBytes := []byte(dst) b.Run("bytes", func(b *testing.B) { for range b.N { compute(bytesSeqs{srcBytes, dstBytes}, twosided, len(srcBytes)+len(dstBytes)) } }) srcRunes := []rune(src) dstRunes := []rune(dst) b.Run("runes", func(b *testing.B) { for range b.N { compute(runesSeqs{srcRunes, dstRunes}, twosided, len(srcRunes)+len(dstRunes)) } }) } ================================================ FILE: modules/diferenco/lcs/sequence.go ================================================ // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs // This file defines the abstract sequence over which the LCS algorithm operates. // sequences abstracts a pair of sequences, A and B. type sequences interface { lengths() (int, int) // len(A), len(B) commonPrefixLen(ai, aj, bi, bj int) int // len(commonPrefix(A[ai:aj], B[bi:bj])) commonSuffixLen(ai, aj, bi, bj int) int // len(commonSuffix(A[ai:aj], B[bi:bj])) } type stringSeqs struct{ a, b string } func (s stringSeqs) lengths() (int, int) { return len(s.a), len(s.b) } func (s stringSeqs) commonPrefixLen(ai, aj, bi, bj int) int { return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj]) } func (s stringSeqs) commonSuffixLen(ai, aj, bi, bj int) int { return commonSuffixLenString(s.a[ai:aj], s.b[bi:bj]) } // The explicit capacity in s[i:j:j] leads to more efficient code. type bytesSeqs struct{ a, b []byte } func (s bytesSeqs) lengths() (int, int) { return len(s.a), len(s.b) } func (s bytesSeqs) commonPrefixLen(ai, aj, bi, bj int) int { return commonPrefixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj]) } func (s bytesSeqs) commonSuffixLen(ai, aj, bi, bj int) int { return commonSuffixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj]) } type runesSeqs struct{ a, b []rune } func (s runesSeqs) lengths() (int, int) { return len(s.a), len(s.b) } func (s runesSeqs) commonPrefixLen(ai, aj, bi, bj int) int { return commonPrefixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj]) } func (s runesSeqs) commonSuffixLen(ai, aj, bi, bj int) int { return commonSuffixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj]) } // TODO(adonovan): optimize these functions using ideas from: // - https://go.dev/cl/408116 common.go // - https://go.dev/cl/421435 xor_generic.go // TODO(adonovan): factor using generics when available, // but measure performance impact. // commonPrefixLen* returns the length of the common prefix of a[ai:aj] and b[bi:bj]. func commonPrefixLenBytes(a, b []byte) int { n := min(len(a), len(b)) i := 0 for i < n && a[i] == b[i] { i++ } return i } func commonPrefixLenRunes(a, b []rune) int { n := min(len(a), len(b)) i := 0 for i < n && a[i] == b[i] { i++ } return i } func commonPrefixLenString(a, b string) int { n := min(len(a), len(b)) i := 0 for i < n && a[i] == b[i] { i++ } return i } // commonSuffixLen* returns the length of the common suffix of a[ai:aj] and b[bi:bj]. func commonSuffixLenBytes(a, b []byte) int { n := min(len(a), len(b)) i := 0 for i < n && a[len(a)-1-i] == b[len(b)-1-i] { i++ } return i } func commonSuffixLenRunes(a, b []rune) int { n := min(len(a), len(b)) i := 0 for i < n && a[len(a)-1-i] == b[len(b)-1-i] { i++ } return i } func commonSuffixLenString(a, b string) int { n := min(len(a), len(b)) i := 0 for i < n && a[len(a)-1-i] == b[len(b)-1-i] { i++ } return i } type comparableSeqs[E comparable] struct{ a, b []E } // commonPrefixLength returns the length of the common prefix of two T slices. func commonPrefixLength[E comparable](a, b []E) int { n := min(len(a), len(b)) i := 0 for i < n && a[i] == b[i] { i++ } return i } // commonSuffixLength returns the length of the common suffix of two rune slices. func commonSuffixLength[E comparable](a, b []E) int { i1, i2 := len(a), len(b) n := min(i1, i2) i := 0 for i < n && a[i1-1-i] == b[i2-1-i] { i++ } return i } func (s comparableSeqs[E]) lengths() (int, int) { return len(s.a), len(s.b) } func (s comparableSeqs[E]) commonPrefixLen(ai, aj, bi, bj int) int { return commonPrefixLength(s.a[ai:aj:aj], s.b[bi:bj:bj]) } func (s comparableSeqs[E]) commonSuffixLen(ai, aj, bi, bj int) int { return commonSuffixLength(s.a[ai:aj:aj], s.b[bi:bj:bj]) } ================================================ FILE: modules/diferenco/merge.go ================================================ /* Copyright (c) 2024 epic labs Package diff3 implements a three-way merge algorithm Original version in Javascript by Bryan Housel @bhousel: https://github.com/bhousel/node-diff3, which in turn is based on project Synchrotron, created by Tony Garnock-Jones. For more detail please visit: http://homepages.kcbbs.gen.nz/tonyg/projects/synchrotron.html https://github.com/tonyg/synchrotron Ported to go by Javier Peletier @jpeletier SOURCE: https://github.com/epiclabs-io/diff3 SPDX-License-Identifier: MIT */ package diferenco import ( "cmp" "context" "errors" "fmt" "io" "slices" "strings" ) // https://blog.jcoglan.com/2017/05/08/merging-with-diff3/ // Alice Original Bob // // 1. celery 1. celery 1. celery // 2. salmon 2. garlic 2. salmon // 3. tomatoes 3. onions 3. garlic // 4. garlic 4. salmon 4. onions // 5. onions 5. tomatoes 5. tomatoes // 6. wine 6. wine 6. wine // Alice Original Bob // // 1. celery 1. celery 1. celery A // ----------------------------------------------------------- // 2. garlic 2. salmon B // 2. salmon 3. onions 3. garlic // 4. salmon 4. onions // ----------------------------------------------------------- // 3. tomatoes 5. tomatoes 5. tomatoes C // ----------------------------------------------------------- // 4. garlic D // 5. onions // ----------------------------------------------------------- // 6. wine 6. wine 6. wine E // celery // <<<<<<< Alice // salmon // ======= // salmon // garlic // onions // >>>>>>> Bob // tomatoes // garlic // onions // wine const ( // Sep1 signifies the start of a conflict. Sep1 = "<<<<<<<" // Sep2 signifies the middle of a conflict. Sep2 = "=======" // Sep3 signifies the end of a conflict. Sep3 = ">>>>>>>" // SepO origin content SepO = "|||||||" ) type hunk [5]int // Given three files, A, O, and B, where both A and B are // independently derived from O, returns a fairly complicated // internal representation of merge decisions it's taken. The // interested reader may wish to consult // // Sanjeev Khanna, Keshav Kunal, and Benjamin C. Pierce. // 'A Formal Investigation of ' In Arvind and Prasad, // editors, Foundations of Software Technology and Theoretical // Computer Science (FSTTCS), December 2007. // // (http://www.cis.upenn.edu/~bcpierce/papers/diff3-short.pdf) func diff3MergeIndices[E comparable](ctx context.Context, o, a, b []E, algo Algorithm) ([][]int, error) { m1, err := DiffSlices(ctx, o, a, algo) if err != nil { return nil, err } m2, err := DiffSlices(ctx, o, b, algo) if err != nil { return nil, err } hunks := make([]hunk, 0, len(m1)+len(m2)) for i := range m1 { hunks = append(hunks, hunk{m1[i].P1, 0, m1[i].Del, m1[i].P2, m1[i].Ins}) } for i := range m2 { hunks = append(hunks, hunk{m2[i].P1, 2, m2[i].Del, m2[i].P2, m2[i].Ins}) } slices.SortFunc(hunks, func(a, b hunk) int { return cmp.Compare(a[0], b[0]) }) var result [][]int var commonOffset = 0 copyCommon := func(targetOffset int) { if targetOffset > commonOffset { result = append(result, []int{1, commonOffset, targetOffset - commonOffset}) commonOffset = targetOffset } } for hunkIndex := 0; hunkIndex < len(hunks); hunkIndex++ { firstHunkIndex := hunkIndex hunk := hunks[hunkIndex] regionLhs := hunk[0] regionRhs := regionLhs + hunk[2] for hunkIndex < len(hunks)-1 { maybeOverlapping := hunks[hunkIndex+1] maybeLhs := maybeOverlapping[0] if maybeLhs > regionRhs { break } regionRhs = max(regionRhs, maybeLhs+maybeOverlapping[2]) hunkIndex++ } copyCommon(regionLhs) if firstHunkIndex == hunkIndex { // The 'overlap' was only one hunk long, meaning that // there's no conflict here. Either a and o were the // same, or b and o were the same. if hunk[4] > 0 { result = append(result, []int{hunk[1], hunk[3], hunk[4]}) } } else { // A proper conflict. Determine the extents of the // regions involved from a, o and b. Effectively merge // all the hunks on the left into one giant hunk, and // do the same for the right; then, correct for skew // in the regions of o that each side changed, and // report appropriate spans for the three sides. regions := [][]int{{len(a), -1, len(o), -1}, nil, {len(b), -1, len(o), -1}} for i := firstHunkIndex; i <= hunkIndex; i++ { hunk = hunks[i] side := hunk[1] r := regions[side] oLhs := hunk[0] oRhs := oLhs + hunk[2] abLhs := hunk[3] abRhs := abLhs + hunk[4] r[0] = min(abLhs, r[0]) r[1] = max(abRhs, r[1]) r[2] = min(oLhs, r[2]) r[3] = max(oRhs, r[3]) } aLhs := regions[0][0] + (regionLhs - regions[0][2]) aRhs := regions[0][1] + (regionRhs - regions[0][3]) bLhs := regions[2][0] + (regionLhs - regions[2][2]) bRhs := regions[2][1] + (regionRhs - regions[2][3]) result = append(result, []int{-1, aLhs, aRhs - aLhs, regionLhs, regionRhs - regionLhs, bLhs, bRhs - bLhs}) } commonOffset = regionRhs } copyCommon(len(o)) return result, nil } // conflict describes a merge conflict type conflict[E comparable] struct { a []E aIndex int o []E oIndex int b []E bIndex int } // Diff3MergeResult describes a merge result type Diff3MergeResult[E comparable] struct { ok []E conflict *conflict[E] } // Diff3Merge applies the output of diff3MergeIndices to actually // construct the merged file; the returned result alternates // between 'ok' and 'conflict' blocks. func Diff3Merge[E comparable](ctx context.Context, o, a, b []E, algo Algorithm, excludeFalseConflicts bool) ([]*Diff3MergeResult[E], error) { var result []*Diff3MergeResult[E] files := [][]E{a, o, b} indices, err := diff3MergeIndices(ctx, o, a, b, algo) if err != nil { return nil, err } var okLines []E flushOk := func() { if len(okLines) != 0 { result = append(result, &Diff3MergeResult[E]{ok: okLines}) } okLines = nil } pushOk := func(xs []E) { okLines = append(okLines, xs...) } isTrueConflict := func(rec []int) bool { if rec[2] != rec[6] { return true } var aoff = rec[1] var boff = rec[5] for j := range rec[2] { if a[j+aoff] != b[j+boff] { return true } } return false } for i := range indices { var x = indices[i] var side = x[0] if side == -1 { if excludeFalseConflicts && !isTrueConflict(x) { pushOk(files[0][x[1] : x[1]+x[2]]) } else { flushOk() result = append(result, &Diff3MergeResult[E]{ conflict: &conflict[E]{ a: a[x[1] : x[1]+x[2]], aIndex: x[1], o: o[x[3] : x[3]+x[4]], oIndex: x[3], b: b[x[5] : x[5]+x[6]], bIndex: x[5], }, }) } } else { pushOk(files[side][x[1] : x[1]+x[2]]) } } flushOk() return result, nil } const ( // Only show the zealously minified conflicting lines of the local changes and the incoming (other) changes, // hiding the base version entirely. // // ```text // line1-changed-by-both // <<<<<<< local // line2-to-be-changed-in-incoming // ======= // line2-changed // >>>>>>> incoming // ``` STYLE_DEFAULT = iota // Show non-minimized hunks of local changes, the base, and the incoming (other) changes. // // This mode does not hide any information. // // ```text // <<<<<<< local // line1-changed-by-both // line2-to-be-changed-in-incoming // ||||||| 9a8d80c // line1-to-be-changed-by-both // line2-to-be-changed-in-incoming // ======= // line1-changed-by-both // line2-changed // >>>>>>> incoming // ``` STYLE_DIFF3 // Like diff3, but will show *minimized* hunks of local change and the incoming (other) changes, // as well as non-minimized hunks of the base. // // ```text // line1-changed-by-both // <<<<<<< local // line2-to-be-changed-in-incoming // ||||||| 9a8d80c // line1-to-be-changed-by-both // line2-to-be-changed-in-incoming // ======= // line2-changed // >>>>>>> incoming // ``` STYLE_ZEALOUS_DIFF3 ) var ( styles = map[string]int{ "merge": STYLE_DEFAULT, "diff3": STYLE_DIFF3, "zdiff3": STYLE_ZEALOUS_DIFF3, } ) func ParseConflictStyle(s string) int { if s, ok := styles[strings.ToLower(s)]; ok { return s } return STYLE_DEFAULT } type MergeOptions struct { TextO, TextA, TextB string RO, R1, R2 io.Reader // when if set LabelO, LabelA, LabelB string A Algorithm Style int // Conflict Style } func (opts *MergeOptions) ValidateOptions() error { if opts == nil { return errors.New("invalid merge options") } if opts.A == Unspecified { opts.A = Histogram } if len(opts.LabelO) != 0 && !strings.HasPrefix(opts.LabelO, " ") { opts.LabelO = " " + opts.LabelO } if len(opts.LabelA) != 0 && !strings.HasPrefix(opts.LabelA, " ") { opts.LabelA = " " + opts.LabelA } if len(opts.LabelB) != 0 && !strings.HasPrefix(opts.LabelB, " ") { opts.LabelB = " " + opts.LabelB } return nil } func (s *Sink) writeConflict(out io.Writer, opts *MergeOptions, conflict *conflict[int]) { if opts.Style == STYLE_DIFF3 { _, _ = fmt.Fprintf(out, "%s%s\n", Sep1, opts.LabelA) s.WriteLine(out, conflict.a...) _, _ = fmt.Fprintf(out, "%s%s\n", SepO, opts.LabelO) s.WriteLine(out, conflict.o...) _, _ = fmt.Fprintf(out, "%s\n", Sep2) s.WriteLine(out, conflict.b...) _, _ = fmt.Fprintf(out, "%s%s\n", Sep3, opts.LabelB) return } a, b := conflict.a, conflict.b prefix := commonPrefixLength(a, b) s.WriteLine(out, a[:prefix]...) a = a[prefix:] b = b[prefix:] suffix := commonSuffixLength(a, b) _, _ = fmt.Fprintf(out, "%s%s\n", Sep1, opts.LabelA) s.WriteLine(out, a[:len(a)-suffix]...) if opts.Style == STYLE_ZEALOUS_DIFF3 { // Zealous Diff3 _, _ = fmt.Fprintf(out, "%s%s\n", SepO, opts.LabelO) s.WriteLine(out, conflict.o...) } _, _ = fmt.Fprintf(out, "%s\n", Sep2) s.WriteLine(out, b[:len(b)-suffix]...) _, _ = fmt.Fprintf(out, "%s%s\n", Sep3, opts.LabelB) // Note: Through normal Merge/MergeParallel paths, suffix is always 0 because // the diff3 algorithms already separate common suffix into its own "ok" block. // This branch is kept as defensive code but should never execute in production. if suffix != 0 { s.WriteLine(out, b[len(b)-suffix:]...) } } // Merge implements the diff3 algorithm to merge two texts into a common base. // // Support multiple diff algorithms and multiple conflict styles func Merge(ctx context.Context, opts *MergeOptions) (string, bool, error) { if err := opts.ValidateOptions(); err != nil { return "", false, err } select { case <-ctx.Done(): return "", false, ctx.Err() default: } s := NewSink(NEWLINE_RAW) slicesO, err := s.parseLines(opts.RO, opts.TextO) if err != nil { return "", false, err } slicesA, err := s.parseLines(opts.R1, opts.TextA) if err != nil { return "", false, err } slicesB, err := s.parseLines(opts.R2, opts.TextB) if err != nil { return "", false, err } regions, err := Diff3Merge(ctx, slicesO, slicesA, slicesB, opts.A, true) if err != nil { return "", false, err } out := &strings.Builder{} out.Grow(max(len(opts.TextO), len(opts.TextA), len(opts.TextB))) var conflicts = false for _, r := range regions { if r.ok != nil { s.WriteLine(out, r.ok...) continue } if r.conflict != nil { conflicts = true s.writeConflict(out, opts, r.conflict) } } return out.String(), conflicts, nil } // DefaultMerge implements the diff3 algorithm to merge two texts into a common base. func DefaultMerge(ctx context.Context, o, a, b string, labelO, labelA, labelB string) (string, bool, error) { return Merge(ctx, &MergeOptions{TextO: o, TextA: a, TextB: b, LabelO: labelO, LabelA: labelA, LabelB: labelB, A: Histogram}) } func HasConflict(ctx context.Context, textO, textA, textB string) (bool, error) { s := NewSink(NEWLINE_RAW) slicesO := s.SplitLines(textO) slicesA := s.SplitLines(textA) slicesB := s.SplitLines(textB) regions, err := Diff3Merge(ctx, slicesO, slicesA, slicesB, Histogram, true) if err != nil { return false, err } return slices.ContainsFunc(regions, func(result *Diff3MergeResult[int]) bool { return result.conflict != nil }), nil } ================================================ FILE: modules/diferenco/merge_parallel.go ================================================ // Package diferenco provides diff and merge functionality. // // This file (merge_parallel.go) contains the MergeParallel and HasConflictParallel implementations. // These functions were generated by GLM-5 (Zhipu AI) and provide a modern three-way merge // implementation based on the Diff3 paper algorithm. // // Key features: // - Cleaner separation of concerns with dedicated merge regions // - Support for multiple conflict styles (Default, Diff3, Zealous Diff3) // - Efficient conflict detection with HasConflictParallel // - Parallel diff computation for better performance on large inputs // - Consistent behavior with the classic Merge implementation package diferenco import ( "cmp" "context" "io" "slices" "strings" "golang.org/x/sync/errgroup" ) // HasConflictParallel checks if there are any conflicts when merging three texts. // It uses the same logic as MergeParallel but only checks for conflicts without // generating the merged result, making it more efficient for conflict detection. func HasConflictParallel(ctx context.Context, textO, textA, textB string) (bool, error) { sink := NewSink(NEWLINE_RAW) // Parse the texts into indices oIdx, err := sink.parseLines(nil, textO) if err != nil { return false, err } aIdx, err := sink.parseLines(nil, textA) if err != nil { return false, err } bIdx, err := sink.parseLines(nil, textB) if err != nil { return false, err } // Step 1: Calculate diffs in parallel for better performance changesA, changesB, err := parallelDiff(ctx, oIdx, aIdx, bIdx, Histogram) if err != nil { return false, err } // Step 2: Find merge regions and check for conflicts regions := findMergeRegions(changesA, changesB) // Step 3: Finalize regions (check for false conflicts) for i := range regions { regions[i] = finalizeRegion(regions[i], changesA, changesB, aIdx, bIdx) } // Step 4: Check if any region has a conflict return slices.ContainsFunc(regions, func(r mergeRegion) bool { return r.isConflict }), nil } // MergeParallel performs a three-way merge based on Diff3 paper principles. // It uses a cleaner, more modern Go 1.26+ implementation with parallel diff computation. func MergeParallel(ctx context.Context, opts *MergeOptions) (string, bool, error) { if err := opts.ValidateOptions(); err != nil { return "", false, err } sink := NewSink(NEWLINE_RAW) oIdx, err := sink.parseLines(opts.RO, opts.TextO) if err != nil { return "", false, err } aIdx, err := sink.parseLines(opts.R1, opts.TextA) if err != nil { return "", false, err } bIdx, err := sink.parseLines(opts.R2, opts.TextB) if err != nil { return "", false, err } var builder strings.Builder result, err := newMergeInternal(ctx, sink, &builder, oIdx, aIdx, bIdx, opts) if err != nil { return "", false, err } return builder.String(), result.hasConflict, nil } // newMergeResult contains the merge result type newMergeResult struct { hasConflict bool } // newMergeInternal performs the core three-way merge logic func newMergeInternal( ctx context.Context, sink *Sink, out io.Writer, oIdx, aIdx, bIdx []int, opts *MergeOptions, ) (*newMergeResult, error) { result := &newMergeResult{} // Step 1: Calculate diffs in parallel for better performance changesA, changesB, err := parallelDiff(ctx, oIdx, aIdx, bIdx, opts.A) if err != nil { return nil, err } // Step 2: Find merge regions (groups of overlapping changes) regions := findMergeRegions(changesA, changesB) // Step 3: Finalize regions (check for false conflicts) for i := range regions { regions[i] = finalizeRegion(regions[i], changesA, changesB, aIdx, bIdx) } // Step 4: Process each region pos := 0 for _, region := range regions { // Write unchanged content before this region if pos < region.start { writeOriginLines(sink, out, oIdx, pos, region.start) } // Process the region if region.isConflict { result.hasConflict = true writeConflictRegion(sink, out, oIdx, aIdx, bIdx, region, opts, changesA, changesB) } else { writeNonConflictRegion(sink, out, aIdx, bIdx, region, changesA, changesB) } pos = region.end } // Write remaining unchanged content if pos < len(oIdx) { writeOriginLines(sink, out, oIdx, pos, len(oIdx)) } return result, nil } // mergeRegion represents a group of changes that overlap in the original. // Optimized: stores actual indices to avoid range compression bug. // Memory layout optimized to reduce padding (64 bytes instead of 72 bytes). type mergeRegion struct { start, end int // Range in O (original) // Actual indices into changesA/changesB that belong to this region // This avoids the "range compression" bug where min/max indices // might include changes that don't actually belong to this region changesAIndices []int changesBIndices []int isConflict bool } // findMergeRegions groups overlapping changes into regions. // This version stores actual indices to avoid the "range compression" bug. // Note: sink, aIdx, bIdx are only used by finalizeRegion for false conflict detection. func findMergeRegions(changesA, changesB []Change) []mergeRegion { totalChanges := len(changesA) + len(changesB) if totalChanges == 0 { return nil } // Pre-allocate regions with estimated capacity // Use totalChanges/2 + 1 as many changes will merge into regions regions := make([]mergeRegion, 0, totalChanges/2+1) // Use a more compact representation for sorting with index tracking type indexedChange struct { change Change side int // 0 = A, 1 = B index int // Original index in changesA or changesB } allChanges := make([]indexedChange, 0, totalChanges) for i, ch := range changesA { allChanges = append(allChanges, indexedChange{ch, 0, i}) } for i, ch := range changesB { allChanges = append(allChanges, indexedChange{ch, 1, i}) } // Sort by position in O using cmp.Compare (Go 1.21+) slices.SortFunc(allChanges, func(a, b indexedChange) int { return cmp.Compare(a.change.P1, b.change.P1) }) // Group overlapping changes, storing actual indices // Pre-allocate to reduce reallocations currentAIndices := make([]int, 0, 4) currentBIndices := make([]int, 0, 4) regionStart := allChanges[0].change.P1 regionEnd := allChanges[0].change.P1 + allChanges[0].change.Del // Fix: Initialize first change correctly first := allChanges[0] if first.side == 0 { currentAIndices = append(currentAIndices, first.index) } else { currentBIndices = append(currentBIndices, first.index) } // Helper to finalize current region and append to regions finalizeCurrentRegion := func() { region := mergeRegion{ start: regionStart, end: regionEnd, changesAIndices: currentAIndices, changesBIndices: currentBIndices, } regions = append(regions, region) } // Process remaining changes (skip first, already processed) for _, item := range allChanges[1:] { changeEnd := item.change.P1 + item.change.Del // Check if this change overlaps with current region // Use < (not <=) because diff uses half-open intervals [P1, P1+Del) // Two changes [10,15) and [15,18) do NOT overlap // Exception: Pure insertions (Del=0) at the same position should overlap overlaps := item.change.P1 < regionEnd || (item.change.P1 == regionEnd && (item.change.Del == 0 || regionStart == regionEnd)) if overlaps { // Overlaps, extend region if needed if changeEnd > regionEnd { regionEnd = changeEnd } // Add index to appropriate list if item.side == 0 { currentAIndices = append(currentAIndices, item.index) } else { currentBIndices = append(currentBIndices, item.index) } } else { // No overlap, finalize current region finalizeCurrentRegion() // Start new region with this change // Note: We create new slices here because mergeRegion stores the slice reference regionStart = item.change.P1 regionEnd = changeEnd if item.side == 0 { currentAIndices = []int{item.index} currentBIndices = nil } else { currentAIndices = nil currentBIndices = []int{item.index} } } } // Add the last region finalizeCurrentRegion() return regions } // finalizeRegion determines if a region is a conflict func finalizeRegion(region mergeRegion, changesA, changesB []Change, aIdx, bIdx []int) mergeRegion { // Region is a conflict if both A and B have changes region.isConflict = len(region.changesAIndices) > 0 && len(region.changesBIndices) > 0 // Check for false conflict (same content on both sides) if region.isConflict && isFalseConflict(region, changesA, changesB, aIdx, bIdx) { region.isConflict = false } return region } // isFalseConflict checks if A and B made the same change func isFalseConflict(region mergeRegion, changesA, changesB []Change, aIdx, bIdx []int) bool { // Only single changes from each side can be false conflicts if len(region.changesAIndices) != 1 || len(region.changesBIndices) != 1 { return false } chA := changesA[region.changesAIndices[0]] chB := changesB[region.changesBIndices[0]] // Check if both delete the same range if chA.P1 != chB.P1 || chA.Del != chB.Del { return false } // Check if both insert the same content if chA.Ins != chB.Ins { return false } // Compare the inserted content by index (avoids string allocation) if chA.Ins > 0 { if !slices.Equal(aIdx[chA.P2:chA.P2+chA.Ins], bIdx[chB.P2:chB.P2+chB.Ins]) { return false } } // Same operation and same content return true } // writeOriginLines writes unchanged lines from O func writeOriginLines(sink *Sink, out io.Writer, oIdx []int, start, end int) { sink.WriteLine(out, oIdx[start:end]...) } // writeNonConflictRegion writes a region without conflicts func writeNonConflictRegion(sink *Sink, out io.Writer, aIdx, bIdx []int, region mergeRegion, changesA, changesB []Change) { // Prefer A's changes if available if len(region.changesAIndices) > 0 { for _, idx := range region.changesAIndices { ch := changesA[idx] if ch.Ins > 0 { sink.WriteLine(out, aIdx[ch.P2:ch.P2+ch.Ins]...) } } return } // Otherwise write B's changes for _, idx := range region.changesBIndices { ch := changesB[idx] if ch.Ins > 0 { sink.WriteLine(out, bIdx[ch.P2:ch.P2+ch.Ins]...) } } } // writeConflictRegion writes a region with conflicts // Optimized: avoids slice allocation by using calculateRangeByIndices func writeConflictRegion( sink *Sink, out io.Writer, oIdx, aIdx, bIdx []int, region mergeRegion, opts *MergeOptions, changesA, changesB []Change, ) { // Calculate A, O, B ranges for this region without allocating slices aLhs, aRhs := calculateRangeByIndices(changesA, region.changesAIndices, aIdx, region.start, region.end) oLhs, oRhs := region.start, region.end bLhs, bRhs := calculateRangeByIndices(changesB, region.changesBIndices, bIdx, region.start, region.end) conflict := &conflict[int]{ a: aIdx[aLhs:aRhs], o: oIdx[oLhs:oRhs], b: bIdx[bLhs:bRhs], } sink.writeConflict(out, opts, conflict) } // calculateRangeByIndices calculates the content range using indices into a changes slice. // This avoids allocating a slice of changes for each conflict region. // Parameters: // - changes: the slice of all changes (changesA or changesB) // - indices: indices into changes slice for the specific region // - lineIndex: the line index slice (aIdx or bIdx) for content lookup // - regionStart, regionEnd: the region range in O func calculateRangeByIndices(changes []Change, indices []int, lineIndex []int, regionStart, regionEnd int) (lhs, rhs int) { if len(indices) == 0 { return regionStart, regionEnd } // Initialize with extreme values to find min/max abLhs := len(lineIndex) abRhs := -1 oLhs := regionEnd oRhs := regionStart for _, i := range indices { ch := changes[i] // Track origin range (oLhs, oRhs) if ch.P1 < oLhs { oLhs = ch.P1 } originEnd := ch.P1 + ch.Del if originEnd > oRhs { oRhs = originEnd } // Track content range (abLhs, abRhs) if ch.P2 < abLhs { abLhs = ch.P2 } contentEnd := ch.P2 + ch.Ins if contentEnd > abRhs { abRhs = contentEnd } } // Apply offset formula lhs = abLhs + (regionStart - oLhs) rhs = abRhs + (regionEnd - oRhs) // Ensure bounds are valid if lhs < 0 { lhs = 0 } if rhs > len(lineIndex) { rhs = len(lineIndex) } if lhs > rhs { lhs = rhs } return } // parallelDiff calculates O→A and O→B diffs in parallel for better performance. // For large inputs, this can reduce total time by ~40%. // Uses errgroup for proper context cancellation. func parallelDiff[E comparable](ctx context.Context, o, a, b []E, algo Algorithm) (changesA, changesB []Change, err error) { g, ctx := errgroup.WithContext(ctx) // Calculate O→A diff g.Go(func() error { var err error changesA, err = DiffSlices(ctx, o, a, algo) return err }) // Calculate O→B diff g.Go(func() error { var err error changesB, err = DiffSlices(ctx, o, b, algo) return err }) if err := g.Wait(); err != nil { return nil, nil, err } return changesA, changesB, nil } ================================================ FILE: modules/diferenco/merge_parallel_bench_test.go ================================================ package diferenco import ( "context" "fmt" "strconv" "strings" "testing" ) // Helper functions for benchmark test data func generateText(lines int, prefix string) string { var builder strings.Builder builder.Grow(lines * 20) // Pre-allocate approximate size for i := range lines { builder.WriteString(prefix) builder.WriteString(strconv.Itoa(i)) builder.WriteByte('\n') } return builder.String() } func generateModifiedText(lines int, prefix string, changes int) string { var builder strings.Builder builder.Grow(lines * 25) // Pre-allocate approximate size for i := range lines { // Modify some lines if i%10 == 0 && changes > 0 { builder.WriteString(prefix) builder.WriteString("_modified_") builder.WriteString(strconv.Itoa(i)) builder.WriteByte('\n') changes-- } else { builder.WriteString(prefix) builder.WriteString(strconv.Itoa(i)) builder.WriteByte('\n') } } return builder.String() } // generateConflictText generates texts where A and B modify the same lines (creates conflicts) func generateConflictText(lines int, prefix string, conflictRate int) (o, a, b string) { var oBuilder, aBuilder, bBuilder strings.Builder oBuilder.Grow(lines * 20) aBuilder.Grow(lines * 25) bBuilder.Grow(lines * 25) for i := range lines { // Original line oBuilder.WriteString(prefix) oBuilder.WriteString(strconv.Itoa(i)) oBuilder.WriteByte('\n') // A and B modify the same lines with conflictRate probability if i%conflictRate == 0 { // A's modification aBuilder.WriteString(prefix) aBuilder.WriteString("_A_modified_") aBuilder.WriteString(strconv.Itoa(i)) aBuilder.WriteByte('\n') // B's modification (different from A - creates conflict) bBuilder.WriteString(prefix) bBuilder.WriteString("_B_modified_") bBuilder.WriteString(strconv.Itoa(i)) bBuilder.WriteByte('\n') } else if i%10 == 0 { // Only A modifies aBuilder.WriteString(prefix) aBuilder.WriteString("_A_only_") aBuilder.WriteString(strconv.Itoa(i)) aBuilder.WriteByte('\n') bBuilder.WriteString(prefix) bBuilder.WriteString(strconv.Itoa(i)) bBuilder.WriteByte('\n') } else if i%7 == 0 { // Only B modifies aBuilder.WriteString(prefix) aBuilder.WriteString(strconv.Itoa(i)) aBuilder.WriteByte('\n') bBuilder.WriteString(prefix) bBuilder.WriteString("_B_only_") bBuilder.WriteString(strconv.Itoa(i)) bBuilder.WriteByte('\n') } else { // No change aBuilder.WriteString(prefix) aBuilder.WriteString(strconv.Itoa(i)) aBuilder.WriteByte('\n') bBuilder.WriteString(prefix) bBuilder.WriteString(strconv.Itoa(i)) bBuilder.WriteByte('\n') } } return oBuilder.String(), aBuilder.String(), bBuilder.String() } // BenchmarkMergeParallel compares MergeParallel performance with Merge func BenchmarkMergeParallel(b *testing.B) { ctx := context.Background() // Test cases with different sizes benchmarks := []struct { name string size int textO string textA string textB string }{ { name: "small", size: 100, textO: generateText(100, "line"), textA: generateModifiedText(100, "line", 10), textB: generateModifiedText(100, "line", 15), }, { name: "medium", size: 1000, textO: generateText(1000, "line"), textA: generateModifiedText(1000, "line", 100), textB: generateModifiedText(1000, "line", 150), }, { name: "large", size: 10000, textO: generateText(10000, "line"), textA: generateModifiedText(10000, "line", 1000), textB: generateModifiedText(10000, "line", 1500), }, } for _, bm := range benchmarks { b.Run("MergeParallel_"+bm.name, func(b *testing.B) { opts := &MergeOptions{ TextO: bm.textO, TextA: bm.textA, TextB: bm.textB, Style: STYLE_DEFAULT, A: Histogram, } b.ResetTimer() for range b.N { _, _, _ = MergeParallel(ctx, opts) } }) b.Run("Merge_"+bm.name, func(b *testing.B) { opts := &MergeOptions{ TextO: bm.textO, TextA: bm.textA, TextB: bm.textB, Style: STYLE_DEFAULT, A: Histogram, } b.ResetTimer() for range b.N { _, _, _ = Merge(ctx, opts) } }) } } // BenchmarkMergeParallelAlgorithms compares different algorithms func BenchmarkMergeParallelAlgorithms(b *testing.B) { ctx := context.Background() algorithms := []Algorithm{ Histogram, Myers, ONP, Patience, Minimal, } textO := generateText(1000, "line") textA := generateModifiedText(1000, "line", 100) textB := generateModifiedText(1000, "line", 150) for _, algo := range algorithms { b.Run("MergeParallel_"+algo.String(), func(b *testing.B) { opts := &MergeOptions{ TextO: textO, TextA: textA, TextB: textB, Style: STYLE_DEFAULT, A: algo, } b.ResetTimer() for range b.N { _, _, _ = MergeParallel(ctx, opts) } }) b.Run("Merge_"+algo.String(), func(b *testing.B) { opts := &MergeOptions{ TextO: textO, TextA: textA, TextB: textB, Style: STYLE_DEFAULT, A: algo, } b.ResetTimer() for range b.N { _, _, _ = Merge(ctx, opts) } }) } } // BenchmarkMergeParallelConflictScenarios tests different conflict scenarios func BenchmarkMergeParallelConflictScenarios(b *testing.B) { ctx := context.Background() scenarios := []struct { name string lines int conflictRate int description string }{ {"no_conflicts", 1000, 1000, "no conflicts - only independent changes"}, {"few_conflicts", 1000, 100, "few conflicts - ~1% conflicting lines"}, {"moderate_conflicts", 1000, 50, "moderate conflicts - ~2% conflicting lines"}, {"many_conflicts", 1000, 20, "many conflicts - ~5% conflicting lines"}, } for _, scenario := range scenarios { textO, textA, textB := generateConflictText(scenario.lines, "line", scenario.conflictRate) b.Run(fmt.Sprintf("MergeParallel_%s", scenario.name), func(b *testing.B) { opts := &MergeOptions{ TextO: textO, TextA: textA, TextB: textB, Style: STYLE_DEFAULT, A: Histogram, } b.ResetTimer() for range b.N { _, _, _ = MergeParallel(ctx, opts) } }) b.Run(fmt.Sprintf("Merge_%s", scenario.name), func(b *testing.B) { opts := &MergeOptions{ TextO: textO, TextA: textA, TextB: textB, Style: STYLE_DEFAULT, A: Histogram, } b.ResetTimer() for range b.N { _, _, _ = Merge(ctx, opts) } }) } } // BenchmarkMergeParallelConflictStyles compares different conflict styles func BenchmarkMergeParallelConflictStyles(b *testing.B) { ctx := context.Background() textO, textA, textB := generateConflictText(1000, "line", 30) styles := []struct { name string style int }{ {"STYLE_DEFAULT", STYLE_DEFAULT}, {"STYLE_DIFF3", STYLE_DIFF3}, {"STYLE_ZEALOUS_DIFF3", STYLE_ZEALOUS_DIFF3}, } for _, s := range styles { b.Run(fmt.Sprintf("MergeParallel_%s", s.name), func(b *testing.B) { opts := &MergeOptions{ TextO: textO, TextA: textA, TextB: textB, Style: s.style, A: Histogram, } b.ResetTimer() for range b.N { _, _, _ = MergeParallel(ctx, opts) } }) b.Run(fmt.Sprintf("Merge_%s", s.name), func(b *testing.B) { opts := &MergeOptions{ TextO: textO, TextA: textA, TextB: textB, Style: s.style, A: Histogram, } b.ResetTimer() for range b.N { _, _, _ = Merge(ctx, opts) } }) } } // BenchmarkHasConflictComparison compares HasConflict vs HasConflictParallel func BenchmarkHasConflictComparison(b *testing.B) { ctx := context.Background() scenarios := []struct { name string lines int conflictRate int }{ {"small_no_conflict", 100, 1000}, {"small_with_conflict", 100, 20}, {"medium_no_conflict", 1000, 1000}, {"medium_with_conflict", 1000, 20}, {"large_no_conflict", 10000, 1000}, {"large_with_conflict", 10000, 20}, } for _, scenario := range scenarios { textO, textA, textB := generateConflictText(scenario.lines, "line", scenario.conflictRate) b.Run(fmt.Sprintf("HasConflictParallel_%s", scenario.name), func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = HasConflictParallel(ctx, textO, textA, textB) } }) b.Run(fmt.Sprintf("HasConflict_%s", scenario.name), func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = HasConflict(ctx, textO, textA, textB) } }) } } // BenchmarkMergeParallelMemory traces memory allocations func BenchmarkMergeParallelMemory(b *testing.B) { ctx := context.Background() textO := generateText(1000, "line") textA := generateModifiedText(1000, "line", 100) textB := generateModifiedText(1000, "line", 150) b.Run("MergeParallel_memory", func(b *testing.B) { opts := &MergeOptions{ TextO: textO, TextA: textA, TextB: textB, Style: STYLE_DEFAULT, A: Histogram, } b.ReportAllocs() b.ResetTimer() for range b.N { _, _, _ = MergeParallel(ctx, opts) } }) b.Run("Merge_memory", func(b *testing.B) { opts := &MergeOptions{ TextO: textO, TextA: textA, TextB: textB, Style: STYLE_DEFAULT, A: Histogram, } b.ReportAllocs() b.ResetTimer() for range b.N { _, _, _ = Merge(ctx, opts) } }) } // BenchmarkMergeParallelComponents benchmarks individual components of the merge func BenchmarkMergeParallelComponents(b *testing.B) { ctx := context.Background() sink := NewSink(NEWLINE_LF) textO := generateText(1000, "line") textA := generateModifiedText(1000, "line", 100) textB := generateModifiedText(1000, "line", 150) oIdx, _ := sink.parseLines(nil, textO) aIdx, _ := sink.parseLines(nil, textA) bIdx, _ := sink.parseLines(nil, textB) b.Run("parseLines", func(b *testing.B) { b.ResetTimer() for range b.N { sink := NewSink(NEWLINE_LF) _, _ = sink.parseLines(nil, textO) _, _ = sink.parseLines(nil, textA) _, _ = sink.parseLines(nil, textB) } }) b.Run("DiffSlices_OA", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, oIdx, aIdx, Histogram) } }) b.Run("DiffSlices_OB", func(b *testing.B) { b.ResetTimer() for range b.N { _, _ = DiffSlices(ctx, oIdx, bIdx, Histogram) } }) changesA, _ := DiffSlices(ctx, oIdx, aIdx, Histogram) changesB, _ := DiffSlices(ctx, oIdx, bIdx, Histogram) b.Run("findMergeRegions", func(b *testing.B) { b.ResetTimer() for range b.N { _ = findMergeRegions(changesA, changesB) } }) } ================================================ FILE: modules/diferenco/merge_parallel_test.go ================================================ // Package diferenco provides diff and merge functionality. // // This file (merge_new_test.go) contains comprehensive tests for MergeParallel and HasConflictParallel. // These tests were generated by GLM-5 (Zhipu AI) to validate the three-way merge implementation. package diferenco import ( "context" "strings" "testing" "unicode/utf8" ) // ============================================================================ // Basic Tests // ============================================================================ // TestMergeParallelBasic tests basic merge scenarios func TestMergeParallelBasic(t *testing.T) { tests := []struct { name string origin string ours string theirs string style int wantConflict bool }{ { name: "conflict_default", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1b\nline2\n", style: STYLE_DEFAULT, wantConflict: true, }, { name: "conflict_diff3", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1b\nline2\n", style: STYLE_DIFF3, wantConflict: true, }, { name: "no_conflict_adjacent", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1\nline2a\n", style: STYLE_DEFAULT, wantConflict: false, // Adjacent (non-overlapping) changes should NOT conflict }, { name: "empty_texts", origin: "", ours: "", theirs: "", style: STYLE_DEFAULT, wantConflict: false, }, { name: "ours_empty", origin: "line1\nline2\n", ours: "", theirs: "line1\nline2\n", style: STYLE_DEFAULT, wantConflict: false, }, { name: "theirs_empty", origin: "line1\nline2\n", ours: "line1\nline2\n", theirs: "", style: STYLE_DEFAULT, wantConflict: false, }, { name: "same_change_both", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1a\nline2\n", style: STYLE_DEFAULT, wantConflict: false, // Same change should not conflict }, { name: "only_ours_changed", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1\nline2\n", style: STYLE_DEFAULT, wantConflict: false, }, { name: "only_theirs_changed", origin: "line1\nline2\n", ours: "line1\nline2\n", theirs: "line1b\nline2\n", style: STYLE_DEFAULT, wantConflict: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() opts := &MergeOptions{ TextO: tt.origin, TextA: tt.ours, TextB: tt.theirs, Style: tt.style, A: Histogram, } result, hasConflict, err := MergeParallel(ctx, opts) if err != nil { t.Fatalf("MergeParallel() error = %v", err) } if hasConflict != tt.wantConflict { t.Errorf("MergeParallel() hasConflict = %v, want %v", hasConflict, tt.wantConflict) } // Verify conflict markers are present when expected if tt.wantConflict && !strings.Contains(result, "<<<<<<<") { t.Errorf("MergeParallel() result should contain conflict markers when hasConflict=true") } if !tt.wantConflict && strings.Contains(result, "<<<<<<<") { t.Errorf("MergeParallel() result should not contain conflict markers when hasConflict=false") } }) } } // ============================================================================ // Comparison Tests: MergeParallel vs Merge // ============================================================================ // TestMergeParallelVsMerge compares MergeParallel with original Merge func TestMergeParallelVsMerge(t *testing.T) { tests := []struct { name string origin string ours string theirs string style int }{ { name: "simple_conflict", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1b\nline2\n", style: STYLE_DEFAULT, }, // Note: "adjacent_conflict" test removed because Merge and MergeParallel // handle adjacent (non-overlapping) changes differently: // - Merge uses <= for overlap check (treats adjacent as conflict) // - MergeParallel uses < for overlap check (correct: adjacent is NOT conflict) { name: "diff3_style", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1b\nline2\n", style: STYLE_DIFF3, }, { name: "no_trailing_newline_conflict", origin: "line1\nline2", ours: "line1\nline2a", theirs: "line1\nline2b", style: STYLE_DEFAULT, }, { name: "no_change_ours", origin: "line1\nline2\n", ours: "line1\nline2\n", theirs: "line1b\nline2\n", style: STYLE_DEFAULT, }, { name: "no_change_theirs", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1\nline2\n", style: STYLE_DEFAULT, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() // Original Merge optsOriginal := &MergeOptions{ TextO: tt.origin, TextA: tt.ours, TextB: tt.theirs, Style: tt.style, A: Histogram, } resultOriginal, conflictOriginal, errOriginal := Merge(ctx, optsOriginal) if errOriginal != nil { t.Fatalf("Merge() error = %v", errOriginal) } // MergeParallel optsNew := &MergeOptions{ TextO: tt.origin, TextA: tt.ours, TextB: tt.theirs, Style: tt.style, A: Histogram, } resultNew, conflictNew, errNew := MergeParallel(ctx, optsNew) if errNew != nil { t.Fatalf("MergeParallel() error = %v", errNew) } // Compare conflict flags if conflictOriginal != conflictNew { t.Errorf("Conflict mismatch: Merge=%v, MergeParallel=%v", conflictOriginal, conflictNew) } // Compare results if resultOriginal != resultNew { t.Errorf("Results differ:\nOriginal:\n%s\n\nMergeParallel:\n%s", resultOriginal, resultNew) } }) } } // ============================================================================ // Label Tests // ============================================================================ // TestMergeParallelLabels tests label formatting func TestMergeParallelLabels(t *testing.T) { tests := []struct { name string labelO string labelA string labelB string wantLabel string }{ { name: "default_labels", labelO: "o.txt", labelA: "a.txt", labelB: "b.txt", wantLabel: " a.txt", // ValidateOptions adds space }, { name: "empty_labels", labelO: "", labelA: "", labelB: "", wantLabel: "", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() opts := &MergeOptions{ TextO: "line1\nline2\n", TextA: "line1a\nline2\n", TextB: "line1b\nline2\n", LabelO: tt.labelO, LabelA: tt.labelA, LabelB: tt.labelB, Style: STYLE_DEFAULT, A: Histogram, } result, _, err := MergeParallel(ctx, opts) if err != nil { t.Fatalf("MergeParallel() error = %v", err) } if tt.labelA != "" { if !strings.Contains(result, tt.wantLabel) { t.Errorf("MergeParallel() result should contain label %q, got:\n%s", tt.wantLabel, result) } } }) } } // ============================================================================ // Multi-line Tests // ============================================================================ // TestMergeParallelMultiLine tests multi-line merges func TestMergeParallelMultiLine(t *testing.T) { tests := []struct { name string origin string ours string theirs string wantConflict bool }{ { name: "multi_line_change", origin: "line1\nline2\nline3\nline4\n", ours: "line1\nline2a\nline3\nline4\n", theirs: "line1\nline2\nline3b\nline4\n", wantConflict: false, // Adjacent (non-overlapping) modifications should NOT conflict }, { name: "insert_middle", origin: "line1\nline3\n", ours: "line1\nline2\nline3\n", theirs: "line1\nline2\nline3\n", wantConflict: false, // Same insert }, { name: "delete_middle", origin: "line1\nline2\nline3\n", ours: "line1\nline3\n", theirs: "line1\nline3\n", wantConflict: false, // Same delete }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() opts := &MergeOptions{ TextO: tt.origin, TextA: tt.ours, TextB: tt.theirs, Style: STYLE_DEFAULT, A: Histogram, } _, hasConflict, err := MergeParallel(ctx, opts) if err != nil { t.Fatalf("MergeParallel() error = %v", err) } if hasConflict != tt.wantConflict { t.Errorf("MergeParallel() hasConflict = %v, want %v", hasConflict, tt.wantConflict) } }) } } // ============================================================================ // Context Tests // ============================================================================ // TestMergeParallelContext tests context cancellation func TestMergeParallelContext(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) cancel() // Cancel immediately opts := &MergeOptions{ TextO: "line1\nline2\n", TextA: "line1a\nline2\n", TextB: "line1b\nline2\n", Style: STYLE_DEFAULT, A: Histogram, } _, _, err := MergeParallel(ctx, opts) if err == nil { t.Error("MergeParallel() should return error when context is canceled") } } // ============================================================================ // Options Validation Tests // ============================================================================ // TestMergeParallelValidateOptions tests option validation func TestMergeParallelValidateOptions(t *testing.T) { tests := []struct { name string opts *MergeOptions wantErr bool }{ { name: "nil_options", opts: nil, wantErr: true, }, { name: "valid_options", opts: &MergeOptions{ TextO: "line1\n", TextA: "line1a\n", TextB: "line1b\n", }, wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() _, _, err := MergeParallel(ctx, tt.opts) if (err != nil) != tt.wantErr { t.Errorf("MergeParallel() error = %v, wantErr %v", err, tt.wantErr) } }) } } // ============================================================================ // Algorithm Tests // ============================================================================ // TestMergeParallelAlgorithms tests different diff algorithms func TestMergeParallelAlgorithms(t *testing.T) { algorithms := []Algorithm{ Histogram, Myers, ONP, Patience, Minimal, } for _, algo := range algorithms { t.Run(algo.String(), func(t *testing.T) { ctx := context.Background() opts := &MergeOptions{ TextO: "line1\nline2\nline3\n", TextA: "line1a\nline2\nline3\n", TextB: "line1b\nline2\nline3\n", Style: STYLE_DEFAULT, A: algo, } result, hasConflict, err := MergeParallel(ctx, opts) if err != nil { t.Fatalf("MergeParallel() with algorithm %s error = %v", algo, err) } if !hasConflict { t.Errorf("MergeParallel() with algorithm %s should detect conflict", algo) } if !strings.Contains(result, "<<<<<<<") { t.Errorf("MergeParallel() result should contain conflict markers") } }) } } // ============================================================================ // Complex Conflict Tests // ============================================================================ // TestMergeParallelComplexConflicts tests complex conflict scenarios func TestMergeParallelComplexConflicts(t *testing.T) { tests := []struct { name string origin string ours string theirs string wantConflict bool }{ { name: "both_delete_same", origin: "line1\nline2\nline3\n", ours: "line1\nline3\n", theirs: "line1\nline3\n", wantConflict: false, // Same delete, no conflict }, { name: "both_delete_different", origin: "line1\nline2\nline3\n", ours: "line1\nline3\n", theirs: "line1\nline2\n", wantConflict: false, // Adjacent deletions (line2 vs line3) don't overlap }, { name: "both_insert_same_place", origin: "line1\nline3\n", ours: "line1\nline2\nline3\n", theirs: "line1\nline2a\nline3\n", wantConflict: true, // Different insert at same place }, { name: "replace_same_content", origin: "line1\nline2\nline3\n", ours: "line1\nline2a\nline3\n", theirs: "line1\nline2a\nline3\n", wantConflict: false, // Same replacement, no conflict }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() opts := &MergeOptions{ TextO: tt.origin, TextA: tt.ours, TextB: tt.theirs, Style: STYLE_DEFAULT, A: Histogram, } _, hasConflict, err := MergeParallel(ctx, opts) if err != nil { t.Fatalf("MergeParallel() error = %v", err) } if hasConflict != tt.wantConflict { t.Errorf("MergeParallel() hasConflict = %v, want %v", hasConflict, tt.wantConflict) } }) } } // ============================================================================ // Empty Region Tests // ============================================================================ // TestMergeParallelEmptyRegion tests edge cases with empty regions func TestMergeParallelEmptyRegion(t *testing.T) { tests := []struct { name string origin string ours string theirs string wantConflict bool }{ { name: "ours_insert_at_beginning", origin: "line1\nline2\n", ours: "line0\nline1\nline2\n", theirs: "line1\nline2\n", wantConflict: false, }, { name: "theirs_insert_at_end", origin: "line1\nline2\n", ours: "line1\nline2\n", theirs: "line1\nline2\nline3\n", wantConflict: false, }, { name: "both_insert_at_beginning_different", origin: "line1\nline2\n", ours: "line0a\nline1\nline2\n", theirs: "line0b\nline1\nline2\n", wantConflict: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() opts := &MergeOptions{ TextO: tt.origin, TextA: tt.ours, TextB: tt.theirs, Style: STYLE_DEFAULT, A: Histogram, } _, hasConflict, err := MergeParallel(ctx, opts) if err != nil { t.Fatalf("MergeParallel() error = %v", err) } if hasConflict != tt.wantConflict { t.Errorf("MergeParallel() hasConflict = %v, want %v", hasConflict, tt.wantConflict) } }) } } // ============================================================================ // Edge Cases Tests // ============================================================================ // TestMergeParallelEdgeCases tests edge cases for MergeParallel func TestMergeParallelEdgeCases(t *testing.T) { tests := []struct { name string origin string ours string theirs string style int wantConflict bool description string }{ // ===== 空值和 null 边界情况 ===== { name: "all_empty", origin: "", ours: "", theirs: "", style: STYLE_DEFAULT, wantConflict: false, description: "所有输入为空字符串", }, { name: "only_origin_empty", origin: "", ours: "line1\nline2\n", theirs: "line1\nline2\n", style: STYLE_DEFAULT, wantConflict: false, description: "只有 origin 为空,ours 和 theirs 相同", }, { name: "origin_empty_ours_theirs_different", origin: "", ours: "line1\n", theirs: "line2\n", style: STYLE_DEFAULT, wantConflict: true, description: "origin 为空,ours 和 theirs 不同", }, { name: "single_line_all_empty", origin: "\n", ours: "\n", theirs: "\n", style: STYLE_DEFAULT, wantConflict: false, description: "所有输入只有一个换行符", }, // ===== 单行边界情况 ===== { name: "single_line_origin", origin: "line1", ours: "line1", theirs: "line1", style: STYLE_DEFAULT, wantConflict: false, description: "单行文本,无变化", }, { name: "single_line_modified_ours", origin: "line1", ours: "line1a", theirs: "line1", style: STYLE_DEFAULT, wantConflict: false, description: "单行文本,只有 ours 修改", }, { name: "single_line_both_modified_same", origin: "line1", ours: "line1a", theirs: "line1a", style: STYLE_DEFAULT, wantConflict: false, description: "单行文本,ours 和 theirs 修改相同内容", }, { name: "single_line_both_modified_different", origin: "line1", ours: "line1a", theirs: "line1b", style: STYLE_DEFAULT, wantConflict: true, description: "单行文本,ours 和 theirs 修改不同内容", }, { name: "single_line_without_newline", origin: "line1", ours: "line1a", theirs: "line1b", style: STYLE_DEFAULT, wantConflict: true, description: "单行文本无换行符", }, // ===== 特殊字符和编码 ===== { name: "unicode_characters", origin: "中文\n日本語\n한국어\n", ours: "中文修改\n日本語\n한국어\n", theirs: "中文\n日本語修改\n한국어\n", style: STYLE_DEFAULT, wantConflict: false, // 相邻但不重叠的修改(第一行 vs 第二行) description: "Unicode 多语言字符 - 相邻修改", }, { name: "emoji_characters", origin: "😀\n😎\n", ours: "😊\n😎\n", theirs: "😀\n🥳\n", style: STYLE_DEFAULT, wantConflict: false, // 相邻但不重叠的修改(第一行 vs 第二行) description: "Emoji 表情符号 - 相邻修改", }, { name: "special_characters", origin: "line1\ttab\nline2\rcarriage\n", ours: "line1\ttab modified\nline2\rcarriage\n", theirs: "line1\ttab\nline2\rcarriage modified\n", style: STYLE_DEFAULT, wantConflict: false, // 相邻但不重叠的修改(第一行 vs 第二行) description: "特殊字符(制表符、回车符)- 相邻修改", }, { name: "mixed_line_endings", origin: "line1\nline2\r\nline3\r", ours: "line1 modified\nline2\r\nline3\r", theirs: "line1\nline2\r\nline3 modified\r", style: STYLE_DEFAULT, wantConflict: false, description: "混合行结束符(\\n, \\r\\n, \\r)", }, { name: "very_long_line", origin: strings.Repeat("a", 10000) + "\n", ours: strings.Repeat("b", 10000) + "\n", theirs: strings.Repeat("c", 10000) + "\n", style: STYLE_DEFAULT, wantConflict: true, description: "超长行(10000 字符)", }, { name: "whitespace_only", origin: " \n\t\n", ours: " \n\t\n", theirs: " \n\t\t\n", style: STYLE_DEFAULT, wantConflict: false, // Different lines modified (line1 vs line2), no overlap description: "只有空白字符", }, { name: "null_byte", origin: "line1\x00line2\n", ours: "line1\x00line2 modified\n", theirs: "line1\x00line2\n", style: STYLE_DEFAULT, wantConflict: false, description: "包含 null 字节(\\x00)", }, // ===== 插入和删除边界情况 ===== { name: "insert_at_beginning_both", origin: "line1\nline2\n", ours: "inserted\nline1\nline2\n", theirs: "inserted\nline1\nline2\n", style: STYLE_DEFAULT, wantConflict: false, description: "在开头插入相同内容", }, { name: "insert_at_beginning_different", origin: "line1\nline2\n", ours: "insertedA\nline1\nline2\n", theirs: "insertedB\nline1\nline2\n", style: STYLE_DEFAULT, wantConflict: true, description: "在开头插入不同内容", }, { name: "insert_at_end_both", origin: "line1\nline2\n", ours: "line1\nline2\ninserted\n", theirs: "line1\nline2\ninserted\n", style: STYLE_DEFAULT, wantConflict: false, description: "在末尾插入相同内容", }, { name: "insert_at_end_different", origin: "line1\nline2\n", ours: "line1\nline2\ninsertedA\n", theirs: "line1\nline2\ninsertedB\n", style: STYLE_DEFAULT, wantConflict: true, description: "在末尾插入不同内容", }, { name: "delete_all_content", origin: "line1\nline2\nline3\n", ours: "", theirs: "", style: STYLE_DEFAULT, wantConflict: false, description: "双方都删除所有内容", }, { name: "delete_all_content_ours_only", origin: "line1\nline2\nline3\n", ours: "", theirs: "line1\nline2\nline3\n", style: STYLE_DEFAULT, wantConflict: false, description: "只有 ours 删除所有内容", }, { name: "delete_middle_lines", origin: "line1\nline2\nline3\nline4\nline5\n", ours: "line1\nline4\nline5\n", theirs: "line1\nline4\nline5\n", style: STYLE_DEFAULT, wantConflict: false, description: "双方删除相同的中间行", }, { name: "delete_different_lines", origin: "line1\nline2\nline3\nline4\nline5\n", ours: "line1\nline3\nline5\n", theirs: "line1\nline2\nline4\nline5\n", style: STYLE_DEFAULT, wantConflict: false, // Adjacent deletions (line2,line4 vs line3), no overlap description: "删除不同的行", }, { name: "insert_multiple_lines", origin: "line1\nline3\n", ours: "line1\nline2a\nline2b\nline3\n", theirs: "line1\nline2a\nline2b\nline3\n", style: STYLE_DEFAULT, wantConflict: false, description: "双方插入相同的多个行", }, { name: "insert_different_multiple_lines", origin: "line1\nline3\n", ours: "line1\nline2a\nline2b\nline3\n", theirs: "line1\nline2x\nline2y\nline3\n", style: STYLE_DEFAULT, wantConflict: true, description: "双方插入不同的多个行", }, // ===== 替换边界情况 ===== { name: "replace_single_line_same", origin: "line1\nline2\nline3\n", ours: "line1\nmodified\nline3\n", theirs: "line1\nmodified\nline3\n", style: STYLE_DEFAULT, wantConflict: false, description: "替换同一行相同内容", }, { name: "replace_single_line_different", origin: "line1\nline2\nline3\n", ours: "line1\nmodifiedA\nline3\n", theirs: "line1\nmodifiedB\nline3\n", style: STYLE_DEFAULT, wantConflict: true, description: "替换同一行不同内容", }, { name: "replace_multiple_lines_same", origin: "line1\nline2\nline3\nline4\n", ours: "line1\nnew1\nnew2\nline4\n", theirs: "line1\nnew1\nnew2\nline4\n", style: STYLE_DEFAULT, wantConflict: false, description: "替换多个行相同内容", }, { name: "replace_multiple_lines_different", origin: "line1\nline2\nline3\nline4\n", ours: "line1\nnew1\nnew2\nline4\n", theirs: "line1\nnew3\nnew4\nline4\n", style: STYLE_DEFAULT, wantConflict: true, description: "替换多个行不同内容", }, // ===== 复杂冲突场景 ===== { name: "overlapping_changes", origin: "line1\nline2\nline3\nline4\n", ours: "line1\nmodifiedA\nline3\nline4\n", theirs: "line1\nline2\nmodifiedB\nline4\n", style: STYLE_DEFAULT, wantConflict: false, // Adjacent modifications (line2 vs line3), no overlap description: "相邻但不重叠的修改", }, { name: "multiple_conflicts", origin: "line1\nline2\nline3\nline4\nline5\n", ours: "line1a\nline2\nline3a\nline4\nline5\n", theirs: "line1\nline2b\nline3\nline4b\nline5\n", style: STYLE_DEFAULT, wantConflict: false, // Adjacent modifications (line1,line3 vs line2,line4), no overlap description: "多个独立的修改,相邻但不重叠", }, { name: "interleaved_changes", origin: "line1\nline2\nline3\nline4\nline5\n", ours: "line1\nline2a\nline3\nline4a\nline5\n", theirs: "line1\nline2b\nline3\nline4b\nline5\n", style: STYLE_DEFAULT, wantConflict: true, description: "交替的修改", }, // ===== 冲突样式测试 ===== { name: "diff3_style_conflict", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1b\nline2\n", style: STYLE_DIFF3, wantConflict: true, description: "Diff3 样式冲突(包含 origin 内容)", }, { name: "zealous_diff3_style_conflict", origin: "line1\nline2\nline3\n", ours: "line1\nline2a\nline3\n", theirs: "line1\nline2b\nline3\n", style: STYLE_ZEALOUS_DIFF3, wantConflict: true, description: "Zealous Diff3 样式冲突", }, // ===== 大文件测试 ===== { name: "large_file_no_conflict", origin: strings.Repeat("line\n", 100), ours: strings.Repeat("line\n", 50) + "modified\n" + strings.Repeat("line\n", 49), theirs: strings.Repeat("line\n", 100), style: STYLE_DEFAULT, wantConflict: false, description: "大文件单行修改无冲突", }, { name: "large_file_with_conflict", origin: strings.Repeat("line\n", 100), ours: strings.Repeat("line\n", 50) + "modifiedA\n" + strings.Repeat("line\n", 49), theirs: strings.Repeat("line\n", 50) + "modifiedB\n" + strings.Repeat("line\n", 49), style: STYLE_DEFAULT, wantConflict: true, description: "大文件同位置修改产生冲突", }, // ===== 编码相关测试 ===== { name: "utf8_bom", origin: "\xef\xbb\xbfline1\nline2\n", ours: "\xef\xbb\xbfline1a\nline2\n", theirs: "\xef\xbb\xbfline1\nline2\n", style: STYLE_DEFAULT, wantConflict: false, description: "UTF-8 BOM 处理", }, { name: "different_encodings_treated_as_binary", origin: "line1\nline2\n", ours: "line1a\nline2\n", theirs: "line1b\nline2\n", style: STYLE_DEFAULT, wantConflict: true, description: "不同编码处理(作为二进制处理)", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() opts := &MergeOptions{ TextO: tt.origin, TextA: tt.ours, TextB: tt.theirs, Style: tt.style, A: Histogram, } result, hasConflict, err := MergeParallel(ctx, opts) if err != nil { t.Fatalf("MergeParallel() error = %v", err) } if hasConflict != tt.wantConflict { t.Errorf("MergeParallel() hasConflict = %v, want %v (%s)", hasConflict, tt.wantConflict, tt.description) } // Validate result is valid UTF-8 if !utf8.ValidString(result) { t.Errorf("MergeParallel() result is not valid UTF-8") } // Verify conflict markers are present when expected if tt.wantConflict && !strings.Contains(result, "<<<<<<<") { t.Errorf("MergeParallel() result should contain conflict markers when hasConflict=true") } }) } } // ============================================================================ // HasConflictParallel Tests // ============================================================================ // TestHasConflictParallel tests the HasConflictParallel function func TestHasConflictParallel(t *testing.T) { tests := []struct { name string textO string textA string textB string wantTrue bool expectErr bool }{ { name: "no_conflict_only_a_changed", textO: "line1\nline2\nline3\n", textA: "line1a\nline2\nline3\n", textB: "line1\nline2\nline3\n", wantTrue: false, expectErr: false, }, { name: "no_conflict_only_b_changed", textO: "line1\nline2\nline3\n", textA: "line1\nline2\nline3\n", textB: "line1\nline2b\nline3\n", wantTrue: false, expectErr: false, }, { name: "no_conflict_both_same_change", textO: "line1\nline2\nline3\n", textA: "line1a\nline2\nline3\n", textB: "line1a\nline2\nline3\n", wantTrue: false, expectErr: false, }, { name: "conflict_same_line_different_content", textO: "line1\nline2\nline3\n", textA: "line1\nline2a\nline3\n", textB: "line1\nline2b\nline3\n", wantTrue: true, expectErr: false, }, { name: "no_conflict_different_lines_adjacent", // 相邻但不重叠的修改(line1 vs line2) textO: "line1\nline2\nline3\n", textA: "line1a\nline2\nline3\n", textB: "line1\nline2b\nline3\n", wantTrue: false, // 相邻但不重叠,不冲突 expectErr: false, }, { name: "conflict_adjacent_changes", textO: "line1\nline2\nline3\n", textA: "line1a\nline2\nline3\n", textB: "line1b\nline2\nline3\n", wantTrue: true, expectErr: false, }, { name: "no_conflict_all_same", textO: "line1\nline2\nline3\n", textA: "line1\nline2\nline3\n", textB: "line1\nline2\nline3\n", wantTrue: false, expectErr: false, }, { name: "no_conflict_empty_texts", textO: "", textA: "", textB: "", wantTrue: false, expectErr: false, }, { name: "conflict_empty_origin", textO: "", textA: "line1\n", textB: "line2\n", wantTrue: true, expectErr: false, }, { name: "conflict_insert_at_same_position_different_content", textO: "line1\nline3\n", textA: "line1\nline2a\nline3\n", textB: "line1\nline2b\nline3\n", wantTrue: true, expectErr: false, }, { name: "conflict_insert_at_same_position", textO: "line1\nline3\n", textA: "line1\nline2a\nline3\n", textB: "line1\nline2b\nline3\n", wantTrue: true, expectErr: false, }, { name: "no_conflict_delete_same_line", textO: "line1\nline2\nline3\n", textA: "line1\nline3\n", textB: "line1\nline3\n", wantTrue: false, expectErr: false, }, { name: "conflict_delete_different_lines", textO: "line1\nline2\nline3\n", textA: "line1\nline3\n", textB: "line1\nline2\n", wantTrue: false, // Adjacent deletions (line2 vs line3), no overlap expectErr: false, }, { name: "no_conflict_single_line", textO: "line1\n", textA: "line1\n", textB: "line1\n", wantTrue: false, expectErr: false, }, { name: "conflict_single_line", textO: "line1\n", textA: "line1a\n", textB: "line1b\n", wantTrue: true, expectErr: false, }, { name: "no_conflict_multiple_changes_separated", textO: "line1\nline2\nline3\nline4\nline5\n", textA: "line1a\nline2\nline3\nline4\nline5\n", textB: "line1\nline2\nline3\nline4b\nline5\n", wantTrue: false, expectErr: false, }, { name: "conflict_multiple_overlapping_changes", textO: "line1\nline2\nline3\nline4\nline5\n", textA: "line1\nline2a\nline3a\nline4\nline5\n", textB: "line1\nline2b\nline3b\nline4\nline5\n", wantTrue: true, expectErr: false, }, } ctx := context.Background() for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got, err := HasConflictParallel(ctx, tt.textO, tt.textA, tt.textB) if (err != nil) != tt.expectErr { t.Errorf("HasConflictParallel() error = %v, expectErr %v", err, tt.expectErr) return } if got != tt.wantTrue { t.Errorf("HasConflictParallel() = %v, want %v", got, tt.wantTrue) } }) } } // TestHasConflictParallelVsMerge tests that HasConflictParallel is consistent with MergeParallel func TestHasConflictParallelVsMerge(t *testing.T) { tests := []struct { name string textO string textA string textB string }{ { name: "simple_conflict", textO: "line1\nline2\nline3\n", textA: "line1\nline2a\nline3\n", textB: "line1\nline2b\nline3\n", }, { name: "no_conflict", textO: "line1\nline2\nline3\n", textA: "line1\nline2a\nline3\n", textB: "line1\nline2\nline3\n", }, { name: "adjacent_changes", textO: "line1\nline2\nline3\n", textA: "line1a\nline2\nline3\n", textB: "line1b\nline2\nline3\n", }, { name: "same_change", textO: "line1\nline2\nline3\n", textA: "line1\nline2a\nline3\n", textB: "line1\nline2a\nline3\n", }, } ctx := context.Background() for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Check with HasConflictParallel hasConflict, err := HasConflictParallel(ctx, tt.textO, tt.textA, tt.textB) if err != nil { t.Fatalf("HasConflictParallel() error = %v", err) } // Check with MergeParallel opts := &MergeOptions{ TextO: tt.textO, TextA: tt.textA, TextB: tt.textB, Style: STYLE_DEFAULT, A: Histogram, } _, mergeHasConflict, err := MergeParallel(ctx, opts) if err != nil { t.Fatalf("MergeParallel() error = %v", err) } // They should match if hasConflict != mergeHasConflict { t.Errorf("HasConflictParallel() = %v, MergeParallel() = %v, should match", hasConflict, mergeHasConflict) } }) } } // TestHasConflictParallelContextCancellation tests context cancellation func TestHasConflictParallelContextCancellation(t *testing.T) { tests := []struct { name string cancelBefore bool cancelDuring bool expectError bool }{ { name: "cancel_before_merge", cancelBefore: true, expectError: true, }, { name: "no_cancellation", cancelBefore: false, expectError: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) if tt.cancelBefore { cancel() } textO := "line1\nline2\nline3\n" textA := "line1\nline2a\nline3\n" textB := "line1\nline2\nline3\n" _, err := HasConflictParallel(ctx, textO, textA, textB) if tt.expectError && err == nil { t.Error("expected error but got nil") } if !tt.expectError && err != nil { t.Errorf("unexpected error: %v", err) } cancel() }) } } ================================================ FILE: modules/diferenco/merge_test.go ================================================ package diferenco import ( "fmt" "os" "strings" "testing" ) func TestMerge(t *testing.T) { const textO = `celery garlic onions salmon tomatoes wine ` const textA = `celery salmon tomatoes garlic onions wine ` const textB = `celery salmon garlic onions tomatoes wine ` content, conflict, err := DefaultMerge(t.Context(), textO, textA, textB, "o.txt", "a.txt", "b.txt") if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) content, conflict, err = Merge(t.Context(), &MergeOptions{TextO: textO, TextA: textA, TextB: textB, LabelO: "o.txt", LabelA: "a.txt", LabelB: "b.txt", Style: STYLE_ZEALOUS_DIFF3}) if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "ZEALOUS_DIFF3\n%s\nconflicts: %v\n", content, conflict) content, conflict, err = Merge(t.Context(), &MergeOptions{TextO: textO, TextA: textA, TextB: textB, LabelO: "o.txt", LabelA: "a.txt", LabelB: "b.txt", Style: STYLE_DIFF3}) if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "DIFF3\n%s\nconflicts: %v\n", content, conflict) } func TestMerge2(t *testing.T) { const textO = `celery garlic onions salmon tomatoes wine ` const textA = `celery salmon tomatoes garlic onions wine ` content, conflict, err := DefaultMerge(t.Context(), textO, textA, textA, "o.txt", "a.txt", "b.txt") if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) } func TestMerge3(t *testing.T) { const textO = `celery garlic onions salmon tomatoes wine ` const textA = `celery garlic onions salmon tomatoes wine 0000 00000 ` const textB = `celery garlic onions salmon tomatoes wine 0000 00000 77777 ` content, conflict, err := DefaultMerge(t.Context(), textO, textA, textB, "o.txt", "a.txt", "b.txt") if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) content, conflict, err = Merge(t.Context(), &MergeOptions{TextO: textO, TextA: textA, TextB: textB, LabelO: "o.txt", LabelA: "a.txt", LabelB: "b.txt", Style: STYLE_ZEALOUS_DIFF3}) if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) content, conflict, err = Merge(t.Context(), &MergeOptions{TextO: textO, TextA: textA, TextB: textB, LabelO: "o.txt", LabelA: "a.txt", LabelB: "b.txt", Style: STYLE_DIFF3}) if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) } func TestMergeConflicts(t *testing.T) { const textO = `1 2 3 4 5 6 ` const textA = `1 2 AAA XXX 4 5 6 ` const textB = `1 2 BBB YYY 4 5 6 ` content, conflict, err := DefaultMerge(t.Context(), textO, textA, textB, "o.txt", "a.txt", "b.txt") if err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) return } fmt.Fprintf(os.Stderr, "%s\nconflicts: %v\n", content, conflict) } // TestWriteConflictSuffix tests whether the suffix != 0 branch in writeConflict // can ever be reached. This tests the hypothesis that conflict.a and conflict.b // never have a common suffix when excludeFalseConflicts is true. func TestWriteConflictSuffix(t *testing.T) { tests := []struct { name string textO string textA string textB string }{ { name: "same_prefix_and_suffix_in_conflict", // Test: a and b have the same prefix and suffix in the conflict region textO: `line1 line2 line3 line4 line5 `, textA: `line1 CHANGED_A line3 line4 line5 `, textB: `line1 CHANGED_B line3 line4 line5 `, }, { name: "multi_line_same_ending", // Test: multi-line changes with the same ending textO: `start old1 old2 end `, textA: `start new_a1 new_a2 common_end end `, textB: `start new_b1 new_b2 common_end end `, }, { name: "insert_with_common_context", // Test: insert operation with the same surrounding context textO: `prefix content suffix `, textA: `prefix inserted_a content suffix `, textB: `prefix inserted_b content suffix `, }, { name: "delete_with_common_remaining", // Test: delete operation with the same remaining content textO: `line1 to_delete line2 line3 `, textA: `line1 line2 line3 `, textB: `line1 extra_line line2 line3 `, }, { name: "complex_overlapping_changes", // Test: complex overlapping changes textO: `a b c d e f `, textA: `a X Y d e f `, textB: `a Z W d e f `, }, { name: "both_add_same_prefix_different_middle", // Test: both sides add the same prefix but different middle textO: `1 2 3 `, textA: `1 same_prefix different_A 3 `, textB: `1 same_prefix different_B 3 `, }, { name: "adjacent_changes", // Test: adjacent changes textO: `line1 line2 line3 line4 `, textA: `line1 modified_a1 modified_a2 line3 line4 `, textB: `line1 modified_b1 modified_b2 line3 line4 `, }, { name: "same_content_different_position", // Test: same content at different positions textO: `a b c d `, textA: `a x b c d `, textB: `a b x c d `, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // 使用三种样式测试 for _, style := range []int{STYLE_DEFAULT, STYLE_DIFF3, STYLE_ZEALOUS_DIFF3} { styleName := []string{"DEFAULT", "DIFF3", "ZEALOUS_DIFF3"}[style] t.Run(styleName, func(t *testing.T) { content, hasConflict, err := Merge(t.Context(), &MergeOptions{ TextO: tt.textO, TextA: tt.textA, TextB: tt.textB, Style: style, }) if err != nil { t.Fatalf("unexpected error: %v", err) } // 详细输出以便调试 t.Logf("Style %s:\n%s\nhasConflict: %v", styleName, content, hasConflict) }) } }) } } // TestConflictSuffixDirectly directly tests the writeConflict function // by constructing conflict structs to verify suffix behavior. func TestConflictSuffixDirectly(t *testing.T) { s := NewSink(NEWLINE_RAW) tests := []struct { name string conflict conflict[int] wantIn string // should contain this substring wantNot string // should NOT contain this substring }{ { name: "identical_a_and_b", // If a and b are identical, this should not be a real conflict conflict: conflict[int]{ a: s.SplitLines("same\ncontent\n"), o: s.SplitLines("original\n"), b: s.SplitLines("same\ncontent\n"), }, }, { name: "a_and_b_share_prefix_and_suffix", conflict: conflict[int]{ a: s.SplitLines("prefix\ndiff_a\nsuffix\n"), o: s.SplitLines("original\n"), b: s.SplitLines("prefix\ndiff_b\nsuffix\n"), }, }, { name: "a_and_b_completely_different", conflict: conflict[int]{ a: s.SplitLines("completely\ndifferent\na\n"), o: s.SplitLines("original\n"), b: s.SplitLines("totally\nother\nb\n"), }, }, { name: "a_and_b_share_only_prefix", conflict: conflict[int]{ a: s.SplitLines("prefix\nunique_a\n"), o: s.SplitLines("original\n"), b: s.SplitLines("prefix\nunique_b\n"), }, }, { name: "a_and_b_share_only_suffix", conflict: conflict[int]{ a: s.SplitLines("unique_a\nsuffix\n"), o: s.SplitLines("original\n"), b: s.SplitLines("unique_b\nsuffix\n"), }, }, { name: "empty_a_and_b", conflict: conflict[int]{ a: []int{}, o: s.SplitLines("original\n"), b: []int{}, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, style := range []int{STYLE_DEFAULT, STYLE_ZEALOUS_DIFF3} { styleName := []string{"DEFAULT", "DIFF3", "ZEALOUS_DIFF3"}[style] t.Run(styleName, func(t *testing.T) { opts := &MergeOptions{Style: style} out := &strings.Builder{} s.writeConflict(out, opts, &tt.conflict) result := out.String() t.Logf("Output:\n%s", result) // Check for suffix-related output // In DEFAULT mode, if suffix != 0, the common suffix would be output after >>>>>>> // We can check by examining the number of lines in the output lines := strings.Split(result, "\n") t.Logf("Number of lines: %d", len(lines)) }) } }) } } // TestDiff3MergeIndicesConflictBounds tests what ranges diff3MergeIndices // produces for conflict regions. func TestDiff3MergeIndicesConflictBounds(t *testing.T) { s := NewSink(NEWLINE_RAW) tests := []struct { name string textO string textA string textB string }{ { name: "simple_conflict", textO: "line1\nline2\nline3\n", textA: "line1\nCHANGED_A\nline3\n", textB: "line1\nCHANGED_B\nline3\n", }, { name: "conflict_with_shared_suffix", textO: "a\nb\nc\nd\n", textA: "a\nX\nc\nd\n", textB: "a\nY\nc\nd\n", }, { name: "conflict_with_shared_prefix_and_suffix", textO: "prefix\nmiddle\nsuffix\n", textA: "prefix\nA\nsuffix\n", textB: "prefix\nB\nsuffix\n", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { o := s.SplitLines(tt.textO) a := s.SplitLines(tt.textA) b := s.SplitLines(tt.textB) indices, err := diff3MergeIndices(t.Context(), o, a, b, Histogram) if err != nil { t.Fatalf("unexpected error: %v", err) } t.Logf("Indices for %s:", tt.name) for i, idx := range indices { if len(idx) == 3 { // Non-conflict record: {side, offset, length} t.Logf(" [%d]: side=%d, offset=%d, length=%d", i, idx[0], idx[1], idx[2]) } else if len(idx) == 7 { // Conflict record: {-1, aLhs, aLen, oLhs, oLen, bLhs, bLen} t.Logf(" [%d]: CONFLICT, a=[%d:%d], o=[%d:%d], b=[%d:%d]", i, idx[1], idx[1]+idx[2], idx[3], idx[3]+idx[4], idx[5], idx[5]+idx[6]) // Examine the conflict content conflictA := a[idx[1] : idx[1]+idx[2]] conflictB := b[idx[5] : idx[5]+idx[6]] prefix := commonPrefixLength(conflictA, conflictB) suffix := commonSuffixLength(conflictA[prefix:], conflictB[prefix:]) t.Logf(" conflict.a = %v", conflictA) t.Logf(" conflict.b = %v", conflictB) t.Logf(" prefix length = %d", prefix) t.Logf(" suffix length = %d", suffix) // Key test: verify if suffix can be non-zero if suffix > 0 { t.Errorf(" suffix = %d (non-zero!), this would trigger the 'dead code' branch!", suffix) } } } }) } } // TestWriteConflictSuffixNeverHappens verifies that the `if suffix != 0` branch // in writeConflict can NEVER be reached when going through the normal Merge path. func TestWriteConflictSuffixNeverHappens(t *testing.T) { // This test verifies: through the normal Merge path, suffix is always 0 // This means the `if suffix != 0` branch is dead code tests := []struct { name string textO string textA string textB string }{ { name: "case1", textO: "1\n2\n3\n4\n5\n", textA: "A\n2\n3\n4\n5\n", textB: "B\n2\n3\n4\n5\n", }, { name: "case2", textO: "prefix\norig\nsuffix\n", textA: "prefix\nA\nsuffix\n", textB: "prefix\nB\nsuffix\n", }, { name: "case3", textO: "a\nb\nc\nd\ne\n", textA: "a\nX\nY\nc\nd\ne\n", textB: "a\nP\nQ\nc\nd\ne\n", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Use Merge function for complete testing content, _, err := Merge(t.Context(), &MergeOptions{ TextO: tt.textO, TextA: tt.textA, TextB: tt.textB, Style: STYLE_DEFAULT, }) if err != nil { t.Fatalf("unexpected error: %v", err) } // Check if the output contains the expected common suffix // If the suffix != 0 branch were executed, the common suffix would appear after >>>>>>> t.Logf("Output:\n%s", content) }) } } // TestMergeParallelSuffixBehavior tests whether MergeParallel can trigger the suffix != 0 branch func TestMergeParallelSuffixBehavior(t *testing.T) { tests := []struct { name string textO string textA string textB string }{ { name: "simple_conflict", textO: "line1\nline2\nline3\nline4\n", textA: "line1\nCHANGED_A\nline3\nline4\n", textB: "line1\nCHANGED_B\nline3\nline4\n", }, { name: "multi_line_with_shared_context", textO: "start\na\nb\nc\nend\n", textA: "start\nX\nY\nZ\nc\nend\n", textB: "start\nP\nQ\nR\nc\nend\n", }, { name: "insert_at_beginning", textO: "line1\nline2\n", textA: "NEW_A\nline1\nline2\n", textB: "NEW_B\nline1\nline2\n", }, { name: "delete_vs_modify", textO: "a\ntarget\nb\n", textA: "a\nb\n", textB: "a\nMODIFIED\nb\n", }, { name: "complex_overlapping", textO: "1\n2\n3\n4\n5\n6\n", textA: "1\nA1\nA2\nA3\n5\n6\n", textB: "1\nB1\nB2\nB3\n5\n6\n", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Compare Merge and MergeParallel outputs content1, hasConflict1, err := Merge(t.Context(), &MergeOptions{ TextO: tt.textO, TextA: tt.textA, TextB: tt.textB, Style: STYLE_DEFAULT, }) if err != nil { t.Fatalf("Merge error: %v", err) } content2, hasConflict2, err := MergeParallel(t.Context(), &MergeOptions{ TextO: tt.textO, TextA: tt.textA, TextB: tt.textB, Style: STYLE_DEFAULT, }) if err != nil { t.Fatalf("MergeParallel error: %v", err) } t.Logf("Merge output:\n%s", content1) t.Logf("MergeParallel output:\n%s", content2) // Check if results are consistent if hasConflict1 != hasConflict2 { t.Errorf("conflict status mismatch: Merge=%v, MergeParallel=%v", hasConflict1, hasConflict2) } // Both should produce the same output (modulo whitespace differences) if content1 != content2 { t.Errorf("output mismatch:\nMerge:\n%s\nMergeParallel:\n%s", content1, content2) } }) } } // TestMergeParallelConflictSuffixDirectly directly tests the conflict structure // created by MergeParallel's writeConflictRegion function func TestMergeParallelConflictSuffixDirectly(t *testing.T) { sink := NewSink(NEWLINE_LF) tests := []struct { name string textO string textA string textB string wantSuffix int // expected suffix length in conflict }{ { name: "simple_different_content", textO: "a\nb\nc\n", textA: "X\nb\nc\n", textB: "Y\nb\nc\n", wantSuffix: 0, // conflict should only contain X/Y, not b,c }, { name: "same_prefix_different_middle", textO: "start\nmid\nend\n", textA: "start\nA\nend\n", textB: "start\nB\nend\n", wantSuffix: 0, // conflict should only contain A/B }, { name: "multi_line_conflict", textO: "1\n2\n3\n4\n", textA: "A1\nA2\n3\n4\n", textB: "B1\nB2\n3\n4\n", wantSuffix: 0, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { oIdx := sink.SplitLines(tt.textO) aIdx := sink.SplitLines(tt.textA) bIdx := sink.SplitLines(tt.textB) // Get changes using parallel diff changesA, changesB, err := parallelDiff(t.Context(), oIdx, aIdx, bIdx, Histogram) if err != nil { t.Fatalf("parallelDiff error: %v", err) } t.Logf("changesA: %v", changesA) t.Logf("changesB: %v", changesB) // Find merge regions regions := findMergeRegions(changesA, changesB) t.Logf("regions: %+v", regions) // Check each conflict region for _, region := range regions { // Finalize region (check for false conflicts) region = finalizeRegion(region, changesA, changesB, aIdx, bIdx) if !region.isConflict { continue } // Calculate conflict content like writeConflictRegion does aLhs, aRhs := calculateRangeByIndices(changesA, region.changesAIndices, aIdx, region.start, region.end) bLhs, bRhs := calculateRangeByIndices(changesB, region.changesBIndices, bIdx, region.start, region.end) conflictA := aIdx[aLhs:aRhs] conflictB := bIdx[bLhs:bRhs] prefix := commonPrefixLength(conflictA, conflictB) suffix := commonSuffixLength(conflictA[prefix:], conflictB[prefix:]) t.Logf("region: start=%d, end=%d", region.start, region.end) t.Logf("conflict.a: %v (aLhs=%d, aRhs=%d)", conflictA, aLhs, aRhs) t.Logf("conflict.b: %v (bLhs=%d, bRhs=%d)", conflictB, bLhs, bRhs) t.Logf("prefix=%d, suffix=%d", prefix, suffix) if suffix != tt.wantSuffix { t.Errorf("suffix mismatch: got %d, want %d", suffix, tt.wantSuffix) } if suffix > 0 { t.Errorf("suffix=%d (non-zero!), this would trigger the 'dead code' branch!", suffix) } } }) } } ================================================ FILE: modules/diferenco/minimal.go ================================================ package diferenco import ( "context" "github.com/antgroup/hugescm/modules/diferenco/lcs" ) // minimal: Myers: An O(ND) Difference Algorithm and Its Variations func minimal[E comparable](ctx context.Context, L1 []E, L2 []E) ([]Change, error) { select { case <-ctx.Done(): return nil, ctx.Err() default: } diffs := lcs.DiffSlices(L1, L2) changes := make([]Change, 0, len(diffs)) for _, d := range diffs { changes = append(changes, Change{P1: d.Start, P2: d.ReplStart, Del: d.End - d.Start, Ins: d.ReplEnd - d.ReplStart}) } return changes, nil } ================================================ FILE: modules/diferenco/minimal_test.go ================================================ package diferenco import ( "fmt" "os" "path/filepath" "runtime" "testing" "github.com/antgroup/hugescm/modules/diferenco/color" ) func TestMinimalDiff(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, Minimal) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) e := NewUnifiedEncoder(os.Stderr, WithVCS("zeta"), WithColor(color.NewColorConfig())) _ = e.Encode([]*Patch{u}) } ================================================ FILE: modules/diferenco/myers.go ================================================ /*--------------------------------------------------------------------------------------------- * Copyright (c) Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ // https://github.com/microsoft/vscode/blob/main/src/vs/editor/common/diff/defaultLinesDiffComputer/algorithms/myersDiffAlgorithm.ts package diferenco import ( "context" "slices" ) // myers: An O(ND) diff algorithm that has a quadratic space worst-case complexity. func myers[E comparable](ctx context.Context, L1 []E, L2 []E) ([]Change, error) { prefix := commonPrefixLength(L1, L2) L1 = L1[prefix:] L2 = L2[prefix:] suffix := commonSuffixLength(L1, L2) L1 = L1[:len(L1)-suffix] L2 = L2[:len(L2)-suffix] return myersCompute(ctx, L1, prefix, L2, prefix) } func myersCompute[E comparable](ctx context.Context, seq1 []E, P1 int, seq2 []E, P2 int) ([]Change, error) { // These are common special cases. // The early return improves performance dramatically. if len(seq1) == 0 && len(seq2) == 0 { return []Change{}, nil } if len(seq1) == 0 { return []Change{{P1: P1, P2: P2, Ins: len(seq2)}}, nil } if len(seq2) == 0 { return []Change{{P1: P1, P2: P2, Del: len(seq1)}}, nil } seqX := seq1 seqY := seq2 getXAfterSnake := func(x, y int) int { for x < len(seqX) && y < len(seqY) && seqX[x] == seqY[y] { y++ x++ } return x } d := 0 // V[k]: X value of longest d-line that ends in diagonal k. // d-line: path from (0,0) to (x,y) that uses exactly d non-diagonals. // diagonal k: Set of points (x,y) with x-y = k. // k=1 -> (1,0),(2,1) V := newFastIntArray() V.set(0, getXAfterSnake(0, 0)) paths := newFastPathArray() if V.get(0) == 0 { paths.set(0, nil) } else { paths.set(0, newSnakePath(nil, 0, 0, V.get(0))) } var k int outer: for { select { case <-ctx.Done(): return nil, ctx.Err() default: } d++ // The paper has `for (k = -d; k <= d; k += 2)`, but we can ignore diagonals that cannot influence the result. lowerBound := -min(d, len(seqY)+(d%2)) upperBound := min(d, len(seqX)+(d%2)) for k = lowerBound; k <= upperBound; k += 2 { // We can use the X values of (d-1)-lines to compute X value of the longest d-lines. maxXofDLineTop, maxXofDLineLeft := -1, -1 if k != upperBound { maxXofDLineTop = V.get(k + 1) // We take a vertical non-diagonal (add a symbol in seqX) } if k != lowerBound { maxXofDLineLeft = V.get(k-1) + 1 // We take a horizontal non-diagonal (+1 x) (delete a symbol in seqX) } x := min(max(maxXofDLineTop, maxXofDLineLeft), len(seqX)) y := x - k if x > len(seqX) || y > len(seqY) { // This diagonal is irrelevant for the result. // TODO: Don't pay the cost for this in the next iteration. continue } newMaxX := getXAfterSnake(x, y) V.set(k, newMaxX) var lastPath *snakePath if x == maxXofDLineTop { lastPath = paths.get(k + 1) } else { lastPath = paths.get(k - 1) } if newMaxX != x { paths.set(k, newSnakePath(lastPath, x, y, newMaxX-x)) } else { paths.set(k, lastPath) } if V.get(k) == len(seqX) && V.get(k)-k == len(seqY) { break outer } } } path := paths.get(k) lastAligningPosS1 := len(seqX) lastAligningPosS2 := len(seqY) changes := make([]Change, 0, 10) for { var endX, endY int if path != nil { endX = path.x + path.length endY = path.y + path.length } if endX != lastAligningPosS1 || endY != lastAligningPosS2 { changes = append(changes, Change{P1: P1 + endX, P2: P2 + endY, Del: lastAligningPosS1 - endX, Ins: lastAligningPosS2 - endY}) } if path == nil { break } lastAligningPosS1 = path.x lastAligningPosS2 = path.y path = path.pre } slices.Reverse(changes) return changes, nil } type snakePath struct { pre *snakePath x, y, length int } func newSnakePath(pre *snakePath, x, y, length int) *snakePath { return &snakePath{ pre: pre, x: x, y: y, length: length, } } type fastIntArray struct { positiveArr []int negativeArr []int } func newFastIntArray() *fastIntArray { return &fastIntArray{ positiveArr: make([]int, 10), negativeArr: make([]int, 10), } } func (t *fastIntArray) get(i int) int { if i < 0 { i = -i - 1 return t.negativeArr[i] } return t.positiveArr[i] } func (t *fastIntArray) set(i int, v int) { if i < 0 { i = -i - 1 if i >= len(t.negativeArr) { newArr := make([]int, len(t.negativeArr)*2) copy(newArr, t.negativeArr) t.negativeArr = newArr } t.negativeArr[i] = v return } if i >= len(t.positiveArr) { newArr := make([]int, len(t.positiveArr)*2) copy(newArr, t.positiveArr) t.positiveArr = newArr } t.positiveArr[i] = v } // An array that supports fast negative indices, using slices for performance. type fastArrayWithNegIndex struct { positiveArr []*snakePath negativeArr []*snakePath } func newFastPathArray() *fastArrayWithNegIndex { return &fastArrayWithNegIndex{ positiveArr: make([]*snakePath, 10), negativeArr: make([]*snakePath, 10), } } func (t *fastArrayWithNegIndex) get(i int) *snakePath { if i < 0 { i = -i - 1 if i >= len(t.negativeArr) { return nil } return t.negativeArr[i] } if i >= len(t.positiveArr) { return nil } return t.positiveArr[i] } func (t *fastArrayWithNegIndex) set(i int, v *snakePath) { if i < 0 { i = -i - 1 if i >= len(t.negativeArr) { newArr := make([]*snakePath, max(len(t.negativeArr)*2, i+1)) copy(newArr, t.negativeArr) t.negativeArr = newArr } t.negativeArr[i] = v return } if i >= len(t.positiveArr) { newArr := make([]*snakePath, max(len(t.positiveArr)*2, i+1)) copy(newArr, t.positiveArr) t.positiveArr = newArr } t.positiveArr[i] = v } ================================================ FILE: modules/diferenco/myers_bench_test.go ================================================ package diferenco import ( "context" "math/rand" "testing" ) // myersFast is a GPT implementation for comparison func myersFast[E comparable](ctx context.Context, a []E, P1 int, b []E, P2 int) ([]Change, error) { n := len(a) m := len(b) if n == 0 && m == 0 { return nil, nil } if n == 0 { return []Change{{P1: P1, P2: P2, Ins: m}}, nil } if m == 0 { return []Change{{P1: P1, P2: P2, Del: n}}, nil } mx := n + m offset := mx V := make([]int, 2*mx+1) trace := make([][]int, 0, mx+1) V[offset] = 0 for d := 0; d <= mx; d++ { select { case <-ctx.Done(): return nil, ctx.Err() default: } Vcopy := make([]int, len(V)) copy(Vcopy, V) trace = append(trace, Vcopy) for k := -d; k <= d; k += 2 { var x int if k == -d || (k != d && V[offset+k-1] < V[offset+k+1]) { x = V[offset+k+1] } else { x = V[offset+k-1] + 1 } y := x - k for x < n && y < m && a[x] == b[y] { x++ y++ } V[offset+k] = x if x >= n && y >= m { return buildScriptFast(trace, a, b, P1, P2) } } } return nil, nil } func buildScriptFast[E comparable](trace [][]int, a, b []E, P1, P2 int) ([]Change, error) { x := len(a) y := len(b) maxVal := len(a) + len(b) offset := maxVal changes := make([]Change, 0, 16) for d := len(trace) - 1; d >= 0; d-- { V := trace[d] k := x - y var prevK int if k == -d || (k != d && V[offset+k-1] < V[offset+k+1]) { prevK = k + 1 } else { prevK = k - 1 } prevX := V[offset+prevK] prevY := prevX - prevK for x > prevX && y > prevY { x-- y-- } if d == 0 { break } if x == prevX { y-- changes = append(changes, Change{ P1: P1 + x, P2: P2 + y, Ins: 1, }) } else { x-- changes = append(changes, Change{ P1: P1 + x, P2: P2 + y, Del: 1, }) } } for i, j := 0, len(changes)-1; i < j; i, j = i+1, j-1 { changes[i], changes[j] = changes[j], changes[i] } return mergeChangesFast(changes), nil } func mergeChangesFast(ch []Change) []Change { if len(ch) == 0 { return ch } out := make([]Change, 0, len(ch)) cur := ch[0] for i := 1; i < len(ch); i++ { n := ch[i] if cur.P1+cur.Del == n.P1 && cur.P2+cur.Ins == n.P2 { cur.Del += n.Del cur.Ins += n.Ins } else { out = append(out, cur) cur = n } } out = append(out, cur) return out } func generateTestLines(n int) []string { lines := make([]string, n) for i := range n { lines[i] = randStringBench(20) } return lines } func randStringBench(n int) string { const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" b := make([]byte, n) for i := range b { b[i] = letters[rand.Intn(len(letters))] } return string(b) } func BenchmarkMyersOriginal(b *testing.B) { ctx := context.Background() a := generateTestLines(1000) c := make([]string, len(a)) copy(c, a) // 10% modification for range 100 { idx := rand.Intn(len(c)) c[idx] = randStringBench(20) } for b.Loop() { _, _ = myersCompute(ctx, a, 0, c, 0) } } func BenchmarkMyersFast(b *testing.B) { ctx := context.Background() a := generateTestLines(1000) c := make([]string, len(a)) copy(c, a) // 10% modification for range 100 { idx := rand.Intn(len(c)) c[idx] = randStringBench(20) } for b.Loop() { _, _ = myersFast(ctx, a, 0, c, 0) } } func BenchmarkMyersOriginalLarge(b *testing.B) { ctx := context.Background() a := generateTestLines(5000) c := make([]string, len(a)) copy(c, a) for range 500 { idx := rand.Intn(len(c)) c[idx] = randStringBench(20) } for b.Loop() { _, _ = myersCompute(ctx, a, 0, c, 0) } } func BenchmarkMyersFastLarge(b *testing.B) { ctx := context.Background() a := generateTestLines(5000) c := make([]string, len(a)) copy(c, a) for range 500 { idx := rand.Intn(len(c)) c[idx] = randStringBench(20) } b.ResetTimer() for i := 0; i < b.N; i++ { _, _ = myersFast(ctx, a, 0, c, 0) } } ================================================ FILE: modules/diferenco/myers_test.go ================================================ package diferenco import ( "fmt" "os" "path/filepath" "runtime" "testing" ) func TestMyersDiff(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, Myers) i := 0 for _, c := range changes { for ; i < c.P1; i++ { fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]]) } for j := c.P1; j < c.P1+c.Del; j++ { fmt.Fprintf(os.Stderr, "- %s", sink.Lines[a[j]]) } for j := c.P2; j < c.P2+c.Ins; j++ { fmt.Fprintf(os.Stderr, "+ %s", sink.Lines[b[j]]) } i += c.Del } for ; i < len(a); i++ { fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]]) } fmt.Fprintf(os.Stderr, "\n\nEND\n\n") } func TestMyersDiff2(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, Myers) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) fmt.Fprintf(os.Stderr, "diff:\n%s\n", u.String()) } func TestMyersDiff3(t *testing.T) { textA := `1 2 3 4 5` textB := `1 4 5 4 5` sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, Myers) u := sink.ToPatch(&File{Name: "a.txt"}, &File{Name: "b.txt"}, changes, a, b, DefaultContextLines) fmt.Fprintf(os.Stderr, "diff:\n%s\n", u.String()) } ================================================ FILE: modules/diferenco/onp.go ================================================ // Copyright (c) 2014-2021 Akinori Hattori // // SPDX-License-Identifier: MIT // // SOURCE: https://github.com/hattya/go.diff // // Package diff implements the difference algorithm, which is based upon // S. Wu, U. Manber, G. Myers, and W. Miller, // "An O(NP) Sequence Comparison Algorithm" August 1989. package diferenco import "context" func onpCompute[E comparable](ctx context.Context, L1 []E, P1 int, L2 []E, P2 int) ([]Change, error) { m, n := len(L1), len(L2) c := &onpCtx[E]{L1: L1, L2: L2, P1: P1, P2: P2} if n >= m { c.M = m c.N = n } else { c.M = n c.N = m c.xchg = true } c.Δ = c.N - c.M return c.compare(ctx) } type onpCtx[E comparable] struct { L1, L2 []E P1, P2 int M, N int Δ int fp []point xchg bool } func (c *onpCtx[E]) compare(ctx context.Context) ([]Change, error) { select { case <-ctx.Done(): return nil, ctx.Err() default: } c.fp = make([]point, (c.M+1)+(c.N+1)+1) for i := range c.fp { c.fp[i].y = -1 } Δ := c.Δ + (c.M + 1) for p := 0; c.fp[Δ].y != c.N; p++ { for k := -p; k < c.Δ; k++ { c.snake(k) } for k := c.Δ + p; k > c.Δ; k-- { c.snake(k) } c.snake(c.Δ) } lcs, n := c.reverse(c.fp[Δ].lcs) changes := make([]Change, 0, n+1) var x, y int for ; lcs != nil; lcs = lcs.next { if x < lcs.x || y < lcs.y { if !c.xchg { changes = append(changes, Change{x + c.P1, y + c.P2, lcs.x - x, lcs.y - y}) } else { changes = append(changes, Change{y + c.P1, x + c.P2, lcs.y - y, lcs.x - x}) } } x = lcs.x + lcs.n y = lcs.y + lcs.n } if x < c.M || y < c.N { if !c.xchg { changes = append(changes, Change{x + c.P1, y + c.P2, c.M - x, c.N - y}) } else { changes = append(changes, Change{y + c.P1, x + c.P2, c.N - y, c.M - x}) } } return changes, nil } func (c *onpCtx[E]) snake(k int) { var y int var prev *onpLcs kk := k + (c.M + 1) h := &c.fp[kk-1] v := &c.fp[kk+1] if h.y+1 >= v.y { y = h.y + 1 prev = h.lcs } else { y = v.y prev = v.lcs } x := y - k n := 0 for x < c.M && y < c.N { var eq bool if !c.xchg { eq = c.L1[x] == c.L2[y] } else { eq = c.L1[y] == c.L2[x] } if !eq { break } x++ y++ n++ } p := &c.fp[kk] p.y = y if n == 0 { p.lcs = prev } else { p.lcs = &onpLcs{ x: x - n, y: y - n, n: n, next: prev, } } } func (c *onpCtx[E]) reverse(curr *onpLcs) (next *onpLcs, n int) { for ; curr != nil; n++ { curr.next, next, curr = next, curr, curr.next } return } type point struct { y int lcs *onpLcs } type onpLcs struct { x, y int n int next *onpLcs } // onp returns the differences between []E. // It makes O(NP) (the worst case) calls to equal. func onp[E comparable](ctx context.Context, L1, L2 []E) ([]Change, error) { prefix := commonPrefixLength(L1, L2) L1 = L1[prefix:] L2 = L2[prefix:] suffix := commonSuffixLength(L1, L2) L1 = L1[:len(L1)-suffix] L2 = L2[:len(L2)-suffix] return onpCompute(ctx, L1, prefix, L2, prefix) } ================================================ FILE: modules/diferenco/onp_test.go ================================================ package diferenco import ( "fmt" "os" "path/filepath" "runtime" "testing" ) func TestONP(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, ONP) i := 0 for _, c := range changes { for ; i < c.P1; i++ { fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]]) } for j := c.P1; j < c.P1+c.Del; j++ { fmt.Fprintf(os.Stderr, "- %s", sink.Lines[a[j]]) } for j := c.P2; j < c.P2+c.Ins; j++ { fmt.Fprintf(os.Stderr, "+ %s", sink.Lines[b[j]]) } i += c.Del } for ; i < len(a); i++ { fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]]) } fmt.Fprintf(os.Stderr, "\n\nEND\n\n") } ================================================ FILE: modules/diferenco/patience.go ================================================ // MIT License // Copyright (c) 2022 Peter Evans // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. package diferenco import ( "context" "slices" ) // uniqueElements returns a slice of unique elements from a slice of // strings, and a slice of the original indices of each element. func uniqueElements[E comparable](a []E) ([]E, []int) { m := make(map[E]int) for _, e := range a { m[e]++ } elements := []E{} indices := []int{} for i, e := range a { if m[e] == 1 { elements = append(elements, e) indices = append(indices, i) } } return elements, indices } // patienceLCS computes the longest common subsequence of two string // slices and returns the index pairs of the patienceLCS. // Uses O(n log n) LIS algorithm for better performance. func patienceLCS[E comparable](a, b []E) [][2]int { // Build index map for unique elements in b pos := make(map[E]int, len(b)) count := make(map[E]int, len(b)) for _, e := range b { count[e]++ } for i, e := range b { if count[e] == 1 { pos[e] = i } } // Build sequence of matching pairs (unique elements that appear in both) type pair struct { i int j int } pairs := make([]pair, 0, len(a)) for i, e := range a { if j, ok := pos[e]; ok { pairs = append(pairs, pair{i, j}) } } if len(pairs) == 0 { return nil } // LIS on j values using O(n log n) algorithm n := len(pairs) tails := make([]int, 0, n) prev := make([]int, n) for i := range prev { prev[i] = -1 } for i, p := range pairs { j := p.j // Binary search for the position to insert lo, hi := 0, len(tails) for lo < hi { mid := (lo + hi) / 2 if pairs[tails[mid]].j < j { lo = mid + 1 } else { hi = mid } } if lo == len(tails) { tails = append(tails, i) } else { tails[lo] = i } if lo > 0 { prev[i] = tails[lo-1] } } // Reconstruct LIS res := make([][2]int, 0, len(tails)) k := tails[len(tails)-1] for k >= 0 { p := pairs[k] res = append(res, [2]int{p.i, p.j}) k = prev[k] } slices.Reverse(res) return res } func patienceCompute[E comparable](ctx context.Context, L1 []E, P1 int, L2 []E, P2 int) ([]Change, error) { select { case <-ctx.Done(): return nil, ctx.Err() default: } if len(L1) == 0 && len(L2) == 0 { return []Change{}, nil } if len(L1) == 0 { return []Change{{P1: P1, P2: P2, Ins: len(L2)}}, nil } if len(L2) == 0 { return []Change{{P1: P1, P2: P2, Del: len(L1)}}, nil } i := 0 for i < len(L1) && i < len(L2) && L1[i] == L2[i] { i++ } if i > 0 { return patienceCompute(ctx, L1[i:], P1+i, L2[i:], P2+i) } // Find equal elements at the tail of slices a and b. j := 0 for j < len(L1) && j < len(L2) && L1[len(L1)-1-j] == L2[len(L2)-1-j] { j++ } if j > 0 { return patienceCompute(ctx, L1[:len(L1)-j], P1, L2[:len(L2)-j], P2) } // Find the longest common subsequence of unique elements in a and b. ua, idxa := uniqueElements(L1) ub, idxb := uniqueElements(L2) lcs := patienceLCS(ua, ub) // If the LCS is empty, the diff is all deletions and insertions. if len(lcs) == 0 { return []Change{{P1: P1, P2: P2, Del: len(L1), Ins: len(L2)}}, nil } // Lookup the original indices of slices a and b. for i, x := range lcs { lcs[i][0] = idxa[x[0]] lcs[i][1] = idxb[x[1]] } changes := make([]Change, 0, 10) ga, gb := 0, 0 for _, ip := range lcs { // Diff the gaps between the lcs elements. sub, err := patienceCompute(ctx, L1[ga:ip[0]], P1+ga, L2[gb:ip[1]], P2+gb) if err != nil { return nil, err } // Append the LCS elements to the diff. changes = append(changes, sub...) ga = ip[0] + 1 gb = ip[1] + 1 } // Diff the remaining elements of a and b after the final LCS element. sub, err := patienceCompute(ctx, L1[ga:], P1+ga, L2[gb:], P2+gb) if err != nil { return nil, err } changes = append(changes, sub...) return changes, nil } // patience: Calculates the difference using the patience algorithm func patience[E comparable](ctx context.Context, L1 []E, L2 []E) ([]Change, error) { prefix := commonPrefixLength(L1, L2) L1 = L1[prefix:] L2 = L2[prefix:] suffix := commonSuffixLength(L1, L2) L1 = L1[:len(L1)-suffix] L2 = L2[:len(L2)-suffix] return patienceCompute(ctx, L1, prefix, L2, prefix) } ================================================ FILE: modules/diferenco/patience_bench_test.go ================================================ package diferenco import ( "context" "math/rand" "slices" "testing" ) // patienceLCSLegacy is the original O(n²) implementation for benchmark comparison func patienceLCSLegacy[E comparable](a, b []E) [][2]int { // Initialize the LCS table. lcs := make([][]int, len(a)+1) for i := range lcs { lcs[i] = make([]int, len(b)+1) } // Populate the LCS table. for i := 1; i < len(lcs); i++ { for j := 1; j < len(lcs[i]); j++ { if a[i-1] == b[j-1] { lcs[i][j] = lcs[i-1][j-1] + 1 } else { lcs[i][j] = max(lcs[i-1][j], lcs[i][j-1]) } } } // Backtrack to find the LCS. i, j := len(a), len(b) s := make([][2]int, 0, lcs[i][j]) for i > 0 && j > 0 { switch { case a[i-1] == b[j-1]: s = append(s, [2]int{i - 1, j - 1}) i-- j-- case lcs[i-1][j] > lcs[i][j-1]: i-- default: j-- } } slices.Reverse(s) return s } func generateUniqueLinesPatience(n int) []string { seen := make(map[string]bool, n) lines := make([]string, 0, n) for len(lines) < n { s := randStringPatience(20) if !seen[s] { seen[s] = true lines = append(lines, s) } } return lines } func randStringPatience(n int) string { const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" b := make([]byte, n) for i := range b { b[i] = letters[rand.Intn(len(letters))] } return string(b) } func BenchmarkPatienceLCSLegacy_Small(b *testing.B) { a := generateUniqueLinesPatience(50) c := make([]string, len(a)) copy(c, a) rand.Shuffle(len(c), func(i, j int) { c[i], c[j] = c[j], c[i] }) ua, _ := uniqueElements(a) ub, _ := uniqueElements(c) b.ResetTimer() for b.Loop() { _ = patienceLCSLegacy(ua, ub) } } func BenchmarkPatienceLCS_Small(b *testing.B) { a := generateUniqueLinesPatience(50) c := make([]string, len(a)) copy(c, a) rand.Shuffle(len(c), func(i, j int) { c[i], c[j] = c[j], c[i] }) ua, _ := uniqueElements(a) ub, _ := uniqueElements(c) b.ResetTimer() for b.Loop() { _ = patienceLCS(ua, ub) } } func BenchmarkPatienceLCSLegacy_Medium(b *testing.B) { a := generateUniqueLinesPatience(200) c := make([]string, len(a)) copy(c, a) rand.Shuffle(len(c), func(i, j int) { c[i], c[j] = c[j], c[i] }) ua, _ := uniqueElements(a) ub, _ := uniqueElements(c) b.ResetTimer() for b.Loop() { _ = patienceLCSLegacy(ua, ub) } } func BenchmarkPatienceLCS_Medium(b *testing.B) { a := generateUniqueLinesPatience(200) c := make([]string, len(a)) copy(c, a) rand.Shuffle(len(c), func(i, j int) { c[i], c[j] = c[j], c[i] }) ua, _ := uniqueElements(a) ub, _ := uniqueElements(c) b.ResetTimer() for b.Loop() { _ = patienceLCS(ua, ub) } } func BenchmarkPatienceLCSLegacy_Large(b *testing.B) { a := generateUniqueLinesPatience(500) c := make([]string, len(a)) copy(c, a) rand.Shuffle(len(c), func(i, j int) { c[i], c[j] = c[j], c[i] }) ua, _ := uniqueElements(a) ub, _ := uniqueElements(c) b.ResetTimer() for b.Loop() { _ = patienceLCSLegacy(ua, ub) } } func BenchmarkPatienceLCS_Large(b *testing.B) { a := generateUniqueLinesPatience(500) c := make([]string, len(a)) copy(c, a) rand.Shuffle(len(c), func(i, j int) { c[i], c[j] = c[j], c[i] }) ua, _ := uniqueElements(a) ub, _ := uniqueElements(c) b.ResetTimer() for b.Loop() { _ = patienceLCS(ua, ub) } } // Test LCS correctness - verify O(n log n) produces same results as O(n²) func TestPatienceLCSCorrectness(t *testing.T) { a := generateUniqueLinesPatience(100) c := make([]string, len(a)) copy(c, a) rand.Shuffle(len(c), func(i, j int) { c[i], c[j] = c[j], c[i] }) ua, _ := uniqueElements(a) ub, _ := uniqueElements(c) result1 := patienceLCSLegacy(ua, ub) result2 := patienceLCS(ua, ub) // Both should find same length LCS if len(result1) != len(result2) { t.Errorf("LCS length mismatch: legacy=%d, optimized=%d", len(result1), len(result2)) } // Verify result is valid for _, p := range result2 { if ua[p[0]] != ub[p[1]] { t.Errorf("Invalid match: a[%d]=%v, b[%d]=%v", p[0], ua[p[0]], p[1], ub[p[1]]) } } } // patienceComputeLegacy uses the legacy O(n²) LCS implementation func patienceComputeLegacy[E comparable](ctx context.Context, L1 []E, P1 int, L2 []E, P2 int) ([]Change, error) { select { case <-ctx.Done(): return nil, ctx.Err() default: } if len(L1) == 0 && len(L2) == 0 { return []Change{}, nil } if len(L1) == 0 { return []Change{{P1: P1, P2: P2, Ins: len(L2)}}, nil } if len(L2) == 0 { return []Change{{P1: P1, P2: P2, Del: len(L1)}}, nil } i := 0 for i < len(L1) && i < len(L2) && L1[i] == L2[i] { i++ } if i > 0 { return patienceComputeLegacy(ctx, L1[i:], P1+i, L2[i:], P2+i) } j := 0 for j < len(L1) && j < len(L2) && L1[len(L1)-1-j] == L2[len(L2)-1-j] { j++ } if j > 0 { return patienceComputeLegacy(ctx, L1[:len(L1)-j], P1, L2[:len(L2)-j], P2) } ua, idxa := uniqueElements(L1) ub, idxb := uniqueElements(L2) lcs := patienceLCSLegacy(ua, ub) // Use legacy LCS if len(lcs) == 0 { return []Change{{P1: P1, P2: P2, Del: len(L1), Ins: len(L2)}}, nil } for i, x := range lcs { lcs[i][0] = idxa[x[0]] lcs[i][1] = idxb[x[1]] } changes := make([]Change, 0, 10) ga, gb := 0, 0 for _, ip := range lcs { sub, err := patienceComputeLegacy(ctx, L1[ga:ip[0]], P1+ga, L2[gb:ip[1]], P2+gb) if err != nil { return nil, err } changes = append(changes, sub...) ga = ip[0] + 1 gb = ip[1] + 1 } sub, err := patienceComputeLegacy(ctx, L1[ga:], P1+ga, L2[gb:], P2+gb) if err != nil { return nil, err } changes = append(changes, sub...) return changes, nil } // DiffSlicesLegacy uses the O(n²) LCS implementation for benchmark comparison func DiffSlicesLegacy[E comparable](ctx context.Context, L1, L2 []E) ([]Change, error) { prefix := commonPrefixLength(L1, L2) L1 = L1[prefix:] L2 = L2[prefix:] suffix := commonSuffixLength(L1, L2) L1 = L1[:len(L1)-suffix] L2 = L2[:len(L2)-suffix] return patienceComputeLegacy(ctx, L1, prefix, L2, prefix) } // Benchmark full diff algorithm func BenchmarkDiffSlicesLegacy(b *testing.B) { ctx := context.Background() a := generateUniqueLinesPatience(200) c := make([]string, len(a)) copy(c, a) for range 20 { idx := rand.Intn(len(c)) c[idx] = randStringPatience(20) } b.ResetTimer() for b.Loop() { _, _ = DiffSlicesLegacy(ctx, a, c) } } func BenchmarkPatienceDiff(b *testing.B) { ctx := context.Background() a := generateUniqueLinesPatience(200) c := make([]string, len(a)) copy(c, a) for range 20 { idx := rand.Intn(len(c)) c[idx] = randStringPatience(20) } b.ResetTimer() for b.Loop() { _, _ = DiffSlices(ctx, a, c, Patience) } } // Test diff equivalence - both implementations should produce same results func TestPatienceDiffEquivalence(t *testing.T) { ctx := context.Background() tests := []struct { name string a, b []string }{ { name: "simple", a: []string{"a", "b", "c", "d", "e"}, b: []string{"a", "c", "d", "f", "e"}, }, { name: "insert", a: []string{"a", "b", "c"}, b: []string{"a", "b", "x", "c"}, }, { name: "delete", a: []string{"a", "b", "c", "d"}, b: []string{"a", "c", "d"}, }, { name: "replace", a: []string{"a", "b", "c"}, b: []string{"a", "x", "c"}, }, { name: "reorder", a: []string{"a", "b", "c", "d", "e"}, b: []string{"e", "d", "c", "b", "a"}, }, { name: "random_100", a: generateUniqueLinesPatience(100), b: func() []string { s := generateUniqueLinesPatience(100) rand.Shuffle(len(s), func(i, j int) { s[i], s[j] = s[j], s[i] }) return s }(), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { changes1, err := DiffSlicesLegacy(ctx, tt.a, tt.b) if err != nil { t.Fatalf("DiffSlicesLegacy error: %v", err) } changes2, err := DiffSlices(ctx, tt.a, tt.b, Patience) if err != nil { t.Fatalf("PatienceDiff error: %v", err) } // Compare total deletions and insertions var del1, ins1, del2, ins2 int for _, c := range changes1 { del1 += c.Del ins1 += c.Ins } for _, c := range changes2 { del2 += c.Del ins2 += c.Ins } if del1 != del2 || ins1 != ins2 { t.Errorf("Diff mismatch: legacy (del=%d, ins=%d), optimized (del=%d, ins=%d)", del1, ins1, del2, ins2) } t.Logf("Both implementations: %d changes, %d del, %d ins", len(changes1), del1, ins1) }) } } ================================================ FILE: modules/diferenco/patience_test.go ================================================ package diferenco import ( "fmt" "os" "path/filepath" "runtime" "testing" ) func TestPatienceDiff(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, Patience) i := 0 for _, c := range changes { for ; i < c.P1; i++ { fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]]) } for j := c.P1; j < c.P1+c.Del; j++ { fmt.Fprintf(os.Stderr, "- %s", sink.Lines[a[j]]) } for j := c.P2; j < c.P2+c.Ins; j++ { fmt.Fprintf(os.Stderr, "+ %s", sink.Lines[b[j]]) } i += c.Del } for ; i < len(a); i++ { fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]]) } fmt.Fprintf(os.Stderr, "\n\nEND\n\n") } ================================================ FILE: modules/diferenco/regression_test.go ================================================ package diferenco import "testing" func TestPatchNameHandlesNilSides(t *testing.T) { tests := []struct { name string p Patch want string }{ { name: "both_non_nil_prefers_to", p: Patch{ From: &File{Name: "old.txt"}, To: &File{Name: "new.txt"}, }, want: "new.txt", }, { name: "from_nil_returns_to", p: Patch{ To: &File{Name: "new.txt"}, }, want: "new.txt", }, { name: "to_nil_returns_from", p: Patch{ From: &File{Name: "old.txt"}, }, want: "old.txt", }, { name: "both_nil_returns_empty", p: Patch{}, want: "", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if got := tt.p.Name(); got != tt.want { t.Fatalf("Patch.Name() = %q, want %q", got, tt.want) } }) } } func TestValidateOptionsIdempotent(t *testing.T) { opts := &MergeOptions{ LabelO: "base", LabelA: "ours", LabelB: "theirs", A: Unspecified, } if err := opts.ValidateOptions(); err != nil { t.Fatalf("ValidateOptions() first call error: %v", err) } firstO, firstA, firstB := opts.LabelO, opts.LabelA, opts.LabelB if err := opts.ValidateOptions(); err != nil { t.Fatalf("ValidateOptions() second call error: %v", err) } if opts.LabelO != firstO || opts.LabelA != firstA || opts.LabelB != firstB { t.Fatalf("ValidateOptions() should be idempotent, got (%q, %q, %q), want (%q, %q, %q)", opts.LabelO, opts.LabelA, opts.LabelB, firstO, firstA, firstB) } if opts.A != Histogram { t.Fatalf("ValidateOptions() should default algorithm to Histogram, got %v", opts.A) } } ================================================ FILE: modules/diferenco/sink.go ================================================ package diferenco import ( "bufio" "fmt" "io" "strings" "unicode" "unicode/utf8" ) const ( NEWLINE_RAW = iota NEWLINE_LF NEWLINE_CRLF ) // Sink is a line deduplication and indexing structure for diff operations. // It maps unique text lines to integer indices, allowing diff algorithms to // operate on integers rather than strings for better performance. // // Sink is NOT safe for concurrent use. Callers must ensure that all parse/scan // operations are completed before passing the resulting indices to concurrent // diff computations. type Sink struct { Lines []string Index map[string]int NewLine int } func NewSink(newLineMode int) *Sink { sink := &Sink{ Lines: make([]string, 0, 200), Index: make(map[string]int), NewLine: newLineMode, } return sink } func (s *Sink) addLine(line string) int { if lineIndex, ok := s.Index[line]; ok { return lineIndex } index := len(s.Lines) s.Index[line] = index s.Lines = append(s.Lines, line) return index } func (s *Sink) ScanRawLines(r io.Reader) ([]int, error) { lines := make([]int, 0, 200) br := bufio.NewReader(r) for { line, err := br.ReadString('\n') if err != nil && err != io.EOF { return nil, err } // line including '\n' always >= 1 if len(line) == 0 { break } lines = append(lines, s.addLine(line)) } return lines, nil } func (s *Sink) ScanLines(r io.Reader) ([]int, error) { if s.NewLine == NEWLINE_RAW { return s.ScanRawLines(r) } lines := make([]int, 0, 200) br := bufio.NewScanner(r) for br.Scan() { lines = append(lines, s.addLine(strings.TrimSuffix(br.Text(), "\r"))) } return lines, br.Err() } func (s *Sink) SplitRawLines(text string) []int { lines := make([]int, 0, 200) for pos := 0; pos < len(text); { part := text[pos:] newPos := strings.IndexByte(part, '\n') if newPos == -1 { lines = append(lines, s.addLine(part)) break } lines = append(lines, s.addLine(part[:newPos+1])) pos += newPos + 1 } return lines } func (s *Sink) SplitLines(text string) []int { if s.NewLine == NEWLINE_RAW { return s.SplitRawLines(text) } lines := make([]int, 0, 200) for pos := 0; pos < len(text); { part := text[pos:] newPos := strings.IndexByte(part, '\n') if newPos == -1 { lines = append(lines, s.addLine(strings.TrimSuffix(part, "\r"))) break } lines = append(lines, s.addLine(strings.TrimSuffix(part[:newPos], "\r"))) pos += newPos + 1 } return lines } func (s *Sink) parseLines(r io.Reader, text string) ([]int, error) { if r != nil { return s.ScanLines(r) } return s.SplitLines(text), nil } func (s *Sink) WriteLine(w io.Writer, E ...int) { if s.NewLine == NEWLINE_CRLF { for _, e := range E { _, _ = fmt.Fprintf(w, "%s\r\n", s.Lines[e]) } return } if s.NewLine == NEWLINE_LF { for _, e := range E { _, _ = fmt.Fprintln(w, s.Lines[e]) } return } for _, e := range E { _, _ = io.WriteString(w, s.Lines[e]) } } func (s *Sink) addEqualLines(h *Hunk, index []int, start, end int) int { delta := 0 for i := start; i < end; i++ { if i < 0 { continue } if i >= len(index) { return delta } h.Lines = append(h.Lines, Line{Kind: Equal, Content: s.Lines[index[i]]}) delta++ } return delta } func (s *Sink) ToPatch(from, to *File, changes []Change, linesA, linesB []int, contextLines int) *Patch { gap := contextLines * 2 p := &Patch{ From: from, To: to, } if len(changes) == 0 { return p } var h *Hunk last := 0 toLine := 0 for _, ch := range changes { start := ch.P1 end := ch.P1 + ch.Del switch { case h != nil && start == last: case h != nil && start <= last+gap: // within range of previous lines, add the joiners s.addEqualLines(h, linesA, last, start) default: // need to start a new hunk if h != nil { // add the edge to the previous hunk s.addEqualLines(h, linesA, last, last+contextLines) p.Hunks = append(p.Hunks, h) } toLine += start - last h = &Hunk{ FromLine: start + 1, ToLine: toLine + 1, } // add the edge to the new hunk delta := s.addEqualLines(h, linesA, start-contextLines, start) h.FromLine -= delta h.ToLine -= delta } last = start for i := start; i < end; i++ { h.Lines = append(h.Lines, Line{Kind: Delete, Content: s.Lines[linesA[i]]}) last++ } addEnd := ch.P2 + ch.Ins for i := ch.P2; i < addEnd; i++ { h.Lines = append(h.Lines, Line{Kind: Insert, Content: s.Lines[linesB[i]]}) toLine++ } } if h != nil { // add the edge to the final hunk s.addEqualLines(h, linesA, last, last+contextLines) p.Hunks = append(p.Hunks, h) } return p } // SplitWords splits string by character classes (keeping delimiters). // CJK characters and emojis are split individually. // Word characters include letters, digits, and common symbols (-, _, ., /). func SplitWords(s string) []string { if s == "" { return nil } // Pre-allocate: average token length is ~3-4 chars out := make([]string, 0, len(s)/3+1) start := -1 mode := 0 for i, r := range s { m := classify(r) // CJK / emoji: split as single characters if m == modeSingle { if start >= 0 { out = append(out, s[start:i]) start = -1 } out = append(out, s[i:i+utf8.RuneLen(r)]) continue } if start < 0 { start = i mode = m continue } if m != mode { out = append(out, s[start:i]) start = i mode = m } } if start >= 0 { out = append(out, s[start:]) } return out } const ( modePunct = iota // Default: 0 modeWord modeSpace modeSingle // CJK, emoji, and other wide characters ) // asciiClass is a lookup table for ASCII character classification. // Values: 0=Punct (default), 1=Word, 2=Space. var asciiClass = [128]byte{ '\t': modeSpace, '\n': modeSpace, '\r': modeSpace, ' ': modeSpace, '-': modeWord, '.': modeWord, '/': modeWord, '_': modeWord, '0': modeWord, '1': modeWord, '2': modeWord, '3': modeWord, '4': modeWord, '5': modeWord, '6': modeWord, '7': modeWord, '8': modeWord, '9': modeWord, 'A': modeWord, 'B': modeWord, 'C': modeWord, 'D': modeWord, 'E': modeWord, 'F': modeWord, 'G': modeWord, 'H': modeWord, 'I': modeWord, 'J': modeWord, 'K': modeWord, 'L': modeWord, 'M': modeWord, 'N': modeWord, 'O': modeWord, 'P': modeWord, 'Q': modeWord, 'R': modeWord, 'S': modeWord, 'T': modeWord, 'U': modeWord, 'V': modeWord, 'W': modeWord, 'X': modeWord, 'Y': modeWord, 'Z': modeWord, 'a': modeWord, 'b': modeWord, 'c': modeWord, 'd': modeWord, 'e': modeWord, 'f': modeWord, 'g': modeWord, 'h': modeWord, 'i': modeWord, 'j': modeWord, 'k': modeWord, 'l': modeWord, 'm': modeWord, 'n': modeWord, 'o': modeWord, 'p': modeWord, 'q': modeWord, 'r': modeWord, 's': modeWord, 't': modeWord, 'u': modeWord, 'v': modeWord, 'w': modeWord, 'x': modeWord, 'y': modeWord, 'z': modeWord, } func classify(r rune) int { // ASCII fast path if r < 128 { return int(asciiClass[r]) } // Non-ASCII switch { case unicode.IsSpace(r): return modeSpace case isCJK(r) || isEmoji(r): return modeSingle case unicode.IsLetter(r) || unicode.IsDigit(r): return modeWord default: return modePunct } } ================================================ FILE: modules/diferenco/sink_test.go ================================================ package diferenco import ( "fmt" "os" "strings" "testing" ) func TestProcessLine(t *testing.T) { text := `A B C D A` s := &Sink{ Index: make(map[string]int), } lines := s.SplitLines(text) for _, line := range lines { fmt.Fprintf(os.Stderr, "%d [%s]\n", line, s.Lines[line]) } } func TestProcessLineNewLine(t *testing.T) { text := `A B C D D ` s := &Sink{ Index: make(map[string]int), } lines := s.SplitLines(text) for _, line := range lines { fmt.Fprintf(os.Stderr, "%d [%s]\n", line, s.Lines[line]) } } func TestReadLines(t *testing.T) { text := `A B C D D ` s := &Sink{ Index: make(map[string]int), } lines, err := s.ScanLines(strings.NewReader(text)) if err != nil { return } for _, line := range lines { fmt.Fprintf(os.Stderr, "%d [%s]\n", line, s.Lines[line]) } } func TestReadLinesNoNewLine(t *testing.T) { text := `A B C D D` s := &Sink{ Index: make(map[string]int), } lines, err := s.ScanLines(strings.NewReader(text)) if err != nil { return } for _, line := range lines { fmt.Fprintf(os.Stderr, "%d \"%s\"\n", line, strings.ReplaceAll(s.Lines[line], "\n", "\\n")) } } func TestReadLinesLF(t *testing.T) { text := `A B C D D` s := &Sink{ Index: make(map[string]int), NewLine: NEWLINE_LF, } lines, err := s.ScanLines(strings.NewReader(text)) if err != nil { return } for _, line := range lines { fmt.Fprintf(os.Stderr, "%d \"%s\"\n", line, s.Lines[line]) } } func TestProcessLineLF(t *testing.T) { text := `A B C D B` s := &Sink{ NewLine: NEWLINE_LF, Index: make(map[string]int), } lines := s.SplitLines(text) for _, line := range lines { fmt.Fprintf(os.Stderr, "%d [%s]\n", line, s.Lines[line]) } } func TestProcessLineNewLineLF(t *testing.T) { text := `A B C D ` s := &Sink{ NewLine: NEWLINE_LF, Index: make(map[string]int), } lines := s.SplitLines(text) for _, line := range lines { fmt.Fprintf(os.Stderr, "%d [%s]\n", line, s.Lines[line]) } } func TestSplitWord(t *testing.T) { sss := []string{ " blah test2 test3 ", "\tblah test2 test3 ", "\tblah test2 test3 t", "\tblah test2 test3 tt", "The quick brown fox jumps over the lazy dog", "The quick brown dog leaps over the lazy cat", "Hello😋World", "😋 Hello😋World", } for _, s := range sss { w := SplitWords(s) fmt.Fprintf(os.Stderr, "[%s] -->\n", s) for _, e := range w { fmt.Fprintf(os.Stderr, "[%s] ", e) } fmt.Fprintf(os.Stderr, "\n") } } func TestSplitWordsCases(t *testing.T) { tests := []struct { name string input string expected []string }{ // Empty and single character {"empty", "", nil}, {"single_ascii", "a", []string{"a"}}, {"single_cjk", "中", []string{"中"}}, {"single_emoji", "😀", []string{"😀"}}, {"single_space", " ", []string{" "}}, {"single_punct", "!", []string{"!"}}, // ASCII words {"ascii_word", "hello", []string{"hello"}}, {"ascii_words", "hello world", []string{"hello", " ", "world"}}, {"ascii_numbers", "123 456", []string{"123", " ", "456"}}, // Word characters: letters, digits, -, _, ., / {"path", "/usr/local/bin", []string{"/usr/local/bin"}}, {"file_name", "file-name.txt", []string{"file-name.txt"}}, {"snake_case", "hello_world", []string{"hello_world"}}, {"mixed_word_chars", "a-b_c.d/e", []string{"a-b_c.d/e"}}, // CJK characters (split individually) {"cjk_single", "你好", []string{"你", "好"}}, {"cjk_sentence", "你好世界", []string{"你", "好", "世", "界"}}, {"cjk_mixed", "Hello世界", []string{"Hello", "世", "界"}}, {"cjk_japanese", "こんにちは", []string{"こ", "ん", "に", "ち", "は"}}, {"cjk_korean", "안녕하세요", []string{"안", "녕", "하", "세", "요"}}, // Emoji (split individually) {"emoji_single", "😀😃", []string{"😀", "😃"}}, {"emoji_mixed", "Hello😀World", []string{"Hello", "😀", "World"}}, {"emoji_multiple", "🎉🎊🎁", []string{"🎉", "🎊", "🎁"}}, // Punctuation (grouped by same class) {"punct_simple", "hello,world", []string{"hello", ",", "world"}}, {"punct_multiple", "a!b?c", []string{"a", "!", "b", "?", "c"}}, {"punct_sequence", "!!!", []string{"!!!"}}, {"punct_mixed", "!?;", []string{"!?;"}}, // different puncts grouped together // Whitespace {"spaces", "a b", []string{"a", " ", "b"}}, {"tabs", "a\t\tb", []string{"a", "\t\t", "b"}}, {"mixed_whitespace", "a \t b", []string{"a", " \t ", "b"}}, {"newline", "a\nb", []string{"a", "\n", "b"}}, // Complex cases {"url", "https://example.com/path", []string{"https", ":", "//example.com/path"}}, {"email", "user@example.com", []string{"user", "@", "example.com"}}, {"code_line", "if (x > 0) { return x; }", []string{"if", " ", "(", "x", " ", ">", " ", "0", ")", " ", "{", " ", "return", " ", "x", ";", " ", "}"}}, {"chinese_sentence", "你好,世界!", []string{"你", "好", ",", "世", "界", "!"}}, {"mixed_complex", "Hello世界🎉test-1.0/file_name", []string{"Hello", "世", "界", "🎉", "test-1.0/file_name"}}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := SplitWords(tt.input) if !equalStringSlices(got, tt.expected) { t.Errorf("SplitWords(%q) = %v, want %v", tt.input, got, tt.expected) } }) } } func TestSplitWordsASCIIFastPath(t *testing.T) { // Test that ASCII fast path produces same results as non-ASCII path asciiTests := []string{ "hello world", "test-1.0/file_name", "if (x > 0) { return x; }", "123 456 789", "a!b?c:d;e", } for _, s := range asciiTests { got := SplitWords(s) if got == nil && s != "" { t.Errorf("SplitWords(%q) returned nil for non-empty string", s) } } } func TestSplitWordsBoundary(t *testing.T) { // Test boundary conditions tests := []struct { name string input string expected []string }{ {"control_chars", "\x00\x01\x02", []string{"\x00\x01\x02"}}, {"del_char", "\x7f", []string{"\x7f"}}, {"max_ascii", "~", []string{"~"}}, // 0x7E } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := SplitWords(tt.input) if !equalStringSlices(got, tt.expected) { t.Errorf("SplitWords(%q) = %v, want %v", tt.input, got, tt.expected) } }) } } func equalStringSlices(a, b []string) bool { if len(a) != len(b) { return false } for i := range a { if a[i] != b[i] { return false } } return true } func BenchmarkSplitWords(b *testing.B) { tests := []struct { name string s string }{ {"ASCII", "The quick brown fox jumps over the lazy dog"}, {"CJK", "你好世界这是一个测试文本"}, {"Mixed", "Hello世界Test测试Go语言Programming"}, {"Emoji", "Hello😋World🎉Test🌟End"}, {"Path", "/usr/local/bin/file-name.txt"}, } for _, tt := range tests { b.Run(tt.name, func(b *testing.B) { for b.Loop() { SplitWords(tt.s) } }) } } ================================================ FILE: modules/diferenco/suffixarray.go ================================================ // Package diferenco provides diff algorithms. // Suffix-Array Diff implementation inspired by diff-match-patch. package diferenco import ( "cmp" "context" "slices" ) // match represents a common substring match between two sequences. type match struct { start1 int // start position in sequence 1 start2 int // start position in sequence 2 length int // length of the match } // buildSuffixArray constructs a suffix array for the given data. func buildSuffixArray[E cmp.Ordered](data []E) []int { n := len(data) if n <= 1 { if n == 1 { return []int{0} } return nil } sa := make([]int, n) for i := range n { sa[i] = i } slices.SortFunc(sa, func(i, j int) int { return compareSuffixes(data, i, j) }) return sa } // compareSuffixes compares two suffixes starting at positions i and j. func compareSuffixes[E cmp.Ordered](data []E, i, j int) int { n := len(data) for i < n && j < n { if c := cmp.Compare(data[i], data[j]); c != 0 { return c } i++ j++ } return cmp.Compare(n-i, n-j) } // findLongestCommonSubstring finds the longest common substring between two sequences // using suffix array on the first sequence. func findLongestCommonSubstring[E cmp.Ordered](data1, data2 []E, sa []int) match { if len(data1) == 0 || len(data2) == 0 || len(sa) == 0 { return match{} } var bestMatch match // For each starting position in data2, binary search in suffix array for start2 := range len(data2) { matchLen := binarySearchMatch(data1, data2, sa, start2) if matchLen > bestMatch.length { bestMatch.start2 = start2 bestMatch.length = matchLen } } // Find the start position in data1 for the best match if bestMatch.length > 0 { bestMatch.start1 = findSuffixPosition(data1, data2, sa, bestMatch.start2, bestMatch.length) } return bestMatch } // binarySearchMatch finds the longest match for data2[start2:] in data1 using suffix array. func binarySearchMatch[E cmp.Ordered](data1, data2 []E, sa []int, start2 int) int { if len(data2) == 0 || start2 >= len(data2) { return 0 } n := len(data1) target := data2[start2:] // Binary search for lower bound pos, _ := slices.BinarySearchFunc(sa, target[0], func(suffixIdx int, firstElem E) int { if suffixIdx >= n { return -1 } return cmp.Compare(data1[suffixIdx], firstElem) }) bestLen := 0 // Check nearby suffixes for matches end := min(pos+10, n) // Limit search range for efficiency for i := pos; i < end; i++ { suffixStart := sa[i] if suffixStart >= n || data1[suffixStart] != target[0] { break } // Count matching elements maxLen := min(n-suffixStart, len(target)) matchLen := 0 for matchLen < maxLen && data1[suffixStart+matchLen] == target[matchLen] { matchLen++ } bestLen = max(bestLen, matchLen) } return bestLen } // findSuffixPosition finds the starting position in data1 for a match. func findSuffixPosition[E cmp.Ordered](data1, data2 []E, sa []int, start2, length int) int { if length == 0 { return 0 } target := data2[start2 : start2+length] // Binary search for the suffix pos, found := slices.BinarySearchFunc(sa, target, func(suffixIdx int, t []E) int { if suffixIdx >= len(data1) { return 1 } return slices.Compare(data1[suffixIdx:], t) }) if found { return sa[pos] } if pos < len(sa) { return sa[pos] } return 0 } // suffixArrayComputeOrdered performs the recursive diff computation using suffix array. func suffixArrayComputeOrdered[E cmp.Ordered](ctx context.Context, L1 []E, P1 int, L2 []E, P2 int) ([]Change, error) { select { case <-ctx.Done(): return nil, ctx.Err() default: } // Base cases switch { case len(L1) == 0 && len(L2) == 0: return []Change{}, nil case len(L1) == 0: return []Change{{P1: P1, P2: P2, Ins: len(L2)}}, nil case len(L2) == 0: return []Change{{P1: P1, P2: P2, Del: len(L1)}}, nil } // Check for common prefix prefixLen := 0 for prefixLen < len(L1) && prefixLen < len(L2) && L1[prefixLen] == L2[prefixLen] { prefixLen++ } if prefixLen > 0 { return suffixArrayComputeOrdered(ctx, L1[prefixLen:], P1+prefixLen, L2[prefixLen:], P2+prefixLen) } // Check for common suffix suffixLen := 0 for suffixLen < len(L1) && suffixLen < len(L2) && L1[len(L1)-1-suffixLen] == L2[len(L2)-1-suffixLen] { suffixLen++ } if suffixLen > 0 { return suffixArrayComputeOrdered(ctx, L1[:len(L1)-suffixLen], P1, L2[:len(L2)-suffixLen], P2) } // Build suffix array for L1 sa := buildSuffixArray(L1) // Find longest common substring lcs := findLongestCommonSubstring(L1, L2, sa) // If no common substring found, return all as changes if lcs.length == 0 { return []Change{{P1: P1, P2: P2, Del: len(L1), Ins: len(L2)}}, nil } // Recursively process left and right parts // Process left part (before the match) leftChanges, err := suffixArrayComputeOrdered(ctx, L1[:lcs.start1], P1, L2[:lcs.start2], P2) if err != nil { return nil, err } // Process right part (after the match) rightStart1 := lcs.start1 + lcs.length rightStart2 := lcs.start2 + lcs.length rightChanges, err := suffixArrayComputeOrdered(ctx, L1[rightStart1:], P1+rightStart1, L2[rightStart2:], P2+rightStart2) if err != nil { return nil, err } return append(leftChanges, rightChanges...), nil } // SuffixArrayDiff calculates the difference using suffix array algorithm. // This algorithm is efficient for finding longest common substrings and works well // for both text and binary data. // // Time complexity: O((n+m) log n) where n and m are the lengths of the input sequences. // Space complexity: O(n) for the suffix array. func suffixArray[E comparable](ctx context.Context, L1, L2 []E) ([]Change, error) { // Handle empty inputs if len(L1) == 0 && len(L2) == 0 { return []Change{}, nil } // Remove common prefix prefix := commonPrefixLength(L1, L2) L1 = L1[prefix:] L2 = L2[prefix:] // Remove common suffix suffix := commonSuffixLength(L1, L2) L1 = L1[:len(L1)-suffix] L2 = L2[:len(L2)-suffix] // If either slice is empty after removing prefix/suffix if len(L1) == 0 && len(L2) == 0 { return []Change{}, nil } // Try ordered types using type assertion helper if changes, err, ok := trySuffixArrayDiff(ctx, L1, L2, prefix); ok { return changes, err } // Fallback to ONP algorithm for unsupported types return onp(ctx, L1, L2) } // trySuffixArrayDiff attempts to run suffix array diff for ordered types. // Returns (changes, err, true) if the type is supported, or (nil, nil, false) if not. func trySuffixArrayDiff[E comparable](ctx context.Context, L1, L2 []E, prefix int) ([]Change, error, bool) { switch any(L1).(type) { case []string: changes, err := suffixArrayComputeOrdered(ctx, any(L1).([]string), prefix, any(L2).([]string), prefix) return changes, err, true case []int: changes, err := suffixArrayComputeOrdered(ctx, any(L1).([]int), prefix, any(L2).([]int), prefix) return changes, err, true case []int64: changes, err := suffixArrayComputeOrdered(ctx, any(L1).([]int64), prefix, any(L2).([]int64), prefix) return changes, err, true case []rune: changes, err := suffixArrayComputeOrdered(ctx, any(L1).([]rune), prefix, any(L2).([]rune), prefix) return changes, err, true case []byte: changes, err := suffixArrayComputeOrdered(ctx, any(L1).([]byte), prefix, any(L2).([]byte), prefix) return changes, err, true default: return nil, nil, false } } ================================================ FILE: modules/diferenco/suffixarray_test.go ================================================ package diferenco import ( "context" "fmt" "os" "path/filepath" "runtime" "testing" ) func TestSuffixArrayDiff(t *testing.T) { _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read a error: %v\n", err) return } textA := string(bytesA) bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt")) if err != nil { fmt.Fprintf(os.Stderr, "read b error: %v\n", err) return } textB := string(bytesB) sink := &Sink{ Index: make(map[string]int), } a := sink.SplitLines(textA) b := sink.SplitLines(textB) changes, _ := DiffSlices(t.Context(), a, b, SuffixArray) i := 0 for _, c := range changes { for ; i < c.P1; i++ { fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]]) } for j := c.P1; j < c.P1+c.Del; j++ { fmt.Fprintf(os.Stderr, "- %s", sink.Lines[a[j]]) } for j := c.P2; j < c.P2+c.Ins; j++ { fmt.Fprintf(os.Stderr, "+ %s", sink.Lines[b[j]]) } i += c.Del } for ; i < len(a); i++ { fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]]) } fmt.Fprintf(os.Stderr, "\n\nEND\n\n") } func TestSuffixArrayDiffBasic(t *testing.T) { tests := []struct { name string a []string b []string expected []Change }{ { name: "empty both", a: []string{}, b: []string{}, expected: []Change{}, }, { name: "empty a", a: []string{}, b: []string{"a", "b", "c"}, expected: []Change{ {P1: 0, P2: 0, Ins: 3}, }, }, { name: "empty b", a: []string{"a", "b", "c"}, b: []string{}, expected: []Change{ {P1: 0, P2: 0, Del: 3}, }, }, { name: "identical", a: []string{"a", "b", "c"}, b: []string{"a", "b", "c"}, expected: []Change{}, }, { name: "single_insertion", a: []string{"a", "c"}, b: []string{"a", "b", "c"}, expected: []Change{ {P1: 1, P2: 1, Ins: 1}, }, }, { name: "single_deletion", a: []string{"a", "b", "c"}, b: []string{"a", "c"}, expected: []Change{ {P1: 1, P2: 1, Del: 1}, }, }, { name: "replace_middle", a: []string{"a", "b", "c"}, b: []string{"a", "x", "c"}, expected: []Change{ {P1: 1, P2: 1, Del: 1, Ins: 1}, }, }, { name: "completely_different", a: []string{"a", "b", "c"}, b: []string{"x", "y", "z"}, expected: []Change{ {P1: 0, P2: 0, Del: 3, Ins: 3}, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() changes, err := DiffSlices(ctx, tt.a, tt.b, SuffixArray) if err != nil { t.Fatalf("SuffixArrayDiff() error = %v", err) } // Verify the changes reconstruct the correct result result := reconstructFromChanges(tt.a, changes, tt.b) if !equalSlices(result, tt.b) { t.Errorf("SuffixArrayDiff() reconstructed = %v, want %v", result, tt.b) } }) } } func TestSuffixArrayDiffRune(t *testing.T) { tests := []struct { name string a string b string }{ { name: "simple", a: "Hello World", b: "Hello There", }, { name: "insertion", a: "abc", b: "abXc", }, { name: "deletion", a: "abXc", b: "abc", }, { name: "complex", a: "The quick brown fox jumps over the lazy dog", b: "The quick brown dog jumps over the lazy fox", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() runesA := []rune(tt.a) runesB := []rune(tt.b) changes, err := DiffSlices(ctx, runesA, runesB, SuffixArray) if err != nil { t.Fatalf("SuffixArrayDiff() error = %v", err) } // Verify changes are valid for i, c := range changes { if c.P1 < 0 || c.P1 > len(runesA) { t.Errorf("Change[%d].P1 = %d out of range [0, %d]", i, c.P1, len(runesA)) } if c.P2 < 0 || c.P2 > len(runesB) { t.Errorf("Change[%d].P2 = %d out of range [0, %d]", i, c.P2, len(runesB)) } if c.Del < 0 || c.P1+c.Del > len(runesA) { t.Errorf("Change[%d].Del = %d invalid with P1=%d, lenA=%d", i, c.Del, c.P1, len(runesA)) } if c.Ins < 0 || c.P2+c.Ins > len(runesB) { t.Errorf("Change[%d].Ins = %d invalid with P2=%d, lenB=%d", i, c.Ins, c.P2, len(runesB)) } } // Verify reconstruction result := reconstructFromChanges(runesA, changes, runesB) if !equalSlices(result, runesB) { t.Errorf("SuffixArrayDiff() reconstructed = %v, want %v", string(result), string(runesB)) } }) } } func TestSuffixArrayDiffConsistency(t *testing.T) { // Test that SuffixArray produces consistent results with other algorithms tests := []struct { name string a []string b []string }{ { name: "simple", a: []string{"line1", "line2", "line3"}, b: []string{"line1", "modified", "line3"}, }, { name: "multiple_changes", a: []string{"a", "b", "c", "d", "e"}, b: []string{"a", "x", "c", "y", "e"}, }, { name: "insert_delete", a: []string{"a", "b", "c", "d", "e"}, b: []string{"a", "c", "d", "f", "e"}, }, } algorithms := []Algorithm{Histogram, ONP, Myers, Patience, SuffixArray} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() results := make(map[Algorithm][]Change) for _, algo := range algorithms { changes, err := DiffSlices(ctx, tt.a, tt.b, algo) if err != nil { t.Fatalf("Algorithm %s failed: %v", algo, err) } results[algo] = changes // Verify each algorithm produces valid results reconstructed := reconstructFromChanges(tt.a, changes, tt.b) if !equalSlices(reconstructed, tt.b) { t.Errorf("Algorithm %s: reconstructed = %v, want %v", algo, reconstructed, tt.b) } } // All algorithms should produce correct reconstruction // (The exact changes may differ, but the result should be the same) }) } } func TestBuildSuffixArray(t *testing.T) { tests := []struct { name string data []string wantLen int }{ { name: "empty", data: []string{}, wantLen: 0, }, { name: "single", data: []string{"a"}, wantLen: 1, }, { name: "simple", data: []string{"b", "a", "n", "a", "n", "a"}, wantLen: 6, }, { name: "sorted", data: []string{"a", "b", "c", "d", "e"}, wantLen: 5, }, { name: "reverse", data: []string{"e", "d", "c", "b", "a"}, wantLen: 5, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { sa := buildSuffixArray(tt.data) if len(sa) != tt.wantLen { t.Errorf("buildSuffixArray() length = %d, want %d", len(sa), tt.wantLen) } // Verify suffix array is valid (all indices present) if len(sa) > 0 { seen := make(map[int]bool) for _, idx := range sa { if idx < 0 || idx >= len(tt.data) { t.Errorf("Invalid index in suffix array: %d", idx) } if seen[idx] { t.Errorf("Duplicate index in suffix array: %d", idx) } seen[idx] = true } // Verify suffix array is sorted for i := 1; i < len(sa); i++ { cmp := compareSuffixes(tt.data, sa[i-1], sa[i]) if cmp >= 0 { t.Errorf("Suffix array not sorted at position %d: sa[%d]=%d, sa[%d]=%d", i, i-1, sa[i-1], i, sa[i]) } } } }) } } func TestSuffixArrayDiffAlgorithm(t *testing.T) { // Test that the algorithm can be selected by name algo, err := AlgorithmFromName("suffixarray") if err != nil { t.Fatalf("AlgorithmFromName() error = %v", err) } if algo != SuffixArray { t.Errorf("AlgorithmFromName() = %v, want %v", algo, SuffixArray) } // Test string representation if SuffixArray.String() != "suffixarray" { t.Errorf("SuffixArray.String() = %q, want %q", SuffixArray.String(), "suffixarray") } } func TestSuffixArrayDiffContext(t *testing.T) { // Test context cancellation ctx, cancel := context.WithCancel(context.Background()) cancel() a := []string{"a", "b", "c"} b := []string{"a", "x", "c"} _, err := DiffSlices(ctx, a, b, SuffixArray) if err == nil { t.Error("SuffixArrayDiff() should return error on cancelled context") } } func TestSuffixArrayDiffBinary(t *testing.T) { // Test with binary data (byte slices) tests := []struct { name string a []byte b []byte }{ { name: "simple_binary", a: []byte{0x00, 0x01, 0x02, 0x03, 0x04}, b: []byte{0x00, 0x01, 0xFF, 0x03, 0x04}, }, { name: "insert_binary", a: []byte{0x00, 0x01, 0x02}, b: []byte{0x00, 0x01, 0x0A, 0x02}, }, { name: "delete_binary", a: []byte{0x00, 0x01, 0x0A, 0x02}, b: []byte{0x00, 0x01, 0x02}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() changes, err := DiffSlices(ctx, tt.a, tt.b, SuffixArray) if err != nil { t.Fatalf("SuffixArrayDiff() error = %v", err) } // Verify reconstruction result := reconstructFromChanges(tt.a, changes, tt.b) if !equalSlices(result, tt.b) { t.Errorf("SuffixArrayDiff() reconstructed = %v, want %v", result, tt.b) } }) } } // Helper functions func reconstructFromChanges[E comparable](a []E, changes []Change, b []E) []E { result := make([]E, 0, len(b)) posA := 0 posB := 0 for _, c := range changes { // Add equal elements before the change for posA < c.P1 { result = append(result, a[posA]) posA++ posB++ } // Skip deleted elements posA += c.Del // Add inserted elements for i := 0; i < c.Ins; i++ { result = append(result, b[posB]) posB++ } } // Add remaining equal elements for posA < len(a) { result = append(result, a[posA]) posA++ } return result } func equalSlices[E comparable](a, b []E) bool { if len(a) != len(b) { return false } for i := range a { if a[i] != b[i] { return false } } return true } ================================================ FILE: modules/diferenco/testdata/a.txt ================================================ #include // Frobs foo heartily int frobnitz(int foo) { int i; for(i = 0; i < 10; i++) { printf("Your answer is: "); printf("%d\n", foo); } } int fact(int n) { if(n > 1) { return fact(n-1) * n; } return 1; } int main(int argc, char **argv) { frobnitz(fact(10)); } ================================================ FILE: modules/diferenco/testdata/b.txt ================================================ #include int fib(int n) { if(n > 2) { return fib(n-1) + fib(n-2); } return 1; } // Frobs foo heartily int frobnitz(int foo) { int i; for(i = 0; i < 10; i++) { printf("%d\n", foo); } } int main(int argc, char **argv) { frobnitz(fib(10)); } ================================================ FILE: modules/diferenco/testdata/css_1.css ================================================ /* hello world */ .foo1 { margin: 0 0 20px 0; } .bar { margin: 0; } .baz { color: yellow; font-family: "Before"; } .another { margin-left: 0.5em; } ================================================ FILE: modules/diferenco/testdata/css_2.css ================================================ /* hello world */ .bar { margin: 0; } .foo1 { margin: 0 0 20px 0; color: green; } .baz { color: blue; font-family: "After"; } .another { margin-left: 1em; } p { color: #000; } ================================================ FILE: modules/diferenco/testdata/simple_1.scss ================================================ @mixin buttons($basicBorder:1px, $gradient1:#fff, $gradient2:#d8dee7){ button{ border:$basicBorder solid #acbed3; //brings in Compass' background-image mixin: http://compass-style.org/reference/compass/css3/images/ @include background-image(linear-gradient($gradient1, $gradient2)); padding:3px 14px; font-size:12px; color:#3b557d; //brings in Compass' border-radius mixin: http://compass-style.org/reference/compass/css3/border_radius/ @include border-radius($border-radius, $border-radius); cursor:pointer; //& attribute adds &.primary { border:2px solid #3b557d; padding:5px 15px; //requires a $border-radius variable @include border-radius($border-radius + 2, $border-radius + 2); } &.disabled { opacity: .8; } &:hover { @include background-image(linear-gradient($gradient2, $gradient1)); } } } ================================================ FILE: modules/diferenco/testdata/simple_2.scss ================================================ @mixin buttons($basicBorder:1px, $gradient1:#333, $gradient2:#d8dee7){ button{ border:$basicBorder dotted #acbed3; //brings in Compass' background-image mixin: http://compass-style.org/reference/compass/css3/images/ @include background-image(linear-gradient($gradient1, $gradient2)); padding:3px 14px; font-size:1rem; color:#3b557d; //brings in Compass' border-radius mixin: http://compass-style.org/reference/compass/css3/border_radius/ @include border-radius($border-radius, $border-radius); cursor:pointer; //& attribute adds &.primary { border:2px dotted #3b557d; padding:5px 15px; //requires a $border-radius variable @include border-radius($border-radius + 2, $border-radius + 2); } &.disabled { opacity: .6; } &:hover { @include background-image(linear-gradient($gradient2, $gradient1)); } } } ================================================ FILE: modules/diferenco/text.go ================================================ package diferenco import ( "bytes" "errors" "fmt" "io" "strings" "unsafe" "github.com/antgroup/hugescm/modules/chardet" "github.com/antgroup/hugescm/modules/mime" "github.com/antgroup/hugescm/modules/streamio" ) // /* // * xdiff isn't equipped to handle content over a gigabyte; // * we make the cutoff 1GB - 1MB to give some breathing // * room for constant-sized additions (e.g., merge markers) // */ // #define MAX_XDIFF_SIZE (1024UL * 1024 * 1023) const ( MAX_DIFF_SIZE = 100 << 20 // MAX_DIFF_SIZE 100MiB BINARY = "binary" UTF8 = "UTF-8" sniffLen = 8000 ) var ( // ErrBinaryData is returned when the content is detected as binary ErrBinaryData = errors.New("binary data") ) func checkCharset(s string) string { if _, charset, ok := strings.Cut(s, ";"); ok { return strings.TrimPrefix(strings.TrimSpace(charset), "charset=") } return UTF8 } func detectCharset(payload []byte) string { result := mime.DetectAny(payload) for p := result; p != nil; p = p.Parent() { if p.Is("text/plain") { return checkCharset(p.String()) } } return BINARY } func readUnifiedText(r io.Reader) (string, string, error) { // Read initial bytes for charset detection sniffBytes, err := streamio.ReadMax(r, sniffLen) if err != nil { return "", "", fmt.Errorf("failed to read initial bytes for charset detection: %w", err) } // Detect charset charset := detectCharset(sniffBytes) if charset == BINARY { return "", "", fmt.Errorf("%w: content appears to be binary", ErrBinaryData) } // Create combined reader reader := io.MultiReader(bytes.NewReader(sniffBytes), r) // Handle UTF-8 content if strings.EqualFold(charset, UTF8) { var b strings.Builder if _, err := io.Copy(&b, reader); err != nil { return "", "", fmt.Errorf("failed to read UTF-8 content: %w", err) } return b.String(), UTF8, nil } // Handle other charsets var b bytes.Buffer if _, err := b.ReadFrom(reader); err != nil { return "", "", fmt.Errorf("failed to read content: %w", err) } // Convert from detected charset buf, err := chardet.DecodeFromCharset(b.Bytes(), charset) if err != nil { return "", "", fmt.Errorf("failed to convert from charset '%s': %w", charset, err) } if len(buf) == 0 { return "", charset, nil } return unsafe.String(unsafe.SliceData(buf), len(buf)), charset, nil } func readRawText(r io.Reader, size int) (string, error) { var b bytes.Buffer // Read initial bytes for binary detection if _, err := b.ReadFrom(io.LimitReader(r, sniffLen)); err != nil { return "", fmt.Errorf("failed to read initial bytes: %w", err) } // Check for null bytes (binary content) if bytes.IndexByte(b.Bytes(), 0) != -1 { return "", fmt.Errorf("%w: detected null byte in content", ErrBinaryData) } // Pre-allocate buffer for remaining content b.Grow(size) // Read remaining content if _, err := b.ReadFrom(r); err != nil { return "", fmt.Errorf("failed to read remaining content: %w", err) } content := b.Bytes() return unsafe.String(unsafe.SliceData(content), len(content)), nil } func ReadUnifiedText(r io.Reader, size int64, textconv bool) (content string, charset string, err error) { // Validate size if size > MAX_DIFF_SIZE { return "", "", fmt.Errorf("file size %d bytes exceeds limit %d bytes", size, MAX_DIFF_SIZE) } if textconv { return readUnifiedText(r) } content, err = readRawText(r, int(size)) if err != nil { return "", "", fmt.Errorf("failed to read raw text: %w", err) } return content, UTF8, nil } func NewUnifiedReaderEx(r io.Reader, textconv bool) (io.Reader, string, error) { sniffBytes, err := streamio.ReadMax(r, sniffLen) if err != nil { return nil, "", err } reader := io.MultiReader(bytes.NewReader(sniffBytes), r) if !textconv { if bytes.IndexByte(sniffBytes, 0) != -1 { return reader, BINARY, nil } return reader, UTF8, nil } charset := detectCharset(sniffBytes) // binary or UTF-8 not need convert if charset == BINARY || strings.EqualFold(charset, UTF8) { return reader, charset, nil } return chardet.NewReader(reader, charset), charset, nil } func NewUnifiedReader(r io.Reader) (io.Reader, error) { sniffBytes, err := streamio.ReadMax(r, sniffLen) if err != nil { return nil, err } charset := detectCharset(sniffBytes) reader := io.MultiReader(bytes.NewReader(sniffBytes), r) // binary or UTF-8 not need convert if charset == BINARY || strings.EqualFold(charset, UTF8) { return reader, nil } return chardet.NewReader(reader, charset), nil } func NewTextReader(r io.Reader) (io.Reader, error) { sniffBytes, err := streamio.ReadMax(r, sniffLen) if err != nil { return nil, err } if bytes.IndexByte(sniffBytes, 0) != -1 { return nil, ErrBinaryData } return io.MultiReader(bytes.NewReader(sniffBytes), r), nil } ================================================ FILE: modules/diferenco/unicode.go ================================================ // Code generated by running "go generate". DO NOT EDIT. // See gen_unicode.go for generation logic. package diferenco //go:generate go run gen_unicode.go // interval represents a Unicode code point range [First, Last]. type interval struct { First rune Last rune } // isCJK returns true if r is a CJK (East Asian Wide/Fullwidth) character. func isCJK(r rune) bool { return inRange(cjkRanges, r) } // isEmoji returns true if r is an emoji character. func isEmoji(r rune) bool { return inRange(emojiRanges, r) } // inRange performs binary search to check if r is within any interval in ranges. func inRange(ranges []interval, r rune) bool { n := len(ranges) i, j := 0, n for i < j { h := i + (j-i)/2 if r < ranges[h].First { j = h continue } if r > ranges[h].Last { i = h + 1 continue } return true } return false } ================================================ FILE: modules/diferenco/unicode_data.go ================================================ // Code generated by gen_unicode.go. DO NOT EDIT. package diferenco var cjkRanges = []interval{ {0x1100, 0x115F}, {0x231A, 0x231B}, {0x2329, 0x232A}, {0x23E9, 0x23EC}, {0x23F0, 0x23F0}, {0x23F3, 0x23F3}, {0x25FD, 0x25FE}, {0x2614, 0x2615}, {0x2630, 0x2637}, {0x2648, 0x2653}, {0x267F, 0x267F}, {0x268A, 0x268F}, {0x2693, 0x2693}, {0x26A1, 0x26A1}, {0x26AA, 0x26AB}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5}, {0x26CE, 0x26CE}, {0x26D4, 0x26D4}, {0x26EA, 0x26EA}, {0x26F2, 0x26F3}, {0x26F5, 0x26F5}, {0x26FA, 0x26FA}, {0x26FD, 0x26FD}, {0x2705, 0x2705}, {0x270A, 0x270B}, {0x2728, 0x2728}, {0x274C, 0x274C}, {0x274E, 0x274E}, {0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797}, {0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C}, {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E5}, {0x31EF, 0x321E}, {0x3220, 0x3247}, {0x3250, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF6}, {0x17000, 0x18CD5}, {0x18CFF, 0x18D1E}, {0x18D80, 0x18DF2}, {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, {0x1B000, 0x1B122}, {0x1B132, 0x1B132}, {0x1B150, 0x1B152}, {0x1B155, 0x1B155}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1D300, 0x1D356}, {0x1D360, 0x1D376}, {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F320}, {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D8}, {0x1F6DC, 0x1F6DF}, {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0}, {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F9FF}, {0x1FA70, 0x1FA7C}, {0x1FA80, 0x1FA8A}, {0x1FA8E, 0x1FAC6}, {0x1FAC8, 0x1FAC8}, {0x1FACD, 0x1FADC}, {0x1FADF, 0x1FAEA}, {0x1FAEF, 0x1FAF8}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, } var emojiRanges = []interval{ {0x0023, 0x0023}, {0x002A, 0x002A}, {0x0030, 0x0039}, {0x00A9, 0x00A9}, {0x00AE, 0x00AE}, {0x200D, 0x200D}, {0x203C, 0x203C}, {0x2049, 0x2049}, {0x20E3, 0x20E3}, {0x2122, 0x2122}, {0x2139, 0x2139}, {0x2194, 0x2199}, {0x21A9, 0x21AA}, {0x231A, 0x231B}, {0x2328, 0x2328}, {0x23CF, 0x23CF}, {0x23E9, 0x23F3}, {0x23F8, 0x23FA}, {0x24C2, 0x24C2}, {0x25AA, 0x25AB}, {0x25B6, 0x25B6}, {0x25C0, 0x25C0}, {0x25FB, 0x25FE}, {0x2600, 0x2604}, {0x260E, 0x260E}, {0x2611, 0x2611}, {0x2614, 0x2615}, {0x2618, 0x2618}, {0x261D, 0x261D}, {0x2620, 0x2620}, {0x2622, 0x2623}, {0x2626, 0x2626}, {0x262A, 0x262A}, {0x262E, 0x262F}, {0x2638, 0x263A}, {0x2640, 0x2640}, {0x2642, 0x2642}, {0x2648, 0x2653}, {0x265F, 0x2660}, {0x2663, 0x2663}, {0x2665, 0x2666}, {0x2668, 0x2668}, {0x267B, 0x267B}, {0x267E, 0x267F}, {0x2692, 0x2697}, {0x2699, 0x2699}, {0x269B, 0x269C}, {0x26A0, 0x26A1}, {0x26A7, 0x26A7}, {0x26AA, 0x26AB}, {0x26B0, 0x26B1}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5}, {0x26C8, 0x26C8}, {0x26CE, 0x26CF}, {0x26D1, 0x26D1}, {0x26D3, 0x26D4}, {0x26E9, 0x26EA}, {0x26F0, 0x26F5}, {0x26F7, 0x26FA}, {0x26FD, 0x26FD}, {0x2702, 0x2702}, {0x2705, 0x2705}, {0x2708, 0x270D}, {0x270F, 0x270F}, {0x2712, 0x2712}, {0x2714, 0x2714}, {0x2716, 0x2716}, {0x271D, 0x271D}, {0x2721, 0x2721}, {0x2728, 0x2728}, {0x2733, 0x2734}, {0x2744, 0x2744}, {0x2747, 0x2747}, {0x274C, 0x274C}, {0x274E, 0x274E}, {0x2753, 0x2755}, {0x2757, 0x2757}, {0x2763, 0x2764}, {0x2795, 0x2797}, {0x27A1, 0x27A1}, {0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2934, 0x2935}, {0x2B05, 0x2B07}, {0x2B1B, 0x2B1C}, {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x3030, 0x3030}, {0x303D, 0x303D}, {0x3297, 0x3297}, {0x3299, 0x3299}, {0xFE0F, 0xFE0F}, {0x1F004, 0x1F004}, {0x1F02C, 0x1F02F}, {0x1F094, 0x1F09F}, {0x1F0AF, 0x1F0B0}, {0x1F0C0, 0x1F0C0}, {0x1F0CF, 0x1F0D0}, {0x1F0F6, 0x1F0FF}, {0x1F170, 0x1F171}, {0x1F17E, 0x1F17F}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A}, {0x1F1AE, 0x1F1FF}, {0x1F201, 0x1F20F}, {0x1F21A, 0x1F21A}, {0x1F22F, 0x1F22F}, {0x1F232, 0x1F23A}, {0x1F23C, 0x1F23F}, {0x1F249, 0x1F25F}, {0x1F266, 0x1F321}, {0x1F324, 0x1F393}, {0x1F396, 0x1F397}, {0x1F399, 0x1F39B}, {0x1F39E, 0x1F3F0}, {0x1F3F3, 0x1F3F5}, {0x1F3F7, 0x1F4FD}, {0x1F4FF, 0x1F53D}, {0x1F549, 0x1F54E}, {0x1F550, 0x1F567}, {0x1F56F, 0x1F570}, {0x1F573, 0x1F57A}, {0x1F587, 0x1F587}, {0x1F58A, 0x1F58D}, {0x1F590, 0x1F590}, {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A5}, {0x1F5A8, 0x1F5A8}, {0x1F5B1, 0x1F5B2}, {0x1F5BC, 0x1F5BC}, {0x1F5C2, 0x1F5C4}, {0x1F5D1, 0x1F5D3}, {0x1F5DC, 0x1F5DE}, {0x1F5E1, 0x1F5E1}, {0x1F5E3, 0x1F5E3}, {0x1F5E8, 0x1F5E8}, {0x1F5EF, 0x1F5EF}, {0x1F5F3, 0x1F5F3}, {0x1F5FA, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CB, 0x1F6D2}, {0x1F6D5, 0x1F6E5}, {0x1F6E9, 0x1F6E9}, {0x1F6EB, 0x1F6F0}, {0x1F6F3, 0x1F6FF}, {0x1F7DA, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F}, {0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8AF}, {0x1F8BC, 0x1F8BF}, {0x1F8C2, 0x1F8CF}, {0x1F8D9, 0x1F8FF}, {0x1F90C, 0x1F93A}, {0x1F93C, 0x1F945}, {0x1F947, 0x1F9FF}, {0x1FA58, 0x1FA5F}, {0x1FA6E, 0x1FAFF}, {0x1FC00, 0x1FFFD}, {0xE0020, 0xE007F}, } ================================================ FILE: modules/diferenco/unicode_test.go ================================================ package diferenco import ( "testing" "unicode" ) func TestIsCJK(t *testing.T) { tests := []struct { r rune expect bool desc string }{ // Chinese characters {'中', true, "Chinese character 中"}, {'文', true, "Chinese character 文"}, {'你', true, "Chinese character 你"}, {'好', true, "Chinese character 好"}, {'龙', true, "Chinese character 龙"}, {0x4E00, true, "First CJK Unified Ideograph"}, {0x9FFF, true, "Near end of CJK Unified Ideographs"}, // Japanese Hiragana {'あ', true, "Hiragana あ"}, {'い', true, "Hiragana い"}, {0x3041, true, "Hiragana start"}, {0x309F, true, "Hiragana end"}, // Japanese Katakana {'ア', true, "Katakana ア"}, {'イ', true, "Katakana イ"}, {0x30A0, true, "Katakana start"}, {0x30FF, true, "Katakana end"}, // Korean Hangul {'한', true, "Hangul 한"}, {'글', true, "Hangul 글"}, {0xAC00, true, "Hangul start"}, {0xD7A3, true, "Hangul end"}, // ASCII - should be false {'a', false, "ASCII a"}, {'Z', false, "ASCII Z"}, {'0', false, "ASCII 0"}, {' ', false, "ASCII space"}, // Punctuation {'.', false, "period"}, {',', false, "comma"}, {'!', false, "exclamation"}, } for _, tt := range tests { got := isCJK(tt.r) if got != tt.expect { t.Errorf("isCJK(%q U+%04X %s) = %v, want %v", tt.r, tt.r, tt.desc, got, tt.expect) } } } func TestIsEmoji(t *testing.T) { tests := []struct { r rune expect bool desc string }{ // Common emojis (using hex to avoid encoding issues) {0x1F600, true, "Grinning Face"}, {0x1F389, true, "Party Popper"}, {0x2764, true, "Heavy Black Heart"}, {0x1F44D, true, "Thumbs Up"}, {0x1F31F, true, "Glowing Star"}, // Emoji numbers and symbols {'0', true, "Emoji digit 0"}, {'9', true, "Emoji digit 9"}, {'#', true, "Emoji #"}, {'*', true, "Emoji *"}, // ASCII letters - not emoji {'a', false, "ASCII a"}, {'Z', false, "ASCII Z"}, // Chinese characters - not emoji {'中', false, "Chinese character"}, // Variation Selector {0xFE0F, true, "Variation Selector-16"}, // Zero Width Joiner {0x200D, true, "Zero Width Joiner"}, } for _, tt := range tests { got := isEmoji(tt.r) if got != tt.expect { t.Errorf("isEmoji(%q U+%04X %s) = %v, want %v", tt.r, tt.r, tt.desc, got, tt.expect) } } } // TestInRangeBinarySearch tests that binary search works correctly func TestInRangeBinarySearch(t *testing.T) { // Test boundary conditions // First and last elements if !inRange(cjkRanges, 0x1100) { t.Error("First CJK range element not found") } if !inRange(cjkRanges, 0x115F) { t.Error("End of first CJK range not found") } // Elements just outside ranges if inRange(cjkRanges, 0x10FF) { t.Error("Element before first range incorrectly found") } if inRange(cjkRanges, 0x1160) { t.Error("Element after first range incorrectly found") } } // TestCJKVsUnicodeLibrary compares our implementation with unicode.In func TestCJKVsUnicodeLibrary(t *testing.T) { // Test a range of characters for r := rune(0x4E00); r <= 0x4E50; r++ { got := isCJK(r) want := unicode.In(r, unicode.Han) if got != want { t.Errorf("isCJK(U+%04X) = %v, unicode.In = %v", r, got, want) } } // Test Hiragana for r := rune(0x3041); r <= 0x3050; r++ { got := isCJK(r) want := unicode.In(r, unicode.Hiragana) if got != want { t.Errorf("isCJK(U+%04X) = %v, unicode.In = %v", r, got, want) } } // Test Katakana for r := rune(0x30A1); r <= 0x30B0; r++ { got := isCJK(r) want := unicode.In(r, unicode.Katakana) if got != want { t.Errorf("isCJK(U+%04X) = %v, unicode.In = %v", r, got, want) } } // Test Hangul for r := rune(0xAC00); r <= 0xAC10; r++ { got := isCJK(r) want := unicode.In(r, unicode.Hangul) if got != want { t.Errorf("isCJK(U+%04X) = %v, unicode.In = %v", r, got, want) } } } ================================================ FILE: modules/diferenco/unified.go ================================================ // Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package diferenco import ( "bytes" "context" "fmt" "strings" ) // DefaultContextLines is the number of unchanged lines of surrounding // context displayed by Unified. Use toPatch to specify a different value. const DefaultContextLines = 3 type File struct { Name string `json:"name"` Hash string `json:"hash"` Mode uint32 `json:"mode"` } // Patch represents a set of edits as a unified diff. type Patch struct { // From is the name of the original file. From *File `json:"from,omitempty"` // To is the name of the modified file. To *File `json:"to,omitempty"` // IsBinary returns true if this patch is representing a binary file. IsBinary bool `json:"binary"` // Fragments returns true if this patch is representing a fragments file. IsFragments bool `json:"fragments"` // Message prefix, eg: warning: something Message string `json:"message"` // Hunks is the set of edit Hunks needed to transform the file content. Hunks []*Hunk `json:"hunks,omitempty"` } func (p Patch) Name() string { switch { case p.To != nil: return p.To.Name case p.From != nil: return p.From.Name } return "" } func (p Patch) Stat() FileStat { s := FileStat{Hunks: len(p.Hunks), Name: p.Name()} for _, h := range p.Hunks { ins, del := h.Stat() s.Addition += ins s.Deletion += del } return s } func (p Patch) Format() ([]byte, int) { if len(p.Hunks) == 0 { return nil, 0 } b := new(bytes.Buffer) var lines int if p.From != nil { fmt.Fprintf(b, "--- %s\n", p.From.Name) } else { fmt.Fprintf(b, "--- /dev/null\n") } if p.To != nil { fmt.Fprintf(b, "+++ %s\n", p.To.Name) } else { fmt.Fprintf(b, "+++ /dev/null\n") } lines += 2 for _, hunk := range p.Hunks { fromCount, toCount := 0, 0 for _, l := range hunk.Lines { switch l.Kind { case Delete: fromCount++ case Insert: toCount++ default: fromCount++ toCount++ } } fmt.Fprint(b, "@@") if fromCount > 1 { fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount) } else if hunk.FromLine == 1 && fromCount == 0 { // Match odd GNU diff -u behavior adding to empty file. fmt.Fprintf(b, " -0,0") } else { fmt.Fprintf(b, " -%d", hunk.FromLine) } if toCount > 1 { fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount) } else if hunk.ToLine == 1 && toCount == 0 { // Match odd GNU diff -u behavior adding to empty file. fmt.Fprintf(b, " +0,0") } else { fmt.Fprintf(b, " +%d", hunk.ToLine) } if hunk.Section != "" { fmt.Fprintf(b, " @@ %s\n", hunk.Section) } else { fmt.Fprint(b, " @@\n") } lines += len(hunk.Lines) + 1 for _, l := range hunk.Lines { switch l.Kind { case Delete: fmt.Fprintf(b, "-%s", l.Content) case Insert: fmt.Fprintf(b, "+%s", l.Content) default: fmt.Fprintf(b, " %s", l.Content) } if !strings.HasSuffix(l.Content, "\n") { fmt.Fprintf(b, "\n\\ No newline at end of file\n") lines++ } } } lines++ // We respect the editor's line-ending convention: "\n" actually has two lines. return b.Bytes(), lines } // String converts a unified diff to the standard textual form for that diff. // The output of this function can be passed to tools like patch. func (p Patch) String() string { if len(p.Hunks) == 0 { return "" } b := new(strings.Builder) if p.From != nil { fmt.Fprintf(b, "--- %s\n", p.From.Name) } else { fmt.Fprintf(b, "--- /dev/null\n") } if p.To != nil { fmt.Fprintf(b, "+++ %s\n", p.To.Name) } else { fmt.Fprintf(b, "+++ /dev/null\n") } for _, hunk := range p.Hunks { fromCount, toCount := 0, 0 for _, l := range hunk.Lines { switch l.Kind { case Delete: fromCount++ case Insert: toCount++ default: fromCount++ toCount++ } } fmt.Fprint(b, "@@") if fromCount > 1 { fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount) } else if hunk.FromLine == 1 && fromCount == 0 { // Match odd GNU diff -u behavior adding to empty file. fmt.Fprintf(b, " -0,0") } else { fmt.Fprintf(b, " -%d", hunk.FromLine) } if toCount > 1 { fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount) } else if hunk.ToLine == 1 && toCount == 0 { // Match odd GNU diff -u behavior adding to empty file. fmt.Fprintf(b, " +0,0") } else { fmt.Fprintf(b, " +%d", hunk.ToLine) } if hunk.Section != "" { fmt.Fprintf(b, " @@ %s\n", hunk.Section) } else { fmt.Fprint(b, " @@\n") } for _, l := range hunk.Lines { switch l.Kind { case Delete: fmt.Fprintf(b, "-%s", l.Content) case Insert: fmt.Fprintf(b, "+%s", l.Content) default: fmt.Fprintf(b, " %s", l.Content) } if !strings.HasSuffix(l.Content, "\n") { fmt.Fprintf(b, "\n\\ No newline at end of file\n") } } } return b.String() } // Hunk represents a contiguous set of line edits to apply. type Hunk struct { // The line in the original source where the hunk starts. FromLine int `json:"from_line"` // The line in the original source where the hunk finishes. ToLine int `json:"to_line"` // The set of line based edits to apply. Lines []Line `json:"lines,omitempty"` // Section is the optional context text after the @@ markers in unified diff. // For example, in "@@ -1,5 +1,6 @@ function main", the section is "function main". // This provides context about which function/class the change belongs to. Section string `json:"section,omitempty"` } func (h Hunk) Stat() (int, int) { var ins, del int for _, l := range h.Lines { switch l.Kind { case Delete: del++ case Insert: ins++ } } return ins, del } type Line struct { Kind Operation `json:"kind"` Content string `json:"content"` } func Unified(ctx context.Context, opts *Options) (*Patch, error) { sink := &Sink{ Index: make(map[string]int), } a, err := sink.parseLines(opts.R1, opts.S1) if err != nil { return nil, err } b, err := sink.parseLines(opts.R2, opts.S2) if err != nil { return nil, err } changes, err := DiffSlices(ctx, a, b, opts.A) if err != nil { return nil, err } return sink.ToPatch(opts.From, opts.To, changes, a, b, DefaultContextLines), nil } ================================================ FILE: modules/diferenco/unified_encoder.go ================================================ package diferenco import ( "fmt" "io" "strconv" "strings" "github.com/antgroup/hugescm/modules/diferenco/color" ) const ( ZERO_OID_MAX = "0000000000000000000000000000000000000000000000000000000000000000" // ZERO OID MAX ) var ( operationChar = map[Operation]byte{ Insert: '+', Delete: '-', Equal: ' ', } operationColorKey = map[Operation]color.ColorKey{ Insert: color.New, Delete: color.Old, Equal: color.Context, } ) // UnifiedEncoder encodes an unified diff into the provided Writer. It does not // support similarity index for renames or sorting hash representations. type UnifiedEncoder struct { io.Writer // srcPrefix and dstPrefix are prepended to file paths when encoding a diff. srcPrefix string dstPrefix string vcs string noRename bool // colorConfig is the color configuration. The default is no color. color color.ColorConfig } // EncoderOption sets an option on UnifiedEncoder. type EncoderOption func(*UnifiedEncoder) // WithVCS sets the VCS name for the encoder. func WithVCS(vcs string) EncoderOption { return func(e *UnifiedEncoder) { if vcs != "" { e.vcs = vcs } } } // WithColor sets the color configuration. func WithColor(cc color.ColorConfig) EncoderOption { return func(e *UnifiedEncoder) { e.color = cc } } // WithSrcPrefix sets the source prefix for file paths. func WithSrcPrefix(prefix string) EncoderOption { return func(e *UnifiedEncoder) { e.srcPrefix = prefix } } // WithDstPrefix sets the destination prefix for file paths. func WithDstPrefix(prefix string) EncoderOption { return func(e *UnifiedEncoder) { e.dstPrefix = prefix } } // WithNoRename disables rename detection in the output. func WithNoRename() EncoderOption { return func(e *UnifiedEncoder) { e.noRename = true } } // NewUnifiedEncoder returns a new UnifiedEncoder that writes to w. func NewUnifiedEncoder(w io.Writer, opts ...EncoderOption) *UnifiedEncoder { e := &UnifiedEncoder{ Writer: w, srcPrefix: "a/", dstPrefix: "b/", vcs: "zeta", } for _, opt := range opts { opt(e) } return e } func (e *UnifiedEncoder) Encode(patches []*Patch) error { for _, p := range patches { if err := e.writePatch(p); err != nil { return err } } return nil } func (e *UnifiedEncoder) appendPathLines(lines []string, fromPath, toPath string, isBinary bool, isFragments bool) []string { if isFragments { return append(lines, fmt.Sprintf("Fragments files %s and %s differ", fromPath, toPath), ) } if isBinary { return append(lines, fmt.Sprintf("Binary files %s and %s differ", fromPath, toPath), ) } return append(lines, "--- "+fromPath, "+++ "+toPath, ) } func (e *UnifiedEncoder) writeFilePatchHeader(p *Patch, b *strings.Builder) { from, to := p.From, p.To if from == nil && to == nil { return } var lines []string switch { case from != nil && to != nil: hashEquals := from.Hash == to.Hash lines = append(lines, fmt.Sprintf("diff --%s %s%s %s%s", e.vcs, e.srcPrefix, from.Name, e.dstPrefix, to.Name), ) if from.Mode != to.Mode { lines = append(lines, fmt.Sprintf("old mode %o", from.Mode), fmt.Sprintf("new mode %o", to.Mode), ) } if !e.noRename { if from.Name != to.Name { lines = append(lines, "rename from "+from.Name, "rename to "+to.Name, ) } } if from.Mode != to.Mode && !hashEquals { lines = append(lines, fmt.Sprintf("index %s..%s", from.Hash, to.Hash), ) } else if !hashEquals { lines = append(lines, fmt.Sprintf("index %s..%s %o", from.Hash, to.Hash, from.Mode), ) } if !hashEquals { lines = e.appendPathLines(lines, e.srcPrefix+from.Name, e.dstPrefix+to.Name, p.IsBinary, p.IsFragments) } case from == nil: lines = append(lines, fmt.Sprintf("diff --%s %s %s", e.vcs, e.srcPrefix+to.Name, e.dstPrefix+to.Name), fmt.Sprintf("new file mode %o", to.Mode), fmt.Sprintf("index %s..%s", ZERO_OID_MAX[0:min(len(to.Hash), len(ZERO_OID_MAX))], to.Hash), ) lines = e.appendPathLines(lines, "/dev/null", e.dstPrefix+to.Name, p.IsBinary, p.IsFragments) case to == nil: lines = append(lines, fmt.Sprintf("diff --%s %s %s", e.vcs, e.srcPrefix+from.Name, e.dstPrefix+from.Name), fmt.Sprintf("deleted file mode %o", from.Mode), fmt.Sprintf("index %s..%s", from.Hash, ZERO_OID_MAX[0:min(len(from.Hash), len(ZERO_OID_MAX))]), ) lines = e.appendPathLines(lines, e.srcPrefix+from.Name, "/dev/null", p.IsBinary, p.IsFragments) } b.WriteString(e.color[color.Meta]) b.WriteString(lines[0]) for _, line := range lines[1:] { b.WriteByte('\n') b.WriteString(line) } b.WriteString(e.color.Reset(color.Meta)) b.WriteByte('\n') } func (e *UnifiedEncoder) writePatchHunk(b *strings.Builder, hunk *Hunk) { fromCount, toCount := 0, 0 for _, l := range hunk.Lines { switch l.Kind { case Delete: fromCount++ case Insert: toCount++ default: fromCount++ toCount++ } } _, _ = b.WriteString(e.color[color.Frag]) _, _ = b.WriteString("@@") if fromCount > 1 { _, _ = b.WriteString(" -") _, _ = b.WriteString(strconv.Itoa(hunk.FromLine)) _ = b.WriteByte(',') _, _ = b.WriteString(strconv.Itoa(fromCount)) } else if hunk.FromLine == 1 && fromCount == 0 { // Match odd GNU diff -u behavior adding to empty file. _, _ = b.WriteString(" -0,0") } else { _, _ = b.WriteString(" -") _, _ = b.WriteString(strconv.Itoa(hunk.FromLine)) } if toCount > 1 { _, _ = b.WriteString(" +") _, _ = b.WriteString(strconv.Itoa(hunk.ToLine)) _ = b.WriteByte(',') _, _ = b.WriteString(strconv.Itoa(toCount)) } else if hunk.ToLine == 1 && toCount == 0 { // Match odd GNU diff -u behavior adding to empty file. _, _ = b.WriteString(" +0,0") } else { _, _ = b.WriteString(" +") _, _ = b.WriteString(strconv.Itoa(hunk.ToLine)) } _, _ = b.WriteString(" @@") if hunk.Section != "" { _, _ = b.WriteString(" ") _, _ = b.WriteString(hunk.Section) } _, _ = b.WriteString(e.color.Reset(color.Frag)) _ = b.WriteByte('\n') for _, line := range hunk.Lines { e.writeLine(b, &line) } } func (e *UnifiedEncoder) writeLine(b *strings.Builder, o *Line) { colorKey := operationColorKey[o.Kind] _, _ = b.WriteString(e.color[colorKey]) _ = b.WriteByte(operationChar[o.Kind]) if before, ok := strings.CutSuffix(o.Content, "\n"); ok { _, _ = b.WriteString(before) _, _ = b.WriteString(e.color.Reset(colorKey)) _ = b.WriteByte('\n') return } _, _ = b.WriteString(o.Content) _, _ = b.WriteString(e.color.Reset(colorKey)) _, _ = b.WriteString("\n\\ No newline at end of file\n") } func (e *UnifiedEncoder) writePatch(p *Patch) error { b := &strings.Builder{} if len(p.Message) != 0 { _, _ = b.WriteString(p.Message) if !strings.HasSuffix(p.Message, "\n") { _ = b.WriteByte('\n') } } e.writeFilePatchHeader(p, b) if len(p.Hunks) == 0 { if _, err := io.WriteString(e.Writer, b.String()); err != nil { return err } return nil } for _, hunk := range p.Hunks { e.writePatchHunk(b, hunk) } if _, err := io.WriteString(e.Writer, b.String()); err != nil { return err } return nil } ================================================ FILE: modules/env/broker.go ================================================ package env import ( "os" "slices" "strconv" "strings" "sync" "syscall" "time" "github.com/antgroup/hugescm/modules/strengthen" ) type Broker interface { ExpandEnv(s string) string LookupEnv(key string) (string, bool) Getenv(string) string Setenv(key, value string) error Unsetenv(key string) error Environ() []string Clearenv() } type broker struct { } func (b *broker) ExpandEnv(s string) string { return os.ExpandEnv(s) } func (b *broker) LookupEnv(key string) (string, bool) { return os.LookupEnv(key) } func (b *broker) Getenv(key string) string { return os.Getenv(key) } func (b *broker) Setenv(key, value string) error { return os.Setenv(key, value) } func (b *broker) Unsetenv(key string) error { return os.Unsetenv(key) } func (b *broker) Clearenv() { os.Clearenv() } func (b *broker) Environ() []string { return os.Environ() } type sanitizer struct { keys map[string]int env []string mu sync.RWMutex } func NewSanitizer() Broker { b := &sanitizer{ keys: make(map[string]int), env: slices.Clone(Environ()), } for i, e := range b.env { k, _, ok := strings.Cut(e, "=") if !ok { continue } b.keys[k] = i } return b } func (b *sanitizer) ExpandEnv(s string) string { return os.Expand(s, b.Getenv) } func (b *sanitizer) LookupEnv(key string) (string, bool) { if len(key) == 0 { return "", false } b.mu.RLock() defer b.mu.RUnlock() i, ok := b.keys[key] if !ok { return "", false } s := b.env[i] if len(s) != 0 { if _, v, ok := strings.Cut(s, "="); ok { return v, true } } return "", false } func (b *sanitizer) Getenv(key string) string { v, _ := b.LookupEnv(key) return v } func (b *sanitizer) Setenv(key, value string) error { if len(key) == 0 { return syscall.EINVAL } for i := range len(key) { if key[i] == '=' || key[i] == 0 { return syscall.EINVAL } } kv := key + "=" + value b.mu.Lock() defer b.mu.Unlock() i, ok := b.keys[key] if ok { b.env[i] = kv return nil } i = len(b.env) b.env = append(b.env, kv) b.keys[key] = i return nil } func (b *sanitizer) Unsetenv(key string) error { b.mu.Lock() defer b.mu.Unlock() if i, ok := b.keys[key]; ok { b.env[i] = "" delete(b.keys, key) } return nil } func (b *sanitizer) Clearenv() { b.mu.Lock() defer b.mu.Unlock() b.keys = make(map[string]int) b.env = []string{} } func (b *sanitizer) Environ() []string { b.mu.RLock() defer b.mu.RUnlock() a := make([]string, 0, len(b.env)+16) // Reduce the number of memory allocations for _, env := range b.env { if env != "" { a = append(a, env) } } return a } func (b *sanitizer) Find(k K) string { return b.Getenv(string(k)) } func (b *sanitizer) SimpleAtoi(k K, dv int64) int64 { v := b.Getenv(string(k)) if i, err := strconv.ParseInt(v, 10, 64); err == nil { return i } return dv } func (b *sanitizer) SimpleAtou(k K, dv uint64) uint64 { v := b.Getenv(string(k)) if i, err := strconv.ParseUint(v, 10, 64); err == nil { return i } return dv } func (b *sanitizer) SimpleAtob(k K, dv bool) bool { v := b.Getenv(string(k)) return strengthen.SimpleAtob(v, dv) } func (b *sanitizer) Duration(k K, dv time.Duration) time.Duration { v := b.Getenv(string(k)) if d, err := time.ParseDuration(v); err == nil { return d } return dv } func (b *sanitizer) Strings(k K) []string { s := b.Getenv(string(k)) return strings.Split(s, StandardSeparator) } var ( SystemBroker Broker = &broker{} ) ================================================ FILE: modules/env/builder.go ================================================ package env type Builder interface { Environ() []string } type builder struct { } func (b *builder) Environ() []string { return Environ() } func NewBuilder() Builder { return &builder{} } ================================================ FILE: modules/env/constant.go ================================================ package env import ( "os" "strconv" "strings" "github.com/antgroup/hugescm/modules/strengthen" ) type K string // VALUE const ( ZETA_TERMINAL_PROMPT K = "ZETA_TERMINAL_PROMPT" ZETA_NO_SSH_AUTH_SOCK K = "ZETA_NO_SSH_AUTH_SOCK" StandardSeparator string = ";" ) func (k K) With(s string) string { return string(k) + "=" + s } func (k K) WithBool(b bool) string { if b { return string(k) + "=true" } return string(k) + "=false" } func (k K) WithInt(i int64) string { return string(k) + "=" + strconv.FormatInt(i, 10) } func (k K) WithPaths(sv []string) string { return string(k) + "=" + strings.Join(sv, string(os.PathListSeparator)) } func (k K) Withs(sv []string) string { return string(k) + "=" + strings.Join(sv, StandardSeparator) } func (k K) Find() string { return os.Getenv(string(k)) } // find envkey Strings to array func (k K) Strings() []string { s := os.Getenv(string(k)) return strings.Split(s, StandardSeparator) } // find envkey split to array func (k K) StrSplit(sep string) []string { s := os.Getenv(string(k)) return strings.Split(s, sep) } // SimpleAtob Obtain the boolean variable from the environment variable, if it does not exist, return the default value func (k K) SimpleAtob(dv bool) bool { s, ok := os.LookupEnv(string(k)) if !ok { return dv } return strengthen.SimpleAtob(s, dv) } func (k K) SimpleAtoi(dv int64) int64 { s, ok := os.LookupEnv(string(k)) if !ok { return dv } if i, err := strconv.ParseInt(s, 10, 64); err == nil { return i } return dv } ================================================ FILE: modules/env/env.go ================================================ package env import ( "fmt" "os" "strconv" "strings" "time" ) func SanitizeEnv(keys ...string) []string { excludedKeys := make(map[string]bool) for _, k := range keys { excludedKeys[k] = true } originEnv := os.Environ() sanitizedEnv := make([]string, 0, len(originEnv)) for _, e := range originEnv { k, _, ok := strings.Cut(e, "=") if !ok || excludedKeys[k] { // skip keys continue } sanitizedEnv = append(sanitizedEnv, e) } return sanitizedEnv } // GetBool fetches and parses a boolean typed environment variable // // If the variable is empty, returns `fallback` and no error. // If there is an error, returns `fallback` and the error. func GetBool(name string, fallback bool) (bool, error) { s := os.Getenv(name) if s == "" { return fallback, nil } v, err := strconv.ParseBool(s) if err != nil { return fallback, fmt.Errorf("get bool %s: %w", name, err) } return v, nil } // GetInt fetches and parses an integer typed environment variable // // If the variable is empty, returns `fallback` and no error. // If there is an error, returns `fallback` and the error. func GetInt(name string, fallback int) (int, error) { s := os.Getenv(name) if s == "" { return fallback, nil } v, err := strconv.Atoi(s) if err != nil { return fallback, fmt.Errorf("get int %s: %w", name, err) } return v, nil } // GetDuration fetches and parses a duration typed environment variable func GetDuration(name string, fallback time.Duration) (time.Duration, error) { s := os.Getenv(name) if s == "" { return fallback, nil } v, err := time.ParseDuration(s) if err != nil { return fallback, fmt.Errorf("get duration %s: %w", name, err) } return v, nil } // GetString fetches a given name from the environment and falls back to a // default value if the name is not available. The value is stripped of // leading and trailing whitespace. func GetString(name string, fallback string) string { value := os.Getenv(name) if value == "" { return fallback } return strings.TrimSpace(value) } ================================================ FILE: modules/env/env_test.go ================================================ package env import ( "fmt" "os" "strings" "testing" "time" ) func TestEnviron(t *testing.T) { now := time.Now() env := Environ() fmt.Fprintf(os.Stderr, "use time: %v\n", time.Since(now)) fmt.Fprintf(os.Stderr, "%s\n", strings.Join(env, "\n")) } func TestEnvironForEach(t *testing.T) { for range 10 { now := time.Now() env := Environ() fmt.Fprintf(os.Stderr, "%d use time: %v\n", len(env), time.Since(now)) } } func TestSanitizeEnv(t *testing.T) { for _, e := range SanitizeEnv("PATH") { fmt.Fprintf(os.Stderr, "%s\n", e) } } ================================================ FILE: modules/env/env_unix.go ================================================ //go:build !windows package env import ( "os" "os/exec" "path/filepath" "slices" "strings" "sync" ) var ( allowedEnv = map[string]bool{ "HOME": true, "USER": true, "LOGNAME": true, "PATH": true, "TZ": true, "LANG": true, //Replace by en_US.UTF-8 "LD_LIBRARY_PATH": true, "SHELL": true, "TMPDIR": true, // Git HTTP proxy settings: https://git-scm.com/docs/git-config#git-config-httpproxy "all_proxy": true, "http_proxy": true, "HTTP_PROXY": true, "https_proxy": true, "HTTPS_PROXY": true, // libcurl settings: https://curl.haxx.se/libcurl/c/CURLOPT_NOPROXY.html "no_proxy": true, "NO_PROXY": true, // Environment variables to tell git to use custom SSH executable or command "GIT_SSH": true, "GIT_SSH_COMMAND": true, // Environment variables neesmd for ssh-agent based authentication "SSH_AUTH_SOCK": true, "SSH_AGENT_PID": true, // Export git tracing variables for easier debugging "GIT_TRACE": true, "GIT_TRACE_PACK_ACCESS": true, "GIT_TRACE_PACKET": true, "GIT_TRACE_PERFORMANCE": true, "GIT_TRACE_SETUP": true, "GIT_CURL_VERBOSE": true, } ) var ( Environ = sync.OnceValue(func() []string { originEnv := os.Environ() sanitizedEnv := make([]string, 0, len(originEnv)) for _, e := range originEnv { k, _, ok := strings.Cut(e, "=") if !ok || !allowedEnv[k] { continue } sanitizedEnv = append(sanitizedEnv, e) } slices.Sort(sanitizedEnv) return sanitizedEnv }) ) func DelayInitializeEnv() error { pathEnv := os.Getenv("PATH") pathList := strings.Split(pathEnv, string(os.PathListSeparator)) pathNewList := make([]string, 0, len(pathList)) seen := make(map[string]bool) for _, p := range pathList { cleanedPath := filepath.Clean(p) if cleanedPath == "." { continue } u := strings.ToLower(cleanedPath) if seen[u] { continue } seen[u] = true pathNewList = append(pathNewList, cleanedPath) } _ = os.Setenv("PATH", strings.Join(pathNewList, string(os.PathListSeparator))) return nil } func LookupPager(name string) (string, error) { return exec.LookPath(name) } ================================================ FILE: modules/env/env_windows.go ================================================ //go:build windows package env import ( "os" "os/exec" "path/filepath" "slices" "strings" "sync" "golang.org/x/sys/windows/registry" ) var ( allowedEnv = map[string]bool{ // Environment variables to tell git to use custom SSH executable or command "GIT_SSH": true, "GIT_SSH_COMMAND": true, // Export git tracing variables for easier debugging "GIT_TRACE": true, "GIT_TRACE_PACK_ACCESS": true, "GIT_TRACE_PACKET": true, "GIT_TRACE_PERFORMANCE": true, "GIT_TRACE_SETUP": true, "GIT_CURL_VERBOSE": true, } ) var ( Environ = sync.OnceValue(func() []string { originEnv := os.Environ() sanitizedEnv := make([]string, 0, len(originEnv)) for _, s := range originEnv { k, _, ok := strings.Cut(s, "=") if !ok || strings.HasPrefix(k, "GIT_") && !allowedEnv[k] { continue } sanitizedEnv = append(sanitizedEnv, s) } slices.Sort(sanitizedEnv) // order by return sanitizedEnv }) ) var ( lookupGitForWindowsInstall = sync.OnceValues(func() (string, error) { gitForWindowsKey, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\GitForWindows`, registry.QUERY_VALUE) if err != nil { return "", nil } defer gitForWindowsKey.Close() // nolint installPath, _, err := gitForWindowsKey.GetStringValue("InstallPath") if err != nil { return "", err } return installPath, nil }) ) func hasGitExe(installDir string) bool { gitExe := filepath.Join(installDir, "cmd", "git.exe") if _, err := os.Stat(gitExe); err != nil { return false } return true } func cleanupEnv(pathList []string) { pathNewList := make([]string, 0, len(pathList)+2) seen := make(map[string]bool) for _, p := range pathList { cleanedPath := filepath.Clean(p) if cleanedPath == "." { continue } u := strings.ToLower(cleanedPath) if seen[u] { continue } seen[u] = true pathNewList = append(pathNewList, cleanedPath) } _ = os.Setenv("PATH", strings.Join(pathNewList, string(os.PathListSeparator))) } // DelayInitializeEnv: initialize path env func DelayInitializeEnv() error { gitForWindowsInstall, err := lookupGitForWindowsInstall() if err != nil { cleanupEnv(strings.Split(os.Getenv("PATH"), string(os.PathListSeparator))) return nil } pathEnv := os.Getenv("PATH") pathList := strings.Split(pathEnv, string(os.PathListSeparator)) pathNewList := make([]string, 0, len(pathList)+2) if _, err := exec.LookPath("git"); err != nil && hasGitExe(gitForWindowsInstall) { pathNewList = append(pathNewList, filepath.Join(gitForWindowsInstall, "cmd")) } seen := make(map[string]bool) for _, p := range pathList { cleanedPath := filepath.Clean(p) if cleanedPath == "." { continue } u := strings.ToLower(cleanedPath) if seen[u] { continue } seen[u] = true pathNewList = append(pathNewList, cleanedPath) } _ = os.Setenv("PATH", strings.Join(pathNewList, string(os.PathListSeparator))) return nil } func LookupPager(name string) (string, error) { pagerExe, err := exec.LookPath(name) if err == nil { return pagerExe, nil } gitForWindowsInstall, err := lookupGitForWindowsInstall() if err != nil { return "", err } // C:\Program Files\Git\usr\bin\less.exe lessExe := filepath.Join(gitForWindowsInstall, "usr/bin/less.exe") if _, err := os.Stat(lessExe); err != nil { return "", err } return lessExe, nil } ================================================ FILE: modules/env/env_windows_test.go ================================================ //go:build windows package env import ( "fmt" "os" "testing" ) func TestInitializeEnv(t *testing.T) { _ = os.Setenv("PATH", os.Getenv("PATH")+";C:\\Windows") if err := DelayInitializeEnv(); err != nil { fmt.Fprintf(os.Stderr, "initialize env error: %v\n", err) } } func TestLookupPager(t *testing.T) { lessExe, err := LookupPager("less") if err != nil { fmt.Fprintf(os.Stderr, "search less exe error: %v\n", err) return } fmt.Fprintf(os.Stderr, "found less: %v\n", lessExe) } ================================================ FILE: modules/fnmatch/LICENSE ================================================ Copyright (c) 2016, Daniel Wakefield All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: modules/fnmatch/VERSION ================================================ https://github.com/danwakefield/fnmatch cbb64ac3d964b81592e64f957ad53df015803288 ================================================ FILE: modules/fnmatch/fnmatch.go ================================================ // Provide string-matching based on fnmatch.3 package fnmatch // There are a few issues that I believe to be bugs, but this implementation is // based as closely as possible on BSD fnmatch. These bugs are present in the // source of BSD fnmatch, and so are replicated here. The issues are as follows: // // * FNM_PERIOD is no longer observed after the first * in a pattern // This only applies to matches done with FNM_PATHNAME as well // * FNM_PERIOD doesn't apply to ranges. According to the documentation, // a period must be matched explicitly, but a range will match it too import ( "strings" "unicode" "unicode/utf8" ) const ( FNM_NOESCAPE = (1 << iota) FNM_PATHNAME FNM_PERIOD FNM_LEADING_DIR FNM_CASEFOLD FNM_IGNORECASE = FNM_CASEFOLD FNM_FILE_NAME = FNM_PATHNAME ) func unpackRune(str *string) rune { r, size := utf8.DecodeRuneInString(*str) *str = (*str)[size:] return r } // Matches the pattern against the string, with the given flags, // and returns true if the match is successful. // This function should match fnmatch.3 as closely as possible. func Match(pattern, s string, flags int) bool { // The implementation for this function was patterned after the BSD fnmatch.c // source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html noescape := (flags&FNM_NOESCAPE != 0) pathname := (flags&FNM_PATHNAME != 0) period := (flags&FNM_PERIOD != 0) leadingdir := (flags&FNM_LEADING_DIR != 0) casefold := (flags&FNM_CASEFOLD != 0) // the following is some bookkeeping that the original fnmatch.c implementation did not do // We are forced to do this because we're not keeping indexes into C strings but rather // processing utf8-encoded strings. Use a custom unpacker to maintain our state for us sAtStart := true sLastAtStart := true sLastSlash := false sLastUnpacked := rune(0) unpackS := func() rune { sLastSlash = (sLastUnpacked == '/') sLastUnpacked = unpackRune(&s) sLastAtStart = sAtStart sAtStart = false return sLastUnpacked } for len(pattern) > 0 { c := unpackRune(&pattern) switch c { case '?': if len(s) == 0 { return false } sc := unpackS() if pathname && sc == '/' { return false } if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) { return false } case '*': // collapse multiple *'s // don't use unpackRune here, the only char we care to detect is ASCII for len(pattern) > 0 && pattern[0] == '*' { pattern = pattern[1:] } if period && (len(s) == 0 || s[0] == '.') && (sAtStart || (pathname && sLastUnpacked == '/')) { return false } // optimize for patterns with * at end or before / if len(pattern) == 0 { if pathname { return leadingdir || (strings.IndexByte(s, '/') == -1) } return true } else if pathname && pattern[0] == '/' { offset := strings.IndexByte(s, '/') if offset == -1 { return false } // we already know our pattern and string have a /, skip past it s = s[offset:] // use unpackS here to maintain our bookkeeping state unpackS() pattern = pattern[1:] // we know / is one byte long break } // general case, recurse for test := s; len(test) > 0; unpackRune(&test) { // I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified // but this follows exactly from how fnmatch.c implements it if Match(pattern, test, (flags &^ FNM_PERIOD)) { return true } if pathname && test[0] == '/' { break } } return false case '[': if len(s) == 0 { return false } if pathname && s[0] == '/' { return false } sc := unpackS() if !rangematch(&pattern, sc, flags) { return false } case '\\': if !noescape { if len(pattern) > 0 { c = unpackRune(&pattern) } } fallthrough default: if len(s) == 0 { return false } sc := unpackS() switch { case sc == c: case casefold && unicode.ToLower(sc) == unicode.ToLower(c): default: return false } } } return len(s) == 0 || (leadingdir && s[0] == '/') } func rangematch(pattern *string, test rune, flags int) bool { if len(*pattern) == 0 { return false } casefold := (flags&FNM_CASEFOLD != 0) noescape := (flags&FNM_NOESCAPE != 0) if casefold { test = unicode.ToLower(test) } var negate, matched bool if (*pattern)[0] == '^' || (*pattern)[0] == '!' { negate = true (*pattern) = (*pattern)[1:] } for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' { c := unpackRune(pattern) if !noescape && c == '\\' { if len(*pattern) > 1 { c = unpackRune(pattern) } else { return false } } if casefold { c = unicode.ToLower(c) } if (*pattern)[0] == '-' && len(*pattern) > 1 && (*pattern)[1] != ']' { unpackRune(pattern) // skip the - c2 := unpackRune(pattern) if !noescape && c2 == '\\' { if len(*pattern) > 0 { c2 = unpackRune(pattern) } else { return false } } if casefold { c2 = unicode.ToLower(c2) } // this really should be more intelligent, but it looks like // fnmatch.c does simple int comparisons, therefore we will as well if c <= test && test <= c2 { matched = true } } else if c == test { matched = true } } // skip past the rest of the pattern ok := false for !ok && len(*pattern) > 0 { c := unpackRune(pattern) if c == '\\' && len(*pattern) > 0 { unpackRune(pattern) } else if c == ']' { ok = true } } return ok && matched != negate } ================================================ FILE: modules/fnmatch/fnmatch_test.go ================================================ package fnmatch_test import ( "fmt" "os" "testing" "github.com/antgroup/hugescm/modules/fnmatch" ) // This is a set of tests ported from a set of tests for C fnmatch // found at http://www.mail-archive.com/bug-gnulib@gnu.org/msg14048.html func TestMatch(t *testing.T) { assert := func(p, s string) { if !fnmatch.Match(p, s, 0) { t.Errorf("Assertion failed: Match(%#v, %#v, 0)", p, s) } } assert("", "") assert("*", "") assert("*", "foo") assert("*", "bar") assert("*", "*") assert("**", "f") assert("**", "foo.txt") assert("*.*", "foo.txt") assert("foo*.txt", "foobar.txt") assert("foo.txt", "foo.txt") assert("foo\\.txt", "foo.txt") if fnmatch.Match("foo\\.txt", "foo.txt", fnmatch.FNM_NOESCAPE) { t.Errorf("Assertion failed: Match(%#v, %#v, FNM_NOESCAPE) == false", "foo\\.txt", "foo.txt") } } func TestWildcard(t *testing.T) { // A wildcard pattern "*" should match anything cases := []struct { pattern string input string flags int want bool }{ {"*", "", 0, true}, {"*", "foo", 0, true}, {"*", "*", 0, true}, {"*", " ", 0, true}, {"*", ".foo", 0, true}, {"*", "わたし", 0, true}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } } func TestWildcardSlash(t *testing.T) { cases := []struct { pattern string input string flags int want bool }{ // Should match / when flags are 0 {"*", "foo/bar", 0, true}, {"*", "/", 0, true}, {"*", "/foo", 0, true}, {"*", "foo/", 0, true}, // Shouldnt match / when flags include FNM_PATHNAME {"*", "foo/bar", fnmatch.FNM_PATHNAME, false}, {"*", "/", fnmatch.FNM_PATHNAME, false}, {"*", "/foo", fnmatch.FNM_PATHNAME, false}, {"*", "foo/", fnmatch.FNM_PATHNAME, false}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } for _, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "fnmatch.Match('%s', '%s', %d) should be %v not %v", c.pattern, c.input, c.flags, c.want, got, ) } } } func TestWildcardFNMPeriod(t *testing.T) { // FNM_PERIOD means that . is not matched in some circumstances. cases := []struct { pattern string input string flags int want bool }{ {"*", ".foo", fnmatch.FNM_PERIOD, false}, {"/*", "/.foo", fnmatch.FNM_PERIOD, true}, {"/*", "/.foo", fnmatch.FNM_PERIOD | fnmatch.FNM_PATHNAME, false}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } } func TestQuestionMark(t *testing.T) { //A question mark pattern "?" should match a single character cases := []struct { pattern string input string flags int want bool }{ {"?", "", 0, false}, {"?", "f", 0, true}, {"?", ".", 0, true}, {"?", "?", 0, true}, {"?", "foo", 0, false}, {"?", "わ", 0, true}, {"?", "わた", 0, false}, // Even '/' when flags are 0 {"?", "/", 0, true}, // Except '/' when flags include FNM_PATHNAME {"?", "/", fnmatch.FNM_PATHNAME, false}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } } func TestQuestionMarkExceptions(t *testing.T) { //When flags include FNM_PERIOD a '?' might not match a '.' character. cases := []struct { pattern string input string flags int want bool }{ {"?", ".", fnmatch.FNM_PERIOD, false}, {"foo?", "foo.", fnmatch.FNM_PERIOD, true}, {"/?", "/.", fnmatch.FNM_PERIOD, true}, {"/?", "/.", fnmatch.FNM_PERIOD | fnmatch.FNM_PATHNAME, false}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } } func TestRange(t *testing.T) { azPat := "[a-z]" cases := []struct { pattern string input string flags int want bool }{ // Should match a single character inside its range {azPat, "a", 0, true}, {azPat, "q", 0, true}, {azPat, "z", 0, true}, {"[わ]", "わ", 0, true}, // Should not match characters outside its range {azPat, "-", 0, false}, {azPat, " ", 0, false}, {azPat, "D", 0, false}, {azPat, "é", 0, false}, //Should only match one character {azPat, "ab", 0, false}, {azPat, "", 0, false}, // Should not consume more of the pattern than necessary {azPat + "foo", "afoo", 0, true}, // Should match '-' if it is the first/last character or is // backslash escaped {"[-az]", "-", 0, true}, {"[-az]", "a", 0, true}, {"[-az]", "b", 0, false}, {"[az-]", "-", 0, true}, {"[a\\-z]", "-", 0, true}, {"[a\\-z]", "b", 0, false}, // ignore '\\' when FNM_NOESCAPE is given {"[a\\-z]", "\\", fnmatch.FNM_NOESCAPE, true}, {"[a\\-z]", "-", fnmatch.FNM_NOESCAPE, false}, // Should be negated if starting with ^ or !" {"[^a-z]", "a", 0, false}, {"[!a-z]", "b", 0, false}, {"[!a-z]", "é", 0, true}, {"[!a-z]", "わ", 0, true}, // Still match '-' if following the negation character {"[^-az]", "-", 0, false}, {"[^-az]", "b", 0, true}, // Should support multiple characters/ranges {"[abc]", "a", 0, true}, {"[abc]", "c", 0, true}, {"[abc]", "d", 0, false}, {"[a-cg-z]", "c", 0, true}, {"[a-cg-z]", "h", 0, true}, {"[a-cg-z]", "d", 0, false}, //Should not match '/' when flags is FNM_PATHNAME {"[abc/def]", "/", 0, true}, {"[abc/def]", "/", fnmatch.FNM_PATHNAME, false}, {"[.-0]", "/", 0, true}, // The range [.-0] includes / {"[.-0]", "/", fnmatch.FNM_PATHNAME, false}, // Should normally be case-sensitive {"[a-z]", "A", 0, false}, {"[A-Z]", "a", 0, false}, //Except when FNM_CASEFOLD is given {"[a-z]", "A", fnmatch.FNM_CASEFOLD, true}, {"[A-Z]", "a", fnmatch.FNM_CASEFOLD, true}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } } func TestBackSlash(t *testing.T) { cases := []struct { pattern string input string flags int want bool }{ //A backslash should escape the following characters {"\\\\", "\\", 0, true}, {"\\*", "*", 0, true}, {"\\*", "foo", 0, false}, {"\\?", "?", 0, true}, {"\\?", "f", 0, false}, {"\\[a-z]", "[a-z]", 0, true}, {"\\[a-z]", "a", 0, false}, {"\\foo", "foo", 0, true}, {"\\わ", "わ", 0, true}, // Unless FNM_NOESCAPE is given {"\\\\", "\\", fnmatch.FNM_NOESCAPE, false}, {"\\\\", "\\\\", fnmatch.FNM_NOESCAPE, true}, {"\\*", "foo", fnmatch.FNM_NOESCAPE, false}, {"\\*", "\\*", fnmatch.FNM_NOESCAPE, true}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } } func TestLiteral(t *testing.T) { cases := []struct { pattern string input string flags int want bool }{ //Literal characters should match themselves {"foo", "foo", 0, true}, {"foo", "foobar", 0, false}, {"foobar", "foo", 0, false}, {"foo", "Foo", 0, false}, {"わたし", "わたし", 0, true}, // And perform case-folding when FNM_CASEFOLD is given {"foo", "FOO", fnmatch.FNM_CASEFOLD, true}, {"FoO", "fOo", fnmatch.FNM_CASEFOLD, true}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } } func TestFNMLeadingDir(t *testing.T) { cases := []struct { pattern string input string flags int want bool }{ // FNM_LEADING_DIR should ignore trailing '/*' {"foo", "foo/bar", 0, false}, {"foo", "foo/bar", fnmatch.FNM_LEADING_DIR, true}, {"*", "foo/bar", fnmatch.FNM_PATHNAME, false}, {"*", "foo/bar", fnmatch.FNM_PATHNAME | fnmatch.FNM_LEADING_DIR, true}, } for tc, c := range cases { got := fnmatch.Match(c.pattern, c.input, c.flags) if got != c.want { t.Errorf( "Testcase #%d failed: fnmatch.Match('%s', '%s', %d) should be %v not %v", tc, c.pattern, c.input, c.flags, c.want, got, ) } } } func TestMatchBUG(t *testing.T) { fmt.Fprintf(os.Stderr, "%v\n", fnmatch.Match("abc.go*", "abc.go", fnmatch.FNM_PATHNAME|fnmatch.FNM_PERIOD)) } ================================================ FILE: modules/gcfg/LICENSE ================================================ Copyright (c) 2012 Péter Surányi. Portions Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: modules/gcfg/Makefile ================================================ # General WORKDIR = $(PWD) # Go parameters GOCMD = go GOTEST = $(GOCMD) test # Coverage COVERAGE_REPORT = coverage.out COVERAGE_MODE = count test: $(GOTEST) -race ./... test-coverage: echo "" > $(COVERAGE_REPORT); \ $(GOTEST) -coverprofile=$(COVERAGE_REPORT) -coverpkg=./... -covermode=$(COVERAGE_MODE) ./... ================================================ FILE: modules/gcfg/VERSION ================================================ https://github.com/go-git/gcfg 0429aa6c8f397bcdde3f49cf8704a97217a2529d # ONLY TIDY ================================================ FILE: modules/gcfg/doc.go ================================================ // Package gcfg reads "INI-style" text-based configuration files with // "name=value" pairs grouped into sections (gcfg files). // // This package is still a work in progress; see the sections below for planned // changes. // // # Syntax // // The syntax is based on that used by git config: // http://git-scm.com/docs/git-config#_syntax . // There are some (planned) differences compared to the git config format: // - improve data portability: // - must be encoded in UTF-8 (for now) and must not contain the 0 byte // - include and "path" type is not supported // (path type may be implementable as a user-defined type) // - internationalization // - section and variable names can contain unicode letters, unicode digits // (as defined in http://golang.org/ref/spec#Characters ) and hyphens // (U+002D), starting with a unicode letter // - disallow potentially ambiguous or misleading definitions: // - `[sec.sub]` format is not allowed (deprecated in gitconfig) // - `[sec ""]` is not allowed // - use `[sec]` for section name "sec" and empty subsection name // - (planned) within a single file, definitions must be contiguous for each: // - section: '[secA]' -> '[secB]' -> '[secA]' is an error // - subsection: '[sec "A"]' -> '[sec "B"]' -> '[sec "A"]' is an error // - multivalued variable: 'multi=a' -> 'other=x' -> 'multi=b' is an error // // # Data structure // // The functions in this package read values into a user-defined struct. // Each section corresponds to a struct field in the config struct, and each // variable in a section corresponds to a data field in the section struct. // The mapping of each section or variable name to fields is done either based // on the "gcfg" struct tag or by matching the name of the section or variable, // ignoring case. In the latter case, hyphens '-' in section and variable names // correspond to underscores '_' in field names. // Fields must be exported; to use a section or variable name starting with a // letter that is neither upper- or lower-case, prefix the field name with 'X'. // (See https://code.google.com/p/go/issues/detail?id=5763#c4 .) // // For sections with subsections, the corresponding field in config must be a // map, rather than a struct, with string keys and pointer-to-struct values. // Values for subsection variables are stored in the map with the subsection // name used as the map key. // (Note that unlike section and variable names, subsection names are case // sensitive.) // When using a map, and there is a section with the same section name but // without a subsection name, its values are stored with the empty string used // as the key. // It is possible to provide default values for subsections in the section // "default-" (or by setting values in the corresponding struct // field "Default_"). // // The functions in this package error if config is not a pointer to a struct, // or when a field is not of a suitable type (either a struct or a map with // string keys and pointer-to-struct values). // // # Parsing of values // // The section structs in the config struct may contain single-valued or // multi-valued variables. Variables of unnamed slice type (that is, a type // starting with `[]`) are treated as multi-value; all others (including named // slice types) are treated as single-valued variables. // // Single-valued variables are handled based on the type as follows. // Unnamed pointer types (that is, types starting with `*`) are dereferenced, // and if necessary, a new instance is allocated. // // For types implementing the encoding.TextUnmarshaler interface, the // UnmarshalText method is used to set the value. Implementing this method is // the recommended way for parsing user-defined types. // // For fields of string kind, the value string is assigned to the field, after // unquoting and unescaping as needed. // For fields of bool kind, the field is set to true if the value is "true", // "yes", "on" or "1", and set to false if the value is "false", "no", "off" or // "0", ignoring case. In addition, single-valued bool fields can be specified // with a "blank" value (variable name without equals sign and value); in such // case the value is set to true. // // Predefined integer types [u]int(|8|16|32|64) and big.Int are parsed as // decimal or hexadecimal (if having '0x' prefix). (This is to prevent // unintuitively handling zero-padded numbers as octal.) Other types having // [u]int* as the underlying type, such as os.FileMode and uintptr allow // decimal, hexadecimal, or octal values. // Parsing mode for integer types can be overridden using the struct tag option // ",int=mode" where mode is a combination of the 'd', 'h', and 'o' characters // (each standing for decimal, hexadecimal, and octal, respectively.) // // All other types are parsed using fmt.Sscanf with the "%v" verb. // // For multi-valued variables, each individual value is parsed as above and // appended to the slice. If the first value is specified as a "blank" value // (variable name without equals sign and value), a new slice is allocated; // that is any values previously set in the slice will be ignored. // // The types subpackage for provides helpers for parsing "enum-like" and integer // types. // // # Error handling // // There are 3 types of errors: // // 1. Logic errors: invalid configuration structure. // 2. Data errors (fatal): invalid configuration syntax. // 3. Data errors (warning): data that doesn't belong to any part of the config // structure. // All errors are handled via Go's built-in error convention. Warnings regarding // data errors are wrapped around ErrSyntaxWarning, so that it can be more easily // identified by consumers. This library do not cause panics. // // Data errors cause gcfg to return a non-nil error value. This includes the // case when there are extra unknown key-value definitions in the configuration // data (extra data). // However, in some occasions it is desirable to be able to proceed in // situations when the only data error is that of extra data. // These errors are handled at a different (warning) priority and can be // filtered out programmatically. To ignore extra data warnings, wrap the // gcfg.Read*Into invocation into a call to gcfg.FatalOnly. // // # TODO // // The following is a list of changes under consideration: // - documentation // - self-contained syntax documentation // - more practical examples // - move TODOs to issue tracker (eventually) // - syntax // - reconsider valid escape sequences // (gitconfig doesn't support \r in value, \t in subsection name, etc.) // - reading / parsing gcfg files // - define internal representation structure // - support multiple inputs (readers, strings, files) // - support declaring encoding (?) // - support varying fields sets for subsections (?) // - writing gcfg files // - error handling // - make error context accessible programmatically? // - limit input size? package gcfg ================================================ FILE: modules/gcfg/errors.go ================================================ package gcfg import ( "errors" "fmt" ) var ( ErrSyntaxWarning = errors.New("syntax warning") ErrMissingEscapeSequence = errors.New("missing escape sequence") ErrMissingEndQuote = errors.New("missing end quote") ) // FatalOnly filters the results of a Read*Into invocation and returns only // fatal errors. That is, errors (warnings) indicating data for unknown // sections / variables is ignored. Example invocation: // // err := gcfg.FatalOnly(gcfg.ReadFileInto(&cfg, configFile)) // if err != nil { // ... func FatalOnly(err error) error { for { if err == nil { return nil } err = errors.Unwrap(err) if !errors.Is(err, ErrSyntaxWarning) { return err } } } func newSyntaxWarning(sec, sub, variable string) error { msg := fmt.Sprintf("can't store data in section %q", sec) if sub != "" { msg += fmt.Sprintf(", subsection %q", sub) } if variable != "" { msg += fmt.Sprintf(", variable %q", variable) } return fmt.Errorf("%w: %s", ErrSyntaxWarning, msg) } func joinNonFatal(prev, cur error) (error, bool) { if !errors.Is(cur, ErrSyntaxWarning) { return cur, true } return errors.Join(prev, cur), false } ================================================ FILE: modules/gcfg/errors_test.go ================================================ package gcfg_test import "testing" func TestXxx(t *testing.T) { tests := []struct { name string in error want error }{} for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { // For }) } } ================================================ FILE: modules/gcfg/example_test.go ================================================ package gcfg_test import ( "fmt" "log" "github.com/antgroup/hugescm/modules/gcfg" ) func ExampleReadStringInto() { cfgStr := `; Comment line [section] name=value # comment` cfg := struct { Section struct { Name string } }{} err := gcfg.ReadStringInto(&cfg, cfgStr) if err != nil { log.Fatalf("Failed to parse gcfg data: %s", err) } fmt.Println(cfg.Section.Name) // Output: value } func ExampleReadStringInto_bool() { cfgStr := `; Comment line [section] switch=on` cfg := struct { Section struct { Switch bool } }{} err := gcfg.ReadStringInto(&cfg, cfgStr) if err != nil { log.Fatalf("Failed to parse gcfg data: %s", err) } fmt.Println(cfg.Section.Switch) // Output: true } func ExampleReadStringInto_hyphens() { cfgStr := `; Comment line [section-name] variable-name=value # comment` cfg := struct { Section_Name struct { Variable_Name string } }{} err := gcfg.ReadStringInto(&cfg, cfgStr) if err != nil { log.Fatalf("Failed to parse gcfg data: %s", err) } fmt.Println(cfg.Section_Name.Variable_Name) // Output: value } func ExampleReadStringInto_tags() { cfgStr := `; Comment line [section] var-name=value # comment` cfg := struct { Section struct { FieldName string `gcfg:"var-name"` } }{} err := gcfg.ReadStringInto(&cfg, cfgStr) if err != nil { log.Fatalf("Failed to parse gcfg data: %s", err) } fmt.Println(cfg.Section.FieldName) // Output: value } func ExampleReadStringInto_subsections() { cfgStr := `; Comment line [profile "A"] color = white [profile "B"] color = black ` cfg := struct { Profile map[string]*struct { Color string } }{} err := gcfg.ReadStringInto(&cfg, cfgStr) if err != nil { log.Fatalf("Failed to parse gcfg data: %s", err) } fmt.Printf("%s %s\n", cfg.Profile["A"].Color, cfg.Profile["B"].Color) // Output: white black } func ExampleReadStringInto_multivalue() { cfgStr := `; Comment line [section] multi=value1 multi=value2` cfg := struct { Section struct { Multi []string } }{} err := gcfg.ReadStringInto(&cfg, cfgStr) if err != nil { log.Fatalf("Failed to parse gcfg data: %s", err) } fmt.Println(cfg.Section.Multi) // Output: [value1 value2] } func ExampleReadStringInto_unicode() { cfgStr := `; Comment line [甲] 乙=丙 # comment` cfg := struct { X甲 struct { X乙 string } }{} err := gcfg.ReadStringInto(&cfg, cfgStr) if err != nil { log.Fatalf("Failed to parse gcfg data: %s", err) } fmt.Println(cfg.X甲.X乙) // Output: 丙 } ================================================ FILE: modules/gcfg/issues_test.go ================================================ package gcfg import ( "fmt" "math/big" "strings" "testing" ) type Config1 struct { Section struct { Int int BigInt big.Int } } var testsIssue1 = []struct { cfg string typename string }{ {"[section]\nint=X", "int"}, {"[section]\nint=", "int"}, {"[section]\nint=1A", "int"}, {"[section]\nbigint=X", "big.Int"}, {"[section]\nbigint=", "big.Int"}, {"[section]\nbigint=1A", "big.Int"}, } // Value parse error should: // - include plain type name // - not include reflect internals func TestIssue1(t *testing.T) { for i, tt := range testsIssue1 { var c Config1 err := ReadStringInto(&c, tt.cfg) switch { case err == nil: t.Errorf("%d fail: got ok; wanted error", i) case !strings.Contains(err.Error(), tt.typename): t.Errorf("%d fail: error message doesn't contain type name %q: %v", i, tt.typename, err) case strings.Contains(err.Error(), "reflect"): t.Errorf("%d fail: error message includes reflect internals: %v", i, err) default: t.Logf("%d pass: %v", i, err) } } } type confIssue2 struct{ Main struct{ Foo string } } var testsIssue2 = []readtest{ {"[main]\n;\nfoo = bar\n", &confIssue2{struct{ Foo string }{"bar"}}, true}, {"[main]\r\n;\r\nfoo = bar\r\n", &confIssue2{struct{ Foo string }{"bar"}}, true}, } func TestIssue2(t *testing.T) { for i, tt := range testsIssue2 { id := fmt.Sprintf("issue2:%d", i) testRead(t, id, tt) } } ================================================ FILE: modules/gcfg/read.go ================================================ package gcfg import ( "fmt" "io" "os" "strings" "github.com/antgroup/hugescm/modules/gcfg/scanner" "github.com/antgroup/hugescm/modules/gcfg/token" ) var unescape = map[rune]rune{'\\': '\\', '"': '"', 'n': '\n', 't': '\t', 'b': '\b', '\n': '\n'} // no error: invalid literals should be caught by scanner func unquote(s string) (string, error) { u, q, esc := make([]rune, 0, len(s)), false, false for _, c := range s { if esc { uc, ok := unescape[c] switch { case ok: u = append(u, uc) fallthrough case !q && c == '\n': esc = false continue } return "", ErrMissingEscapeSequence } switch c { case '"': q = !q case '\\': esc = true default: u = append(u, c) } } if q { return "", ErrMissingEndQuote } if esc { return "", ErrMissingEscapeSequence } return string(u), nil } func read(callback func(string, string, string, string, bool) error, fset *token.FileSet, file *token.File, src []byte) error { // var s scanner.Scanner var errs scanner.ErrorList _ = s.Init(file, src, func(p token.Position, m string) { errs.Add(p, m) }, 0) sect, sectsub := "", "" pos, tok, lit, err := s.Scan() errfn := func(msg string) error { return fmt.Errorf("%s: %s", fset.Position(pos), msg) } if err != nil { return err } var accErr error for { if errs.Len() > 0 { if err, fatal := joinNonFatal(accErr, errs.Err()); fatal { return err } } switch tok { case token.EOF: return nil case token.EOL, token.COMMENT: pos, tok, lit, err = s.Scan() if err != nil { return err } case token.LBRACK: pos, tok, lit, err = s.Scan() if err != nil { return err } if errs.Len() > 0 { if err, fatal := joinNonFatal(accErr, errs.Err()); fatal { return err } } if tok != token.IDENT { if err, fatal := joinNonFatal(accErr, errfn("expected section name")); fatal { return err } } sect, sectsub = lit, "" pos, tok, lit, err = s.Scan() if err != nil { return err } if errs.Len() > 0 { if err, fatal := joinNonFatal(accErr, errs.Err()); fatal { return err } } if tok == token.STRING { ss, err := unquote(lit) if err != nil { return err } sectsub = ss _, tok, _, err = s.Scan() if err != nil { return err } if errs.Len() > 0 { if err, fatal := joinNonFatal(accErr, errs.Err()); fatal { return err } } } if tok != token.RBRACK { if err, fatal := joinNonFatal(accErr, errfn("expected right bracket")); fatal { return err } } pos, tok, lit, err = s.Scan() if err != nil { return err } if tok != token.EOL && tok != token.EOF && tok != token.COMMENT { if err, fatal := joinNonFatal(accErr, errfn("expected EOL, EOF, or comment")); fatal { return err } } // If a section/subsection header was found, ensure a // container object is created, even if there are no // variables further down. err := callback(sect, sectsub, "", "", true) if err != nil { return err } case token.IDENT: if sect == "" { if err, fatal := joinNonFatal(accErr, errfn("expected section header")); fatal { return err } } n := lit pos, tok, lit, err = s.Scan() if err != nil { return err } if errs.Len() > 0 { return errs.Err() } blank, v := tok == token.EOF || tok == token.EOL || tok == token.COMMENT, "" if !blank { if tok != token.ASSIGN { if err, fatal := joinNonFatal(accErr, errfn("expected '='")); fatal { return err } } pos, tok, lit, err = s.Scan() if err != nil { return err } if errs.Len() > 0 { if err, fatal := joinNonFatal(accErr, errs.Err()); fatal { return err } } if tok != token.STRING { if err, fatal := joinNonFatal(accErr, errfn("expected value")); fatal { return err } } unq, err := unquote(lit) if err != nil { return err } v = unq pos, tok, lit, err = s.Scan() if err != nil { return err } if errs.Len() > 0 { if err, fatal := joinNonFatal(accErr, errs.Err()); fatal { return err } } if tok != token.EOL && tok != token.EOF && tok != token.COMMENT { if err, fatal := joinNonFatal(accErr, errfn("expected EOL, EOF, or comment")); fatal { return err } } } err := callback(sect, sectsub, n, v, blank) if err != nil { return err } default: if sect == "" { if err, fatal := joinNonFatal(accErr, errfn("expected section header")); fatal { return err } } if err, fatal := joinNonFatal(accErr, errfn("expected section header or variable declaration")); fatal { return err } } } } func readInto(config any, fset *token.FileSet, file *token.File, src []byte) error { // firstPassCallback := func(s string, ss string, k string, v string, bv bool) error { return set(config, s, ss, k, v, bv, false) } err := read(firstPassCallback, fset, file, src) if err != nil { return err } secondPassCallback := func(s string, ss string, k string, v string, bv bool) error { return set(config, s, ss, k, v, bv, true) } return read(secondPassCallback, fset, file, src) } // ReadWithCallback reads gcfg formatted data from reader and calls // callback with each section and option found. // // Callback is called with section, subsection, option key, option value // and blank value flag as arguments. // // When a section is found, callback is called with nil subsection, option key // and option value. // // When a subsection is found, callback is called with nil option key and // option value. // // If blank value flag is true, it means that the value was not set for an option // (as opposed to set to empty string). // // If callback returns an error, ReadWithCallback terminates with an error too. func ReadWithCallback(reader io.Reader, callback func(string, string, string, string, bool) error) error { src, err := io.ReadAll(reader) if err != nil { return err } fset := token.NewFileSet() file, err := fset.AddFile("", fset.Base(), len(src)) if err != nil { return err } return read(callback, fset, file, src) } // ReadInto reads gcfg formatted data from reader and sets the values into the // corresponding fields in config. func ReadInto(config any, reader io.Reader) error { src, err := io.ReadAll(reader) if err != nil { return err } fset := token.NewFileSet() file, err := fset.AddFile("", fset.Base(), len(src)) if err != nil { return err } return readInto(config, fset, file, src) } // ReadStringInto reads gcfg formatted data from str and sets the values into // the corresponding fields in config. func ReadStringInto(config any, str string) error { r := strings.NewReader(str) return ReadInto(config, r) } // ReadFileInto reads gcfg formatted data from the file filename and sets the // values into the corresponding fields in config. func ReadFileInto(config any, filename string) error { f, err := os.Open(filename) if err != nil { return err } defer f.Close() // nolint src, err := io.ReadAll(f) if err != nil { return err } fset := token.NewFileSet() file, err := fset.AddFile(filename, fset.Base(), len(src)) if err != nil { return err } return readInto(config, fset, file, src) } ================================================ FILE: modules/gcfg/read_test.go ================================================ package gcfg import ( "bytes" "encoding" "fmt" "math/big" "os" "reflect" "strconv" "testing" "errors" ) const ( // 64 spaces sp64 = " " // 512 spaces sp512 = sp64 + sp64 + sp64 + sp64 + sp64 + sp64 + sp64 + sp64 // 4096 spaces sp4096 = sp512 + sp512 + sp512 + sp512 + sp512 + sp512 + sp512 + sp512 ) type cBasic struct { Section cBasicS1 Hyphen_In_Section cBasicS2 unexported cBasicS1 // nolint Exported cBasicS3 TagName cBasicS1 `gcfg:"tag-name"` } type cBasicS1 struct { Name string Int int PName *string } type cBasicS2 struct { Hyphen_In_Name string } type cBasicS3 struct { unexported string // nolint } type nonMulti []string type unmarshalable string func (u *unmarshalable) UnmarshalText(text []byte) error { s := string(text) if s == "error" { return fmt.Errorf("%s", s) } *u = unmarshalable(s) return nil } var _ encoding.TextUnmarshaler = new(unmarshalable) type cUni struct { X甲 cUniS1 XSection cUniS2 } type cUniS1 struct { X乙 string } type cUniS2 struct { XName string } type cMulti struct { M1 cMultiS1 M2 cMultiS2 M3 cMultiS3 } type cMultiS1 struct{ Multi []string } type cMultiS2 struct{ NonMulti nonMulti } type cMultiS3 struct{ PMulti *[]string } type cSubs struct{ Sub map[string]*cSubsS1 } type cSubsS1 struct{ Name string } type cBool struct{ Section cBoolS1 } type cBoolS1 struct{ Bool bool } type cTxUnm struct{ Section cTxUnmS1 } type cTxUnmS1 struct{ Name unmarshalable } type cNum struct { N1 cNumS1 N2 cNumS2 N3 cNumS3 } type cNumS1 struct { Int int IntDHO int `gcfg:",int=dho"` Big *big.Int } type cNumS2 struct { MultiInt []int MultiBig []*big.Int } type cNumS3 struct{ FileMode os.FileMode } type readtest struct { gcfg string exp any ok bool } func newString(s string) *string { p := new(string) *p = s return p } func newStringSlice(s ...string) *[]string { p := new([]string) *p = s return p } var readtests = []struct { group string tests []readtest }{{"scanning", []readtest{ {"[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, // hyphen in name {"[hyphen-in-section]\nhyphen-in-name=value", &cBasic{Hyphen_In_Section: cBasicS2{Hyphen_In_Name: "value"}}, true}, // quoted string value {"[section]\nname=\"\"", &cBasic{Section: cBasicS1{Name: ""}}, true}, {"[section]\nname=\" \"", &cBasic{Section: cBasicS1{Name: " "}}, true}, {"[section]\nname=\"value\"", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\nname=\" value \"", &cBasic{Section: cBasicS1{Name: " value "}}, true}, {"\n[section]\nname=\"va ; lue\"", &cBasic{Section: cBasicS1{Name: "va ; lue"}}, true}, {"[section]\nname=\"val\" \"ue\"", &cBasic{Section: cBasicS1{Name: "val ue"}}, true}, {"[section]\nname=\"value", &cBasic{}, false}, // escape sequences {"[section]\nname=\"va\\\\lue\"", &cBasic{Section: cBasicS1{Name: "va\\lue"}}, true}, {"[section]\nname=\"va\\\"lue\"", &cBasic{Section: cBasicS1{Name: "va\"lue"}}, true}, {"[section]\nname=\"va\\nlue\"", &cBasic{Section: cBasicS1{Name: "va\nlue"}}, true}, {"[section]\nname=\"va\\tlue\"", &cBasic{Section: cBasicS1{Name: "va\tlue"}}, true}, {"[section]\nname=x:\\\\path\\\\", &cBasic{Section: cBasicS1{Name: "x:\\path\\"}}, true}, {"[section]\nname=\\b", &cBasic{Section: cBasicS1{Name: "\b"}}, true}, {"\n[section]\nname=\\", &cBasic{}, false}, {"\n[section]\nname=\\a", &cBasic{}, false}, {"\n[section]\nname=\"val\\a\"", &cBasic{}, false}, {"\n[section]\nname=val\\", &cBasic{}, false}, // {"\n[sub \"A\\\n\"]\nname=value", &cSubs{}, false}, {"\n[sub \"A\\\t\"]\nname=value", &cSubs{}, false}, // broken line // {"[section]\nname=value \\\n value", &cBasic{Section: cBasicS1{Name: "value value"}}, true}, // {"[section]\nname=\"value \\\n value\"", &cBasic{}, false}, }}, {"scanning:whitespace", []readtest{ {" \n[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {" [section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\t[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[ section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section ]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\n name=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\nname =value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\nname= value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\nname=value ", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\r\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\r\nname=value\r\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {";cmnt\r\n[section]\r\nname=value\r\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, // long lines {sp4096 + "[section]\nname=value\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[" + sp4096 + "section]\nname=value\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section" + sp4096 + "]\nname=value\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]" + sp4096 + "\nname=value\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\n" + sp4096 + "name=value\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\nname" + sp4096 + "=value\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\nname=" + sp4096 + "value\n", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\nname=value\n" + sp4096, &cBasic{Section: cBasicS1{Name: "value"}}, true}, }}, {"scanning:comments", []readtest{ {"; cmnt\n[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"# cmnt\n[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {" ; cmnt\n[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\t; cmnt\n[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\n[section]; cmnt\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\n[section] ; cmnt\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\n[section]\nname=value; cmnt", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\n[section]\nname=value ; cmnt", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\n[section]\nname=\"value\" ; cmnt", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\n[section]\nname=value ; \"cmnt", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"\n[section]\nname=\"va ; lue\" ; cmnt", &cBasic{Section: cBasicS1{Name: "va ; lue"}}, true}, {"\n[section]\nname=; cmnt", &cBasic{Section: cBasicS1{Name: ""}}, true}, }}, {"scanning:subsections", []readtest{ {"\n[sub \"A\"]\nname=value", &cSubs{map[string]*cSubsS1{"A": {"value"}}}, true}, {"\n[sub \"b\"]\nname=value", &cSubs{map[string]*cSubsS1{"b": {"value"}}}, true}, {"\n[sub \"A\\\\\"]\nname=value", &cSubs{map[string]*cSubsS1{"A\\": {"value"}}}, true}, {"\n[sub \"A\\\"\"]\nname=value", &cSubs{map[string]*cSubsS1{"A\"": {"value"}}}, true}, }}, {"syntax", []readtest{ // invalid line {"\n[section]\n=", &cBasic{}, false}, // no section {"name=value", &cBasic{}, false}, // empty section {"\n[]\nname=value", &cBasic{}, false}, // empty subsection name {"\n[sub \"\"]\nname=value", &cSubs{Sub: map[string]*cSubsS1{"": {"value"}}}, true}, }}, {"setting", []readtest{ {"[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, // pointer {"[section]", &cBasic{Section: cBasicS1{PName: nil}}, true}, {"[section]\npname=value", &cBasic{Section: cBasicS1{PName: newString("value")}}, true}, {"[m3]", &cMulti{M3: cMultiS3{PMulti: nil}}, true}, {"[m3]\npmulti", &cMulti{M3: cMultiS3{PMulti: newStringSlice()}}, true}, {"[m3]\npmulti=value", &cMulti{M3: cMultiS3{PMulti: newStringSlice("value")}}, true}, {"[m3]\npmulti=value1\npmulti=value2", &cMulti{M3: cMultiS3{PMulti: newStringSlice("value1", "value2")}}, true}, // section name not matched {"\n[nonexistent]\nname=value", &cBasic{}, false}, // subsection name not matched {"\n[section \"nonexistent\"]\nname=value", &cBasic{}, false}, // variable name not matched {"\n[section]\nnonexistent=value", &cBasic{}, false}, // hyphen in name {"[hyphen-in-section]\nhyphen-in-name=value", &cBasic{Hyphen_In_Section: cBasicS2{Hyphen_In_Name: "value"}}, true}, // ignore unexported fields {"[unexported]\nname=value", &cBasic{}, false}, {"[exported]\nunexported=value", &cBasic{}, false}, // 'X' prefix for non-upper/lower-case letters {"[甲]\n乙=丙", &cUni{X甲: cUniS1{X乙: "丙"}}, true}, //{"[section]\nxname=value", &cBasic{XSection: cBasicS4{XName: "value"}}, false}, //{"[xsection]\nname=value", &cBasic{XSection: cBasicS4{XName: "value"}}, false}, // name specified as struct tag {"[tag-name]\nname=value", &cBasic{TagName: cBasicS1{Name: "value"}}, true}, // empty subsections {"\n[sub \"A\"]\n[sub \"B\"]", &cSubs{map[string]*cSubsS1{"A": {}, "B": {}}}, true}, }}, {"multivalue", []readtest{ // unnamed slice type: treat as multi-value {"\n[m1]", &cMulti{M1: cMultiS1{}}, true}, {"\n[m1]\nmulti=value", &cMulti{M1: cMultiS1{[]string{"value"}}}, true}, {"\n[m1]\nmulti=value1\nmulti=value2", &cMulti{M1: cMultiS1{[]string{"value1", "value2"}}}, true}, // "blank" empties multi-valued slice -- here same result as above {"\n[m1]\nmulti\nmulti=value1\nmulti=value2", &cMulti{M1: cMultiS1{[]string{"value1", "value2"}}}, true}, // named slice type: do not treat as multi-value {"\n[m2]", &cMulti{}, true}, {"\n[m2]\nmulti=value", &cMulti{}, false}, {"\n[m2]\nmulti=value1\nmulti=value2", &cMulti{}, false}, }}, {"type:string", []readtest{ {"[section]\nname=value", &cBasic{Section: cBasicS1{Name: "value"}}, true}, {"[section]\nname=", &cBasic{Section: cBasicS1{Name: ""}}, true}, }}, {"type:bool", []readtest{ // explicit values {"[section]\nbool=true", &cBool{cBoolS1{true}}, true}, {"[section]\nbool=yes", &cBool{cBoolS1{true}}, true}, {"[section]\nbool=on", &cBool{cBoolS1{true}}, true}, {"[section]\nbool=1", &cBool{cBoolS1{true}}, true}, {"[section]\nbool=tRuE", &cBool{cBoolS1{true}}, true}, {"[section]\nbool=false", &cBool{cBoolS1{false}}, true}, {"[section]\nbool=no", &cBool{cBoolS1{false}}, true}, {"[section]\nbool=off", &cBool{cBoolS1{false}}, true}, {"[section]\nbool=0", &cBool{cBoolS1{false}}, true}, {"[section]\nbool=NO", &cBool{cBoolS1{false}}, true}, // "blank" value handled as true {"[section]\nbool", &cBool{cBoolS1{true}}, true}, // bool parse errors {"[section]\nbool=maybe", &cBool{}, false}, {"[section]\nbool=t", &cBool{}, false}, {"[section]\nbool=truer", &cBool{}, false}, {"[section]\nbool=2", &cBool{}, false}, {"[section]\nbool=-1", &cBool{}, false}, }}, {"type:numeric", []readtest{ {"[section]\nint=0", &cBasic{Section: cBasicS1{Int: 0}}, true}, {"[section]\nint=1", &cBasic{Section: cBasicS1{Int: 1}}, true}, {"[section]\nint=-1", &cBasic{Section: cBasicS1{Int: -1}}, true}, {"[section]\nint=0.2", &cBasic{}, false}, {"[section]\nint=1e3", &cBasic{}, false}, // primitive [u]int(|8|16|32|64) and big.Int is parsed as dec or hex (not octal) {"[n1]\nint=010", &cNum{N1: cNumS1{Int: 10}}, true}, {"[n1]\nint=0x10", &cNum{N1: cNumS1{Int: 0x10}}, true}, {"[n1]\nbig=1", &cNum{N1: cNumS1{Big: big.NewInt(1)}}, true}, {"[n1]\nbig=0x10", &cNum{N1: cNumS1{Big: big.NewInt(0x10)}}, true}, {"[n1]\nbig=010", &cNum{N1: cNumS1{Big: big.NewInt(10)}}, true}, {"[n2]\nmultiint=010", &cNum{N2: cNumS2{MultiInt: []int{10}}}, true}, {"[n2]\nmultibig=010", &cNum{N2: cNumS2{MultiBig: []*big.Int{big.NewInt(10)}}}, true}, // set parse mode for int types via struct tag {"[n1]\nintdho=010", &cNum{N1: cNumS1{IntDHO: 010}}, true}, // octal allowed for named type {"[n3]\nfilemode=0777", &cNum{N3: cNumS3{FileMode: 0777}}, true}, }}, {"type:textUnmarshaler", []readtest{ {"[section]\nname=value", &cTxUnm{Section: cTxUnmS1{Name: "value"}}, true}, {"[section]\nname=error", &cTxUnm{}, false}, }}, } func TestReadStringInto(t *testing.T) { for _, tg := range readtests { for i, tt := range tg.tests { id := fmt.Sprintf("%s:%d", tg.group, i) t.Run(id+tt.gcfg, func(t *testing.T) { testRead(t, id, tt) }) } } } func TestReadStringIntoMultiBlankPreset(t *testing.T) { tt := readtest{"\n[m1]\nmulti\nmulti=value1\nmulti=value2", &cMulti{M1: cMultiS1{[]string{"value1", "value2"}}}, true} cfg := &cMulti{M1: cMultiS1{[]string{"preset1", "preset2"}}} testReadInto(t, "multi:blank", tt, cfg) } func testRead(t *testing.T, id string, tt readtest) { // get the type of the expected result restyp := reflect.TypeOf(tt.exp).Elem() // create a new instance to hold the actual result res := reflect.New(restyp).Interface() testReadInto(t, id, tt, res) } func testReadInto(t *testing.T, id string, tt readtest, res any) { err := ReadStringInto(res, tt.gcfg) if tt.ok { if err != nil { t.Errorf("%s fail: got error %v, wanted ok", id, err) return } else if !reflect.DeepEqual(res, tt.exp) { t.Errorf("%s fail: got value %#v, wanted value %#v", id, res, tt.exp) return } if !testing.Short() { t.Logf("%s pass: got value %#v", id, res) } } else { // !tt.ok if err == nil { t.Errorf("%s fail: got value %#v, wanted error", id, res) return } if !testing.Short() { t.Logf("%s pass: got error %v", id, err) } } } func TestReadFileInto(t *testing.T) { res := &struct{ Section struct{ Name string } }{} err := ReadFileInto(res, "testdata/gcfg_test.gcfg") if err != nil { t.Error(err) } if res.Section.Name != "value" { t.Errorf("got %q, wanted %q", res.Section.Name, "value") } } func TestReadFileIntoUnicode(t *testing.T) { res := &struct{ X甲 struct{ X乙 string } }{} err := ReadFileInto(res, "testdata/gcfg_unicode_test.gcfg") if err != nil { t.Error(err) } if res.X甲.X乙 != "丙" { t.Errorf("got %q, wanted %q", res.X甲.X乙, "丙") } } func TestReadStringIntoSubsectDefaults(t *testing.T) { type subsect struct { Color string Orientation string } res := &struct { Default_Profile subsect Profile map[string]*subsect }{Default_Profile: subsect{Color: "green"}} cfg := ` [profile "one"] orientation = left` err := ReadStringInto(res, cfg) if err != nil { t.Error(err) } if res.Profile["one"].Color != "green" { t.Errorf("got %q; want %q", res.Profile["one"].Color, "green") } } func TestReadStringIntoExtraData(t *testing.T) { res := &struct { Section struct { Name string } }{} cfg := ` [section] name = value name2 = value2` err := FatalOnly(ReadStringInto(res, cfg)) if err != nil { t.Errorf("unexpected error: %v", err) } if res.Section.Name != "value" { t.Errorf("res.Section.Name=%q; want %q", res.Section.Name, "value") } } func TestReadWithCallback(t *testing.T) { results := [][]string{} cb := func(s string, ss string, k string, v string, bv bool) error { results = append(results, []string{s, ss, k, v, strconv.FormatBool(bv)}) return nil } text := ` [sect1] key1=value1 [sect1 "subsect1"] key2=value2 key3=value3 key4 key5= [sect1 "subsect2"] [sect2] [sect3] foo = "!f(){ \ echo hello; \ };f" ` expected := [][]string{ {"sect1", "", "", "", "true"}, {"sect1", "", "key1", "value1", "false"}, {"sect1", "subsect1", "", "", "true"}, {"sect1", "subsect1", "key2", "value2", "false"}, {"sect1", "subsect1", "key3", "value3", "false"}, {"sect1", "subsect1", "key4", "", "true"}, {"sect1", "subsect1", "key5", "", "false"}, {"sect1", "subsect2", "", "", "true"}, {"sect2", "", "", "", "true"}, {"sect3", "", "", "", "true"}, {"sect3", "", "foo", "!f(){ \n\techo hello; \n\t};f", "false"}, } err := ReadWithCallback(bytes.NewReader([]byte(text)), cb) if err != nil { t.Error(err) } if !reflect.DeepEqual(results, expected) { t.Errorf("expected %+v, got %+v", expected, results) } i := 0 expectedErr := errors.New("FATAL ERROR") results = [][]string{} cbWithError := func(s string, ss string, k string, v string, bv bool) error { results = append(results, []string{s, ss, k, v, strconv.FormatBool(bv)}) i += 1 if i == 3 { return expectedErr } return nil } err = ReadWithCallback(bytes.NewReader([]byte(text)), cbWithError) if !errors.Is(err, expectedErr) { t.Errorf("expected error: %+v", err) } if !reflect.DeepEqual(results, expected[:3]) { t.Errorf("expected %+v, got %+v", expected, results[:3]) } } func TestReadWithCallback_WithError(t *testing.T) { results := [][]string{} cb := func(s string, ss string, k string, v string, bv bool) error { results = append(results, []string{s, ss, k, v, strconv.FormatBool(bv)}) return nil } text := ` [sect1] key1=value1 [sect1 "subsect1"] key2=value2 key3=value3 key4 key5= [sect1 "subsect2"] [sect2] ` expected := [][]string{ {"sect1", "", "", "", "true"}, {"sect1", "", "key1", "value1", "false"}, {"sect1", "subsect1", "", "", "true"}, {"sect1", "subsect1", "key2", "value2", "false"}, {"sect1", "subsect1", "key3", "value3", "false"}, {"sect1", "subsect1", "key4", "", "true"}, {"sect1", "subsect1", "key5", "", "false"}, {"sect1", "subsect2", "", "", "true"}, {"sect2", "", "", "", "true"}, } err := ReadWithCallback(bytes.NewReader([]byte(text)), cb) if err != nil { t.Error(err) } if !reflect.DeepEqual(results, expected) { t.Errorf("expected %+v, got %+v", expected, results) } } ================================================ FILE: modules/gcfg/scanner/errors.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package scanner import ( "errors" "fmt" "io" "sort" "github.com/antgroup/hugescm/modules/gcfg/token" ) // In an ErrorList, an error is represented by an *Error. // The position Pos, if valid, points to the beginning of // the offending token, and the error condition is described // by Msg. type Error struct { Pos token.Position Msg string } // Error implements the error interface. func (e Error) Error() string { if e.Pos.Filename != "" || e.Pos.IsValid() { // don't print "" // TODO(gri) reconsider the semantics of Position.IsValid return e.Pos.String() + ": " + e.Msg } return e.Msg } // ErrorList is a list of *Errors. // The zero value for an ErrorList is an empty ErrorList ready to use. type ErrorList []*Error // Add adds an Error with given position and error message to an ErrorList. func (p *ErrorList) Add(pos token.Position, msg string) { *p = append(*p, &Error{pos, msg}) } // Reset resets an ErrorList to no errors. func (p *ErrorList) Reset() { *p = (*p)[0:0] } // ErrorList implements the sort Interface. func (p ErrorList) Len() int { return len(p) } func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p ErrorList) Less(i, j int) bool { e := &p[i].Pos f := &p[j].Pos if e.Filename < f.Filename { return true } if e.Filename == f.Filename { return e.Offset < f.Offset } return false } // Sort sorts an ErrorList. *Error entries are sorted by position, // other errors are sorted by error message, and before any *Error // entry. func (p ErrorList) Sort() { sort.Sort(p) } // RemoveMultiples sorts an ErrorList and removes all but the first error per line. func (p *ErrorList) RemoveMultiples() { sort.Sort(p) var last token.Position // initial last.Line is != any legal error line i := 0 for _, e := range *p { if e.Pos.Filename != last.Filename || e.Pos.Line != last.Line { last = e.Pos (*p)[i] = e i++ } } (*p) = (*p)[0:i] } // An ErrorList implements the error interface. func (p ErrorList) Error() string { switch len(p) { case 0: return "no errors" case 1: return p[0].Error() } return fmt.Sprintf("%s (and %d more errors)", p[0], len(p)-1) } // Err returns an error equivalent to this error list. // If the list is empty, Err returns nil. func (p ErrorList) Err() error { if len(p) == 0 { return nil } return p } // PrintError is a utility function that prints a list of errors to w, // one error per line, if the err parameter is an ErrorList. Otherwise // it prints the err string. func PrintError(w io.Writer, err error) { if list, ok := errors.AsType[ErrorList](err); ok { for _, e := range list { _, _ = fmt.Fprintf(w, "%s\n", e) } return } if err != nil { _, _ = fmt.Fprintf(w, "%s\n", err) } } ================================================ FILE: modules/gcfg/scanner/example_test.go ================================================ // Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package scanner_test import ( "fmt" "log" "github.com/antgroup/hugescm/modules/gcfg/scanner" "github.com/antgroup/hugescm/modules/gcfg/token" ) func ExampleScanner_Scan() { // src is the input that we want to tokenize. src := []byte(`[profile "A"] color = blue ; Comment`) // Initialize the scanner. var s scanner.Scanner fset := token.NewFileSet() // positions are relative to fset file, err := fset.AddFile("", fset.Base(), len(src)) // register input "file" if err != nil { log.Fatalf("failed to add file: %v", err) } err = s.Init(file, src, nil /* no error handler */, scanner.ScanComments) if err != nil { log.Fatalf("failed to initialize scanner: %v", err) } // Repeated calls to Scan yield the token sequence found in the input. for { pos, tok, lit, err := s.Scan() if err != nil { log.Fatalf("failed to scan: %v", err) } if tok == token.EOF { break } fmt.Printf("%s\t%q\t%q\n", fset.Position(pos), tok, lit) } // output: // 1:1 "[" "" // 1:2 "IDENT" "profile" // 1:10 "STRING" "\"A\"" // 1:13 "]" "" // 1:14 "\n" "" // 2:1 "IDENT" "color" // 2:7 "=" "" // 2:9 "STRING" "blue" // 2:14 "COMMENT" "; Comment" } ================================================ FILE: modules/gcfg/scanner/scanner.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package scanner implements a scanner for gcfg configuration text. // It takes a []byte as source which can then be tokenized // through repeated calls to the Scan method. // // Note that the API for the scanner package may change to accommodate new // features or implementation changes in gcfg. package scanner import ( "errors" "fmt" "path/filepath" "unicode" "unicode/utf8" "github.com/antgroup/hugescm/modules/gcfg/token" ) var ErrSourceLenAndSizeMismatch = errors.New("source length and file size mismatch") // An ErrorHandler may be provided to Scanner.Init. If a syntax error is // encountered and a handler was installed, the handler is called with a // position and an error message. The position points to the beginning of // the offending token. type ErrorHandler func(pos token.Position, msg string) // A Scanner holds the scanner's internal state while processing // a given text. It can be allocated as part of another data // structure but must be initialized via Init before use. type Scanner struct { // immutable state file *token.File // source file handle dir string // directory portion of file.Name() src []byte // source err ErrorHandler // error reporting; or nil mode Mode // scanning mode // scanning state ch rune // current character offset int // character offset rdOffset int // reading offset (position after current character) lineOffset int // current line offset nextVal bool // next token is expected to be a value // public state - ok to modify ErrorCount int // number of errors encountered } // Read the next Unicode char into s.ch. // s.ch < 0 means end-of-file. func (s *Scanner) next() error { if s.rdOffset < len(s.src) { s.offset = s.rdOffset if s.ch == '\n' { s.lineOffset = s.offset s.file.AddLine(s.offset) } r, w := rune(s.src[s.rdOffset]), 1 switch { case r == 0: err := s.error(s.offset, "illegal character NUL") if err != nil { return err } case r >= 0x80: // not ASCII r, w = utf8.DecodeRune(s.src[s.rdOffset:]) if r == utf8.RuneError && w == 1 { err := s.error(s.offset, "illegal UTF-8 encoding") if err != nil { return err } } } s.rdOffset += w s.ch = r } else { s.offset = len(s.src) if s.ch == '\n' { s.lineOffset = s.offset s.file.AddLine(s.offset) } s.ch = -1 // eof } return nil } // A mode value is a set of flags (or 0). // They control scanner behavior. type Mode uint const ( ScanComments Mode = 1 << iota // return comments as COMMENT tokens ) // Init prepares the scanner s to tokenize the text src by setting the // scanner at the beginning of src. The scanner uses the file set file // for position information and it adds line information for each line. // It is ok to re-use the same file when re-scanning the same file as // line information which is already present is ignored. Init returns // ErrSourceLenAndSizeMismatch if the file size does not match the src // size. // // Calls to Scan will invoke the error handler err if they encounter a // syntax error and err is not nil. Also, for each error encountered, // the Scanner field ErrorCount is incremented by one. The mode parameter // determines how comments are handled. // // Note that Init may call err if there is an error in the first character // of the file. func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) error { // Explicitly initialize all fields since a scanner may be reused. if file.Size() != len(src) { return fmt.Errorf("%w: file size (%d) src len (%d)", ErrSourceLenAndSizeMismatch, file.Size(), len(src)) } s.file = file s.dir, _ = filepath.Split(file.Name()) s.src = src s.err = err s.mode = mode s.ch = ' ' s.offset = 0 s.rdOffset = 0 s.lineOffset = 0 s.ErrorCount = 0 s.nextVal = false _ = s.next() return nil } func (s *Scanner) error(offs int, msg string) error { if s.err != nil { pos, err := s.file.Pos(offs) if err != nil { return err } position, err := s.file.Position(pos) if err != nil { return err } s.err(position, msg) } s.ErrorCount++ return nil } func (s *Scanner) scanComment() string { // initial [;#] already consumed offs := s.offset - 1 // position of initial [;#] for s.ch != '\n' && s.ch >= 0 { _ = s.next() } return string(s.src[offs:s.offset]) } func isLetter(ch rune) bool { return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch >= 0x80 && unicode.IsLetter(ch) } func isDigit(ch rune) bool { return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) } func (s *Scanner) scanIdentifier() string { offs := s.offset for isLetter(s.ch) || isDigit(s.ch) || s.ch == '-' { _ = s.next() } return string(s.src[offs:s.offset]) } // val indicate if we are scanning a value (vs a header) func (s *Scanner) scanEscape(val bool) error { offs := s.offset ch := s.ch _ = s.next() // always make progress switch ch { case '\\', '"', '\n': // ok case 'n', 't', 'b': if val { break // ok } fallthrough default: err := s.error(offs, "unknown escape sequence") if err != nil { return err } } return nil } func (s *Scanner) scanString() (string, error) { // '"' opening already consumed offs := s.offset - 1 for s.ch != '"' { ch := s.ch _ = s.next() if ch == '\n' || ch < 0 { err := s.error(offs, "string not terminated") if err != nil { return "", err } break } if ch == '\\' { _ = s.scanEscape(false) } } err := s.next() if err != nil { return "", err } return string(s.src[offs:s.offset]), nil } func stripCR(b []byte) []byte { c := make([]byte, len(b)) i := 0 for _, ch := range b { if ch != '\r' { c[i] = ch i++ } } return c[:i] } func (s *Scanner) scanValString() (string, error) { offs := s.offset hasCR := false end := offs inQuote := false loop: for inQuote || s.ch >= 0 && s.ch != '\n' && s.ch != ';' && s.ch != '#' { ch := s.ch _ = s.next() switch { case inQuote && ch == '\\': _ = s.scanEscape(true) case !inQuote && ch == '\\': if s.ch == '\r' { hasCR = true _ = s.next() } if s.ch != '\n' { _ = s.scanEscape(true) } else { _ = s.next() } case ch == '"': inQuote = !inQuote case ch == '\r': hasCR = true case ch < 0 || inQuote && ch == '\n': err := s.error(offs, "string not terminated") if err != nil { return "", err } break loop } if inQuote || !isWhiteSpace(ch) { end = s.offset } } lit := s.src[offs:end] if hasCR { lit = stripCR(lit) } return string(lit), nil } func isWhiteSpace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\r' } func (s *Scanner) skipWhitespace() { for isWhiteSpace(s.ch) { _ = s.next() } } // Scan scans the next token and returns the token position, the token, // and its literal string if applicable. The source end is indicated by // token.EOF. // // If the returned token is a literal (token.IDENT, token.STRING) or // token.COMMENT, the literal string has the corresponding value. // // If the returned token is token.ILLEGAL, the literal string is the // offending character. // // In all other cases, Scan returns an empty literal string. // // For more tolerant parsing, Scan will return a valid token if // possible even if a syntax error was encountered. Thus, even // if the resulting token sequence contains no illegal tokens, // a client may not assume that no error occurred. Instead it // must check the scanner's ErrorCount or the number of calls // of the error handler, if there was one installed. // // Scan adds line information to the file added to the file // set with Init. Token positions are relative to that file // and thus relative to the file set. func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string, err error) { scanAgain: s.skipWhitespace() // current token start p, err2 := s.file.Pos(s.offset) if err2 != nil { err = fmt.Errorf("unexpected error at pos %v offset %d: %w", p, s.offset, err2) return } pos = p // determine token value switch ch := s.ch; { case s.nextVal: l, err2 := s.scanValString() if err2 != nil { err = fmt.Errorf("unexpected error at ch %v: %w", ch, err2) return } lit = l tok = token.STRING s.nextVal = false case isLetter(ch): lit = s.scanIdentifier() tok = token.IDENT default: _ = s.next() // always make progress switch ch { case -1: tok = token.EOF case '\n': tok = token.EOL case '"': tok = token.STRING l, err2 := s.scanString() if err2 != nil { err = fmt.Errorf("unexpected error at ch %v: %w", ch, err2) return } lit = l case '[': tok = token.LBRACK case ']': tok = token.RBRACK case ';', '#': // comment lit = s.scanComment() if s.mode&ScanComments == 0 { // skip comment goto scanAgain } tok = token.COMMENT case '=': tok = token.ASSIGN s.nextVal = true default: offset, err2 := s.file.Offset(pos) if err2 != nil { err = fmt.Errorf("unexpected error at pos %v: %w", pos, err2) return } err2 = s.error(offset, fmt.Sprintf("illegal character %#U", ch)) if err2 != nil { err = fmt.Errorf("unexpected error at ch %v offset %d: %w", ch, s.offset, err2) return } tok = token.ILLEGAL lit = string(ch) } } return } ================================================ FILE: modules/gcfg/scanner/scanner_test.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package scanner import ( "os" "strings" "testing" "github.com/antgroup/hugescm/modules/gcfg/token" ) var fset = token.NewFileSet() const /* class */ ( special = iota literal operator ) func tokenclass(tok token.Token) int { switch { case tok.IsLiteral(): return literal case tok.IsOperator(): return operator } return special } type elt struct { tok token.Token lit string class int pre string suf string } var tokens = [...]elt{ // Special tokens {token.COMMENT, "; a comment", special, "", "\n"}, {token.COMMENT, "# a comment", special, "", "\n"}, // Operators and delimiters {token.ASSIGN, "=", operator, "", "value"}, {token.LBRACK, "[", operator, "", ""}, {token.RBRACK, "]", operator, "", ""}, {token.EOL, "\n", operator, "", ""}, // Identifiers {token.IDENT, "foobar", literal, "", ""}, {token.IDENT, "a۰۱۸", literal, "", ""}, {token.IDENT, "foo६४", literal, "", ""}, {token.IDENT, "bar9876", literal, "", ""}, {token.IDENT, "foo-bar", literal, "", ""}, {token.IDENT, "foo", literal, ";\n", ""}, // String literals (subsection names) {token.STRING, `"foobar"`, literal, "", ""}, {token.STRING, `"\""`, literal, "", ""}, // String literals (values) {token.STRING, `"\n"`, literal, "=", ""}, {token.STRING, `"foobar"`, literal, "=", ""}, {token.STRING, `"foo\nbar"`, literal, "=", ""}, {token.STRING, `"foo\"bar"`, literal, "=", ""}, {token.STRING, `"foo\\bar"`, literal, "=", ""}, {token.STRING, `"foobar"`, literal, "=", ""}, {token.STRING, `"foobar"`, literal, "= ", ""}, {token.STRING, `"foobar"`, literal, "=", "\n"}, {token.STRING, `"foobar"`, literal, "=", ";"}, {token.STRING, `"foobar"`, literal, "=", " ;"}, {token.STRING, `"foobar"`, literal, "=", "#"}, {token.STRING, `"foobar"`, literal, "=", " #"}, {token.STRING, "foobar", literal, "=", ""}, {token.STRING, "foobar", literal, "= ", ""}, {token.STRING, "foobar", literal, "=", " "}, {token.STRING, `"foo" "bar"`, literal, "=", " "}, {token.STRING, "foo\\\nbar", literal, "=", ""}, {token.STRING, "foo\\\r\nbar", literal, "=", ""}, } const whitespace = " \t \n\n\n" // to separate tokens var source = func() []byte { var src []byte for _, t := range tokens { src = append(src, t.pre...) src = append(src, t.lit...) src = append(src, t.suf...) src = append(src, whitespace...) } return src }() func newlineCount(s string) int { n := 0 for i := 0; i < len(s); i++ { if s[i] == '\n' { n++ } } return n } func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) { pos := fset.Position(p) if pos.Filename != expected.Filename { t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) } if pos.Offset != expected.Offset { t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) } if pos.Line != expected.Line { t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) } if pos.Column != expected.Column { t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) } } // Verify that calling Scan() provides the correct results. func TestScan(t *testing.T) { // make source src_linecount := newlineCount(string(source)) whitespace_linecount := newlineCount(whitespace) index := 0 // error handler eh := func(_ token.Position, msg string) { t.Errorf("%d: error handler called (msg = %s)", index, msg) } // verify scan var s Scanner file, err := fset.AddFile("", fset.Base(), len(source)) if err != nil { t.Errorf("unexpected error: %v", err) } _ = s.Init(file, source, eh, ScanComments) // epos is the expected position epos := token.Position{ Filename: "", Offset: 0, Line: 1, Column: 1, } for { pos, tok, lit, err := s.Scan() if err != nil { t.Errorf("unexpected error: %v", err) } if lit == "" { // no literal value for non-literal tokens lit = tok.String() } e := elt{token.EOF, "", special, "", ""} if index < len(tokens) { e = tokens[index] } if tok == token.EOF { lit = "" epos.Line = src_linecount epos.Column = 2 } if e.pre != "" && strings.ContainsRune("=;#", rune(e.pre[0])) { epos.Column = 1 checkPos(t, lit, pos, epos) var etok token.Token if e.pre[0] == '=' { etok = token.ASSIGN } else { etok = token.COMMENT } if tok != etok { t.Errorf("bad token for %q: got %q, expected %q", lit, tok, etok) } pos, tok, lit, err = s.Scan() if err != nil { t.Errorf("unexpected error: %v", err) } } epos.Offset += len(e.pre) if tok != token.EOF { epos.Column = 1 + len(e.pre) } if e.pre != "" && e.pre[len(e.pre)-1] == '\n' { epos.Offset-- epos.Column-- checkPos(t, lit, pos, epos) if tok != token.EOL { t.Errorf("bad token for %q: got %q, expected %q", lit, tok, token.EOL) } epos.Line++ epos.Offset++ epos.Column = 1 pos, tok, lit, err = s.Scan() if err != nil { t.Errorf("unexpected error: %v", err) } } checkPos(t, lit, pos, epos) if tok != e.tok { t.Errorf("bad token for %q: got %q, expected %q", lit, tok, e.tok) } if e.tok.IsLiteral() { // no CRs in value string literals elit := e.lit if strings.ContainsRune(e.pre, '=') { elit = string(stripCR([]byte(elit))) epos.Offset += len(e.lit) - len(lit) // correct position } if lit != elit { t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit) } } if tokenclass(tok) != e.class { t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) } epos.Offset += len(lit) + len(e.suf) + len(whitespace) epos.Line += newlineCount(lit) + newlineCount(e.suf) + whitespace_linecount index++ if tok == token.EOF { break } if e.suf == "value" { _, tok, lit, err = s.Scan() if err != nil { t.Errorf("unexpected error: %v", err) } if tok != token.STRING { t.Errorf("bad token for %q: got %q, expected %q", lit, tok, token.STRING) } } else if strings.ContainsRune(e.suf, ';') || strings.ContainsRune(e.suf, '#') { _, tok, lit, err = s.Scan() if err != nil { t.Errorf("unexpected error: %v", err) } if tok != token.COMMENT { t.Errorf("bad token for %q: got %q, expected %q", lit, tok, token.COMMENT) } } // skip EOLs for i := 0; i < whitespace_linecount+newlineCount(e.suf); i++ { _, tok, lit, err = s.Scan() if err != nil { t.Errorf("unexpected error: %v", err) } if tok != token.EOL { t.Errorf("bad token for %q: got %q, expected %q", lit, tok, token.EOL) } } } if s.ErrorCount != 0 { t.Errorf("found %d errors", s.ErrorCount) } } func TestScanValStringEOF(t *testing.T) { var s Scanner src := "= value" f, err := fset.AddFile("src", fset.Base(), len(src)) if err != nil { t.Errorf("unexpected error: %v", err) } _ = s.Init(f, []byte(src), nil, 0) _, _, _, _ = s.Scan() // = _, _, _, _ = s.Scan() // value _, tok, _, err := s.Scan() // EOF if err != nil { t.Errorf("unexpected error: %v", err) } if tok != token.EOF { t.Errorf("bad token: got %s, expected %s", tok, token.EOF) } if s.ErrorCount > 0 { t.Error("scanning error") } } // Verify that initializing the same scanner more then once works correctly. func TestInit(t *testing.T) { var s Scanner // 1st init src1 := "\nname = value" f1, err := fset.AddFile("src1", fset.Base(), len(src1)) if err != nil { t.Errorf("unexpected error: %v", err) } _ = s.Init(f1, []byte(src1), nil, 0) if f1.Size() != len(src1) { t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) } _, _, _, _ = s.Scan() // \n _, _, _, _ = s.Scan() // name _, tok, _, err := s.Scan() // = if err != nil { t.Errorf("unexpected error: %v", err) } if tok != token.ASSIGN { t.Errorf("bad token: got %s, expected %s", tok, token.ASSIGN) } // 2nd init src2 := "[section]" f2, err := fset.AddFile("src2", fset.Base(), len(src2)) if err != nil { t.Errorf("unexpected error: %v", err) } _ = s.Init(f2, []byte(src2), nil, 0) if f2.Size() != len(src2) { t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) } _, tok, _, err = s.Scan() // [ if err != nil { t.Errorf("unexpected error: %v", err) } if tok != token.LBRACK { t.Errorf("bad token: got %s, expected %s", tok, token.LBRACK) } if s.ErrorCount != 0 { t.Errorf("found %d errors", s.ErrorCount) } } func TestStdErrorHandler(t *testing.T) { const src = "@\n" + // illegal character, cause an error "@ @\n" // two errors on the same line var list ErrorList eh := func(pos token.Position, msg string) { list.Add(pos, msg) } var s Scanner file, err := fset.AddFile("File1", fset.Base(), len(src)) if err != nil { t.Errorf("unexpected error: %v", err) } _ = s.Init(file, []byte(src), eh, 0) for { _, tok, _, err := s.Scan() if err != nil { t.Errorf("unexpected error: %v", err) } if tok == token.EOF { break } } if len(list) != s.ErrorCount { t.Errorf("found %d errors, expected %d", len(list), s.ErrorCount) } if len(list) != 3 { t.Errorf("found %d raw errors, expected 3", len(list)) PrintError(os.Stderr, list) } list.Sort() if len(list) != 3 { t.Errorf("found %d sorted errors, expected 3", len(list)) PrintError(os.Stderr, list) } list.RemoveMultiples() if len(list) != 2 { t.Errorf("found %d one-per-line errors, expected 2", len(list)) PrintError(os.Stderr, list) } } type errorCollector struct { cnt int // number of errors encountered msg string // last error message encountered pos token.Position // last error position encountered } func checkError(t *testing.T, src string, tok token.Token, pos int, err string) { var s Scanner var h errorCollector eh := func(pos token.Position, msg string) { h.cnt++ h.msg = msg h.pos = pos } file, err2 := fset.AddFile("", fset.Base(), len(src)) if err2 != nil { t.Errorf("unexpected error: %v", err2) } _ = s.Init(file, []byte(src), eh, ScanComments) if src[0] == '=' { _, _, _, err := s.Scan() if err != nil { t.Errorf("unexpected error: %v", err) } } _, tok0, _, err2 := s.Scan() if err2 != nil { t.Errorf("unexpected error: %v", err2) } _, tok1, _, err2 := s.Scan() if err2 != nil { t.Errorf("unexpected error: %v", err2) } if tok0 != tok { t.Errorf("%q: got %s, expected %s", src, tok0, tok) } if tok1 != token.EOF { t.Errorf("%q: got %s, expected EOF", src, tok1) } cnt := 0 if err != "" { cnt = 1 } if h.cnt != cnt { t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt) } if h.msg != err { t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) } if h.pos.Offset != pos { t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos) } } var testErrors = []struct { src string tok token.Token pos int err string }{ {"\a", token.ILLEGAL, 0, "illegal character U+0007"}, {"/", token.ILLEGAL, 0, "illegal character U+002F '/'"}, {"_", token.ILLEGAL, 0, "illegal character U+005F '_'"}, {`…`, token.ILLEGAL, 0, "illegal character U+2026 '…'"}, {`""`, token.STRING, 0, ""}, {`"`, token.STRING, 0, "string not terminated"}, {"\"\n", token.STRING, 0, "string not terminated"}, {`="`, token.STRING, 1, "string not terminated"}, {"=\"\n", token.STRING, 1, "string not terminated"}, {"=\\", token.STRING, 2, "unknown escape sequence"}, {"=\\\r", token.STRING, 3, "unknown escape sequence"}, {`"\z"`, token.STRING, 2, "unknown escape sequence"}, {`"\a"`, token.STRING, 2, "unknown escape sequence"}, {`"\b"`, token.STRING, 2, "unknown escape sequence"}, {`"\f"`, token.STRING, 2, "unknown escape sequence"}, {`"\r"`, token.STRING, 2, "unknown escape sequence"}, {`"\t"`, token.STRING, 2, "unknown escape sequence"}, {`"\v"`, token.STRING, 2, "unknown escape sequence"}, {`"\0"`, token.STRING, 2, "unknown escape sequence"}, } func TestScanErrors(t *testing.T) { for _, e := range testErrors { checkError(t, e.src, e.tok, e.pos, e.err) } } func BenchmarkScan(b *testing.B) { b.StopTimer() fset := token.NewFileSet() file, err := fset.AddFile("", fset.Base(), len(source)) if err != nil { b.Fatalf("unexpected error: %v", err) } var s Scanner b.StartTimer() for i := b.N - 1; i >= 0; i-- { _ = s.Init(file, source, nil, ScanComments) for { _, tok, _, err := s.Scan() if err != nil { b.Fatalf("unexpected error: %v", err) } if tok == token.EOF { break } } } } ================================================ FILE: modules/gcfg/set.go ================================================ package gcfg import ( "bytes" "encoding" "encoding/gob" "errors" "fmt" "math/big" "reflect" "strings" "unicode" "unicode/utf8" "github.com/antgroup/hugescm/modules/gcfg/types" ) var ( ErrUnsupportedType = errors.New("unsupported type") ErrBlankUnsupported = errors.New("blank value not supported for type") ErrConfigMustBePointerToStruct = errors.New("config must be a pointer to a struct") ErrInvalidMapFieldForSection = errors.New("map field for section must have string keys and pointer-to-struct values") ErrInvalidFieldForSection = errors.New("field for section must be a map or a struct") ) type tag struct { ident string intMode string } func newTag(ts string) tag { t := tag{} s := strings.Split(ts, ",") t.ident = s[0] for _, tse := range s[1:] { if strings.HasPrefix(tse, "int=") { t.intMode = tse[len("int="):] } } return t } func fieldFold(v reflect.Value, name string) (reflect.Value, tag) { var n string r0, _ := utf8.DecodeRuneInString(name) if unicode.IsLetter(r0) && !unicode.IsLower(r0) && !unicode.IsUpper(r0) { n = "X" } n += strings.ReplaceAll(name, "-", "_") f, ok := v.Type().FieldByNameFunc(func(fieldName string) bool { if !v.FieldByName(fieldName).CanSet() { return false } f, _ := v.Type().FieldByName(fieldName) t := newTag(f.Tag.Get("gcfg")) if t.ident != "" { return strings.EqualFold(t.ident, name) } return strings.EqualFold(n, fieldName) }) if !ok { return reflect.Value{}, tag{} } return v.FieldByName(f.Name), newTag(f.Tag.Get("gcfg")) } type setter func(destp any, blank bool, val string, t tag) error var setters = []setter{ typeSetter, textUnmarshalerSetter, kindSetter, scanSetter, } func textUnmarshalerSetter(d any, blank bool, val string, t tag) error { dtu, ok := d.(encoding.TextUnmarshaler) if !ok { return ErrUnsupportedType } if blank { return ErrBlankUnsupported } return dtu.UnmarshalText([]byte(val)) } func boolSetter(d any, blank bool, val string, t tag) error { if blank { reflect.ValueOf(d).Elem().Set(reflect.ValueOf(true)) return nil } b, err := types.ParseBool(val) if err == nil { reflect.ValueOf(d).Elem().Set(reflect.ValueOf(b)) } return err } func intMode(mode string) types.IntMode { var m types.IntMode if strings.ContainsAny(mode, "dD") { m |= types.Dec } if strings.ContainsAny(mode, "hH") { m |= types.Hex } if strings.ContainsAny(mode, "oO") { m |= types.Oct } return m } var typeModes = map[reflect.Type]types.IntMode{ reflect.TypeFor[int](): types.Dec | types.Hex, reflect.TypeFor[int8](): types.Dec | types.Hex, reflect.TypeFor[int16](): types.Dec | types.Hex, reflect.TypeFor[int32](): types.Dec | types.Hex, reflect.TypeFor[int64](): types.Dec | types.Hex, reflect.TypeFor[uint](): types.Dec | types.Hex, reflect.TypeFor[uint8](): types.Dec | types.Hex, reflect.TypeFor[uint16](): types.Dec | types.Hex, reflect.TypeFor[uint32](): types.Dec | types.Hex, reflect.TypeFor[uint64](): types.Dec | types.Hex, // use default mode (allow dec/hex/oct) for uintptr type reflect.TypeFor[big.Int](): types.Dec | types.Hex, } func intModeDefault(t reflect.Type) types.IntMode { m, ok := typeModes[t] if !ok { m = types.Dec | types.Hex | types.Oct } return m } func intSetter(d any, blank bool, val string, t tag) error { if blank { return ErrBlankUnsupported } mode := intMode(t.intMode) if mode == 0 { mode = intModeDefault(reflect.TypeOf(d).Elem()) } return types.ParseInt(d, val, mode) } func stringSetter(d any, blank bool, val string, t tag) error { if blank { return ErrBlankUnsupported } dsp, ok := d.(*string) if !ok { return ErrUnsupportedType } *dsp = val return nil } var kindSetters = map[reflect.Kind]setter{ reflect.String: stringSetter, reflect.Bool: boolSetter, reflect.Int: intSetter, reflect.Int8: intSetter, reflect.Int16: intSetter, reflect.Int32: intSetter, reflect.Int64: intSetter, reflect.Uint: intSetter, reflect.Uint8: intSetter, reflect.Uint16: intSetter, reflect.Uint32: intSetter, reflect.Uint64: intSetter, reflect.Uintptr: intSetter, } var typeSetters = map[reflect.Type]setter{ reflect.TypeFor[big.Int](): intSetter, } func typeSetter(d any, blank bool, val string, tt tag) error { t := reflect.ValueOf(d).Type().Elem() setter, ok := typeSetters[t] if !ok { return ErrUnsupportedType } return setter(d, blank, val, tt) } func kindSetter(d any, blank bool, val string, tt tag) error { k := reflect.ValueOf(d).Type().Elem().Kind() setter, ok := kindSetters[k] if !ok { return ErrUnsupportedType } return setter(d, blank, val, tt) } func scanSetter(d any, blank bool, val string, tt tag) error { if blank { return ErrBlankUnsupported } return types.ScanFully(d, val, 'v') } func newValue(sect string, vCfg reflect.Value, vType reflect.Type) (reflect.Value, error) { // pv := reflect.New(vType) dfltName := "default-" + sect dfltField, _ := fieldFold(vCfg, dfltName) var err error if dfltField.IsValid() { b := bytes.NewBuffer(nil) ge := gob.NewEncoder(b) err = ge.EncodeValue(dfltField) if err != nil && errors.Is(err, ErrSyntaxWarning) { return pv, err } gd := gob.NewDecoder(bytes.NewReader(b.Bytes())) err = gd.DecodeValue(pv.Elem()) if err != nil && errors.Is(err, ErrSyntaxWarning) { return pv, err } } return pv, nil } func set(cfg any, sect, sub, name string, value string, blankValue bool, subsectPass bool) error { // vPCfg := reflect.ValueOf(cfg) if vPCfg.Kind() != reflect.Pointer || vPCfg.Elem().Kind() != reflect.Struct { return ErrConfigMustBePointerToStruct } vCfg := vPCfg.Elem() vSect, _ := fieldFold(vCfg, sect) if !vSect.IsValid() { return newSyntaxWarning(sect, "", "") } isSubsect := vSect.Kind() == reflect.Map if subsectPass != isSubsect { return nil } if isSubsect { vst := vSect.Type() if vst.Key().Kind() != reflect.String || vst.Elem().Kind() != reflect.Pointer || vst.Elem().Elem().Kind() != reflect.Struct { return fmt.Errorf("%w: section %q", ErrInvalidMapFieldForSection, sect) } if vSect.IsNil() { vSect.Set(reflect.MakeMap(vst)) } k := reflect.ValueOf(sub) pv := vSect.MapIndex(k) if !pv.IsValid() { vType := vSect.Type().Elem().Elem() var err error if pv, err = newValue(sect, vCfg, vType); err != nil { return err } vSect.SetMapIndex(k, pv) } vSect = pv.Elem() } else if vSect.Kind() != reflect.Struct { return fmt.Errorf("%w: section %q", ErrInvalidFieldForSection, sect) } else if sub != "" { return newSyntaxWarning(sect, sub, "") } // Empty name is a special value, meaning that only the // section/subsection object is to be created, with no values set. if name == "" { return nil } vVar, t := fieldFold(vSect, name) if !vVar.IsValid() { var err error if isSubsect { err = newSyntaxWarning(sect, sub, name) } else { err = newSyntaxWarning(sect, "", name) } return err } // vVal is either single-valued var, or newly allocated value within multi-valued var var vVal reflect.Value // multi-value if unnamed slice type isMulti := vVar.Type().Name() == "" && vVar.Kind() == reflect.Slice || vVar.Type().Name() == "" && vVar.Kind() == reflect.Pointer && vVar.Type().Elem().Name() == "" && vVar.Type().Elem().Kind() == reflect.Slice if isMulti && vVar.Kind() == reflect.Pointer { if vVar.IsNil() { vVar.Set(reflect.New(vVar.Type().Elem())) } vVar = vVar.Elem() } if isMulti && blankValue { vVar.Set(reflect.Zero(vVar.Type())) return nil } if isMulti { vVal = reflect.New(vVar.Type().Elem()).Elem() } else { vVal = vVar } isDeref := vVal.Type().Name() == "" && vVal.Type().Kind() == reflect.Pointer isNew := isDeref && vVal.IsNil() // vAddr is address of value to set (dereferenced & allocated as needed) var vAddr reflect.Value switch { case isNew: vAddr = reflect.New(vVal.Type().Elem()) case isDeref && !isNew: vAddr = vVal default: vAddr = vVal.Addr() } vAddrI := vAddr.Interface() err, ok := error(nil), false for _, s := range setters { err = s(vAddrI, blankValue, value, t) if err == nil { ok = true break } if !errors.Is(err, ErrUnsupportedType) { return err } } if !ok { // in case all setters returned ErrUnsupportedType return err } if isNew { // set reference if it was dereferenced and newly allocated vVal.Set(vAddr) } if isMulti { // append if multi-valued vVar.Set(reflect.Append(vVar, vVal)) } return nil } ================================================ FILE: modules/gcfg/token/position.go ================================================ // Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // TODO(gri) consider making this a separate package outside the go directory. package token import ( "errors" "fmt" "sort" "sync" ) var ( ErrIllegalFileOffset = errors.New("illegal file offset") ErrIllegalPosValue = errors.New("illegal Pos value") ErrIllegalBaseOrValue = errors.New("illegal base or value") ErrPosOffsetOverflow = errors.New("token.Pos offset overflow (> 2G of source code in file set)") ) // ----------------------------------------------------------------------------- // Positions // Position describes an arbitrary source position // including the file, line, and column location. // A Position is valid if the line number is > 0. type Position struct { Filename string // filename, if any Offset int // offset, starting at 0 Line int // line number, starting at 1 Column int // column number, starting at 1 (character count) } // IsValid returns true if the position is valid. func (pos *Position) IsValid() bool { return pos.Line > 0 } // String returns a string in one of several forms: // // file:line:column valid position with file name // line:column valid position without file name // file invalid position with file name // - invalid position without file name func (pos Position) String() string { s := pos.Filename if pos.IsValid() { if s != "" { s += ":" } s += fmt.Sprintf("%d:%d", pos.Line, pos.Column) } if s == "" { s = "-" } return s } // Pos is a compact encoding of a source position within a file set. // It can be converted into a Position for a more convenient, but much // larger, representation. // // The Pos value for a given file is a number in the range [base, base+size], // where base and size are specified when adding the file to the file set via // AddFile. // // To create the Pos value for a specific source offset, first add // the respective file to the current file set (via FileSet.AddFile) // and then call File.Pos(offset) for that file. Given a Pos value p // for a specific file set fset, the corresponding Position value is // obtained by calling fset.Position(p). // // Pos values can be compared directly with the usual comparison operators: // If two Pos values p and q are in the same file, comparing p and q is // equivalent to comparing the respective source file offsets. If p and q // are in different files, p < q is true if the file implied by p was added // to the respective file set before the file implied by q. type Pos int // The zero value for Pos is NoPos; there is no file and line information // associated with it, and NoPos().IsValid() is false. NoPos is always // smaller than any other Pos value. The corresponding Position value // for NoPos is the zero value for Position. const NoPos Pos = 0 // IsValid returns true if the position is valid. func (p Pos) IsValid() bool { return p != NoPos } // ----------------------------------------------------------------------------- // File // A File is a handle for a file belonging to a FileSet. // A File has a name, size, and line offset table. type File struct { set *FileSet name string // file name as provided to AddFile base int // Pos value range for this file is [base...base+size] size int // file size as provided to AddFile // lines and infos are protected by set.mutex lines []int infos []lineInfo } // Name returns the file name of file f as registered with AddFile. func (f *File) Name() string { return f.name } // Base returns the base offset of file f as registered with AddFile. func (f *File) Base() int { return f.base } // Size returns the size of file f as registered with AddFile. func (f *File) Size() int { return f.size } // LineCount returns the number of lines in file f. func (f *File) LineCount() int { f.set.mutex.RLock() n := len(f.lines) f.set.mutex.RUnlock() return n } // AddLine adds the line offset for a new line. // The line offset must be larger than the offset for the previous line // and smaller than the file size; otherwise the line offset is ignored. func (f *File) AddLine(offset int) { f.set.mutex.Lock() if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size { f.lines = append(f.lines, offset) } f.set.mutex.Unlock() } // SetLines sets the line offsets for a file and returns true if successful. // The line offsets are the offsets of the first character of each line; // for instance for the content "ab\nc\n" the line offsets are {0, 3}. // An empty file has an empty line offset table. // Each line offset must be larger than the offset for the previous line // and smaller than the file size; otherwise SetLines fails and returns // false. func (f *File) SetLines(lines []int) bool { // verify validity of lines table size := f.size for i, offset := range lines { if i > 0 && offset <= lines[i-1] || size <= offset { return false } } // set lines table f.set.mutex.Lock() f.lines = lines f.set.mutex.Unlock() return true } // SetLinesForContent sets the line offsets for the given file content. func (f *File) SetLinesForContent(content []byte) { var lines []int line := 0 for offset, b := range content { if line >= 0 { lines = append(lines, line) } line = -1 if b == '\n' { line = offset + 1 } } // set lines table f.set.mutex.Lock() f.lines = lines f.set.mutex.Unlock() } // A lineInfo object describes alternative file and line number // information (such as provided via a //line comment in a .go // file) for a given file offset. type lineInfo struct { // fields are exported to make them accessible to gob Offset int Filename string Line int } // AddLineInfo adds alternative file and line number information for // a given file offset. The offset must be larger than the offset for // the previously added alternative line info and smaller than the // file size; otherwise the information is ignored. // // AddLineInfo is typically used to register alternative position // information for //line filename:line comments in source files. func (f *File) AddLineInfo(offset int, filename string, line int) { f.set.mutex.Lock() if i := len(f.infos); i == 0 || f.infos[i-1].Offset < offset && offset < f.size { f.infos = append(f.infos, lineInfo{offset, filename, line}) } f.set.mutex.Unlock() } // Pos returns the Pos value for the given file offset; // the offset must be <= f.Size(). // f.Pos(f.Offset(p)) == p. func (f *File) Pos(offset int) (Pos, error) { if offset > f.size { return 0, ErrIllegalFileOffset } return Pos(f.base + offset), nil } // Offset returns the offset for the given file position p; // p must be a valid Pos value in that file. // f.Offset(f.Pos(offset)) == offset. func (f *File) Offset(p Pos) (int, error) { if int(p) < f.base || int(p) > f.base+f.size { return 0, ErrIllegalPosValue } return int(p) - f.base, nil } // Line returns the line number for the given file position p; // p must be a Pos value in that file or NoPos. func (f *File) Line(p Pos) (int, error) { // TODO(gri) this can be implemented much more efficiently position, err := f.Position(p) if err != nil { return 0, err } return position.Line, nil } func searchLineInfos(a []lineInfo, x int) int { return sort.Search(len(a), func(i int) bool { return a[i].Offset > x }) - 1 } // info returns the file name, line, and column number for a file offset. func (f *File) info(offset int) (filename string, line, column int) { filename = f.name if i := searchInts(f.lines, offset); i >= 0 { line, column = i+1, offset-f.lines[i]+1 } if len(f.infos) > 0 { // almost no files have extra line infos if i := searchLineInfos(f.infos, offset); i >= 0 { alt := &f.infos[i] filename = alt.Filename if i := searchInts(f.lines, alt.Offset); i >= 0 { line += alt.Line - i - 1 } } } return } func (f *File) position(p Pos) (pos Position) { offset := int(p) - f.base pos.Offset = offset pos.Filename, pos.Line, pos.Column = f.info(offset) return } // Position returns the Position value for the given file position p; // p must be a Pos value in that file or NoPos. func (f *File) Position(p Pos) (Position, error) { if p != NoPos { if int(p) < f.base || int(p) > f.base+f.size { return Position{}, ErrIllegalPosValue } return f.position(p), nil } return Position{}, nil } // ----------------------------------------------------------------------------- // FileSet // A FileSet represents a set of source files. // Methods of file sets are synchronized; multiple goroutines // may invoke them concurrently. type FileSet struct { mutex sync.RWMutex // protects the file set base int // base offset for the next file files []*File // list of files in the order added to the set last *File // cache of last file looked up } // NewFileSet creates a new file set. func NewFileSet() *FileSet { s := new(FileSet) s.base = 1 // 0 == NoPos return s } // Base returns the minimum base offset that must be provided to // AddFile when adding the next file. func (s *FileSet) Base() int { s.mutex.RLock() b := s.base s.mutex.RUnlock() return b } // AddFile adds a new file with a given filename, base offset, and file size // to the file set s and returns the file. Multiple files may have the same // name. The base offset must not be smaller than the FileSet's Base(), and // size must not be negative. // // Adding the file will set the file set's Base() value to base + size + 1 // as the minimum base value for the next file. The following relationship // exists between a Pos value p for a given file offset offs: // // int(p) = base + offs // // with offs in the range [0, size] and thus p in the range [base, base+size]. // For convenience, File.Pos may be used to create file-specific position // values from a file offset. func (s *FileSet) AddFile(filename string, base, size int) (*File, error) { s.mutex.Lock() defer s.mutex.Unlock() if base < s.base || size < 0 { return nil, ErrIllegalBaseOrValue } // base >= s.base && size >= 0 f := &File{s, filename, base, size, []int{0}, nil} base += size + 1 // +1 because EOF also has a position if base < 0 { return nil, ErrPosOffsetOverflow } // add the file to the file set s.base = base s.files = append(s.files, f) s.last = f return f, nil } // Iterate calls f for the files in the file set in the order they were added // until f returns false. func (s *FileSet) Iterate(f func(*File) bool) { for i := 0; ; i++ { var file *File s.mutex.RLock() if i < len(s.files) { file = s.files[i] } s.mutex.RUnlock() if file == nil || !f(file) { break } } } func searchFiles(a []*File, x int) int { return sort.Search(len(a), func(i int) bool { return a[i].base > x }) - 1 } func (s *FileSet) file(p Pos) *File { // common case: p is in last file if f := s.last; f != nil && f.base <= int(p) && int(p) <= f.base+f.size { return f } // p is not in last file - search all files if i := searchFiles(s.files, int(p)); i >= 0 { f := s.files[i] // f.base <= int(p) by definition of searchFiles if int(p) <= f.base+f.size { s.last = f return f } } return nil } // File returns the file that contains the position p. // If no such file is found (for instance for p == NoPos), // the result is nil. func (s *FileSet) File(p Pos) (f *File) { if p != NoPos { s.mutex.RLock() f = s.file(p) s.mutex.RUnlock() } return } // Position converts a Pos in the fileset into a general Position. func (s *FileSet) Position(p Pos) (pos Position) { if p != NoPos { s.mutex.RLock() if f := s.file(p); f != nil { pos = f.position(p) } s.mutex.RUnlock() } return } // ----------------------------------------------------------------------------- // Helper functions func searchInts(a []int, x int) int { // This function body is a manually inlined version of: // // return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1 // // With better compiler optimizations, this may not be needed in the // future, but at the moment this change improves the go/printer // benchmark performance by ~30%. This has a direct impact on the // speed of gofmt and thus seems worthwhile (2011-04-29). // TODO(gri): Remove this when compilers have caught up. i, j := 0, len(a) for i < j { h := i + (j-i)/2 // avoid overflow when computing h // i ≤ h < j if a[h] <= x { i = h + 1 } else { j = h } } return i - 1 } ================================================ FILE: modules/gcfg/token/position_test.go ================================================ // Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package token import ( "fmt" "testing" ) func checkPos(t *testing.T, msg string, p, q Position) { if p.Filename != q.Filename { t.Errorf("%s: expected filename = %q; got %q", msg, q.Filename, p.Filename) } if p.Offset != q.Offset { t.Errorf("%s: expected offset = %d; got %d", msg, q.Offset, p.Offset) } if p.Line != q.Line { t.Errorf("%s: expected line = %d; got %d", msg, q.Line, p.Line) } if p.Column != q.Column { t.Errorf("%s: expected column = %d; got %d", msg, q.Column, p.Column) } } func TestNoPos(t *testing.T) { if NoPos.IsValid() { t.Errorf("NoPos should not be valid") } var fset *FileSet checkPos(t, "nil NoPos", fset.Position(NoPos), Position{}) fset = NewFileSet() checkPos(t, "fset NoPos", fset.Position(NoPos), Position{}) } var tests = []struct { filename string source []byte // may be nil size int lines []int }{ {"a", []byte{}, 0, []int{}}, {"b", []byte("01234"), 5, []int{0}}, {"c", []byte("\n\n\n\n\n\n\n\n\n"), 9, []int{0, 1, 2, 3, 4, 5, 6, 7, 8}}, {"d", nil, 100, []int{0, 5, 10, 20, 30, 70, 71, 72, 80, 85, 90, 99}}, {"e", nil, 777, []int{0, 80, 100, 120, 130, 180, 267, 455, 500, 567, 620}}, {"f", []byte("package p\n\nimport \"fmt\""), 23, []int{0, 10, 11}}, {"g", []byte("package p\n\nimport \"fmt\"\n"), 24, []int{0, 10, 11}}, {"h", []byte("package p\n\nimport \"fmt\"\n "), 25, []int{0, 10, 11, 24}}, } func linecol(lines []int, offs int) (int, int) { prevLineOffs := 0 for line, lineOffs := range lines { if offs < lineOffs { return line, offs - prevLineOffs + 1 } prevLineOffs = lineOffs } return len(lines), offs - prevLineOffs + 1 } func verifyPositions(t *testing.T, fset *FileSet, f *File, lines []int) { for offs := 0; offs < f.Size(); offs++ { p, err := f.Pos(offs) if err != nil { t.Fatalf("unexpected error: %v", err) } offs2, err := f.Offset(p) if err != nil { t.Fatalf("unexpected error: %v", err) } if offs2 != offs { t.Errorf("%s, Offset: expected offset %d; got %d", f.Name(), offs, offs2) } line, col := linecol(lines, offs) msg := fmt.Sprintf("%s (offs = %d, p = %d)", f.Name(), offs, p) pos, err := f.Pos(offs) if err != nil { t.Fatalf("unexpected error: %v", err) } position, err := f.Position(pos) if err != nil { t.Fatalf("unexpected error: %v", err) } checkPos(t, msg, position, Position{f.Name(), offs, line, col}) checkPos(t, msg, fset.Position(p), Position{f.Name(), offs, line, col}) } } func makeTestSource(size int, lines []int) []byte { src := make([]byte, size) for _, offs := range lines { if offs > 0 { src[offs-1] = '\n' } } return src } func TestPositions(t *testing.T) { const delta = 7 // a non-zero base offset increment fset := NewFileSet() for _, test := range tests { // verify consistency of test case if test.source != nil && len(test.source) != test.size { t.Errorf("%s: inconsistent test case: expected file size %d; got %d", test.filename, test.size, len(test.source)) } // add file and verify name and size f, err := fset.AddFile(test.filename, fset.Base()+delta, test.size) if err != nil { t.Fatalf("unexpected error: %v", err) } if f.Name() != test.filename { t.Errorf("expected filename %q; got %q", test.filename, f.Name()) } if f.Size() != test.size { t.Errorf("%s: expected file size %d; got %d", f.Name(), test.size, f.Size()) } pos, err := f.Pos(0) if err != nil { t.Errorf("unexpected error %v", err) } if fset.File(pos) != f { t.Errorf("%s: f.Pos(0) was not found in f", f.Name()) } // add lines individually and verify all positions for i, offset := range test.lines { f.AddLine(offset) if f.LineCount() != i+1 { t.Errorf("%s, AddLine: expected line count %d; got %d", f.Name(), i+1, f.LineCount()) } // adding the same offset again should be ignored f.AddLine(offset) if f.LineCount() != i+1 { t.Errorf("%s, AddLine: expected unchanged line count %d; got %d", f.Name(), i+1, f.LineCount()) } verifyPositions(t, fset, f, test.lines[0:i+1]) } // add lines with SetLines and verify all positions if ok := f.SetLines(test.lines); !ok { t.Errorf("%s: SetLines failed", f.Name()) } if f.LineCount() != len(test.lines) { t.Errorf("%s, SetLines: expected line count %d; got %d", f.Name(), len(test.lines), f.LineCount()) } verifyPositions(t, fset, f, test.lines) // add lines with SetLinesForContent and verify all positions src := test.source if src == nil { // no test source available - create one from scratch src = makeTestSource(test.size, test.lines) } f.SetLinesForContent(src) if f.LineCount() != len(test.lines) { t.Errorf("%s, SetLinesForContent: expected line count %d; got %d", f.Name(), len(test.lines), f.LineCount()) } verifyPositions(t, fset, f, test.lines) } } func TestLineInfo(t *testing.T) { fset := NewFileSet() f, err := fset.AddFile("foo", fset.Base(), 500) if err != nil { t.Fatalf("unexpected error: %v", err) } lines := []int{0, 42, 77, 100, 210, 220, 277, 300, 333, 401} // add lines individually and provide alternative line information for _, offs := range lines { f.AddLine(offs) f.AddLineInfo(offs, "bar", 42) } // verify positions for all offsets for offs := 0; offs <= f.Size(); offs++ { p, err := f.Pos(offs) if err != nil { t.Errorf("unexpected error: %v", err) } _, col := linecol(lines, offs) msg := fmt.Sprintf("%s (offs = %d, p = %d)", f.Name(), offs, p) pos, err := f.Pos(offs) if err != nil { t.Errorf("unexpected error: %v", err) } position, err := f.Position(pos) if err != nil { t.Errorf("unexpected error: %v", err) } checkPos(t, msg, position, Position{"bar", offs, 42, col}) checkPos(t, msg, fset.Position(p), Position{"bar", offs, 42, col}) } } func TestFiles(t *testing.T) { fset := NewFileSet() for i, test := range tests { _, _ = fset.AddFile(test.filename, fset.Base(), test.size) j := 0 fset.Iterate(func(f *File) bool { if f.Name() != tests[j].filename { t.Errorf("expected filename = %s; got %s", tests[j].filename, f.Name()) } j++ return true }) if j != i+1 { t.Errorf("expected %d files; got %d", i+1, j) } } } ================================================ FILE: modules/gcfg/token/serialize.go ================================================ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package token type serializedFile struct { // fields correspond 1:1 to fields with same (lower-case) name in File Name string Base int Size int Lines []int Infos []lineInfo } type serializedFileSet struct { Base int Files []serializedFile } // Read calls decode to deserialize a file set into s; s must not be nil. func (s *FileSet) Read(decode func(any) error) error { var ss serializedFileSet if err := decode(&ss); err != nil { return err } s.mutex.Lock() s.base = ss.Base files := make([]*File, len(ss.Files)) for i := 0; i < len(ss.Files); i++ { f := &ss.Files[i] files[i] = &File{s, f.Name, f.Base, f.Size, f.Lines, f.Infos} } s.files = files s.last = nil s.mutex.Unlock() return nil } // Write calls encode to serialize the file set s. func (s *FileSet) Write(encode func(any) error) error { var ss serializedFileSet s.mutex.Lock() ss.Base = s.base files := make([]serializedFile, len(s.files)) for i, f := range s.files { files[i] = serializedFile{f.name, f.base, f.size, f.lines, f.infos} } ss.Files = files s.mutex.Unlock() return encode(ss) } ================================================ FILE: modules/gcfg/token/serialize_test.go ================================================ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package token import ( "bytes" "encoding/gob" "fmt" "testing" ) // equal returns nil if p and q describe the same file set; // otherwise it returns an error describing the discrepancy. func equal(p, q *FileSet) error { if p == q { // avoid deadlock if p == q return nil } // not strictly needed for the test p.mutex.Lock() q.mutex.Lock() defer q.mutex.Unlock() defer p.mutex.Unlock() if p.base != q.base { return fmt.Errorf("different bases: %d != %d", p.base, q.base) } if len(p.files) != len(q.files) { return fmt.Errorf("different number of files: %d != %d", len(p.files), len(q.files)) } for i, f := range p.files { g := q.files[i] if f.set != p { return fmt.Errorf("wrong fileset for %q", f.name) } if g.set != q { return fmt.Errorf("wrong fileset for %q", g.name) } if f.name != g.name { return fmt.Errorf("different filenames: %q != %q", f.name, g.name) } if f.base != g.base { return fmt.Errorf("different base for %q: %d != %d", f.name, f.base, g.base) } if f.size != g.size { return fmt.Errorf("different size for %q: %d != %d", f.name, f.size, g.size) } for j, l := range f.lines { m := g.lines[j] if l != m { return fmt.Errorf("different offsets for %q", f.name) } } for j, l := range f.infos { m := g.infos[j] if l.Offset != m.Offset || l.Filename != m.Filename || l.Line != m.Line { return fmt.Errorf("different infos for %q", f.name) } } } // we don't care about .last - it's just a cache return nil } func checkSerialize(t *testing.T, p *FileSet) { var buf bytes.Buffer encode := func(x any) error { return gob.NewEncoder(&buf).Encode(x) } if err := p.Write(encode); err != nil { t.Errorf("writing fileset failed: %s", err) return } q := NewFileSet() decode := func(x any) error { return gob.NewDecoder(&buf).Decode(x) } if err := q.Read(decode); err != nil { t.Errorf("reading fileset failed: %s", err) return } if err := equal(p, q); err != nil { t.Errorf("filesets not identical: %s", err) } } func TestSerialization(t *testing.T) { p := NewFileSet() checkSerialize(t, p) // add some files for i := range 10 { f, err := p.AddFile(fmt.Sprintf("file%d", i), p.Base()+i, i*100) if err != nil { t.Fatalf("unexpected error: %v", err) } checkSerialize(t, p) // add some lines and alternative file infos line := 1000 for offs := 0; offs < f.Size(); offs += 40 + i { f.AddLine(offs) if offs%7 == 0 { f.AddLineInfo(offs, fmt.Sprintf("file%d", offs), line) line += 33 } } checkSerialize(t, p) } } ================================================ FILE: modules/gcfg/token/token.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package token defines constants representing the lexical tokens of the gcfg // configuration syntax and basic operations on tokens (printing, predicates). // // Note that the API for the token package may change to accommodate new // features or implementation changes in gcfg. package token import "strconv" // Token is the set of lexical tokens of the gcfg configuration syntax. type Token int // The list of tokens. const ( // Special tokens ILLEGAL Token = iota EOF COMMENT literal_beg // Identifiers and basic type literals // (these tokens stand for classes of literals) IDENT // section-name, variable-name STRING // "subsection-name", variable value literal_end operator_beg // Operators and delimiters ASSIGN // = LBRACK // [ RBRACK // ] EOL // \n operator_end ) var tokens = [...]string{ ILLEGAL: "ILLEGAL", EOF: "EOF", COMMENT: "COMMENT", IDENT: "IDENT", STRING: "STRING", ASSIGN: "=", LBRACK: "[", RBRACK: "]", EOL: "\n", } // String returns the string corresponding to the token tok. // For operators and delimiters, the string is the actual token character // sequence (e.g., for the token ASSIGN, the string is "="). For all other // tokens the string corresponds to the token constant name (e.g. for the // token IDENT, the string is "IDENT"). func (tok Token) String() string { s := "" if 0 <= tok && tok < Token(len(tokens)) { s = tokens[tok] } if s == "" { s = "token(" + strconv.Itoa(int(tok)) + ")" } return s } // Predicates // IsLiteral returns true for tokens corresponding to identifiers // and basic type literals; it returns false otherwise. func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_end } // IsOperator returns true for tokens corresponding to operators and // delimiters; it returns false otherwise. func (tok Token) IsOperator() bool { return operator_beg < tok && tok < operator_end } ================================================ FILE: modules/gcfg/types/bool.go ================================================ package types // BoolValues defines the name and value mappings for ParseBool. var BoolValues = map[string]any{ "true": true, "yes": true, "on": true, "1": true, "false": false, "no": false, "off": false, "0": false, } var boolParser = func() *EnumParser { ep := &EnumParser{} ep.AddVals(BoolValues) return ep }() // ParseBool parses bool values according to the definitions in BoolValues. // Parsing is case-insensitive. func ParseBool(s string) (bool, error) { v, err := boolParser.Parse(s) if err != nil { return false, err } return v.(bool), nil } ================================================ FILE: modules/gcfg/types/doc.go ================================================ // Package types defines helpers for type conversions. // // The API for this package is not finalized yet. package types ================================================ FILE: modules/gcfg/types/enum.go ================================================ package types import ( "fmt" "reflect" "strings" ) // EnumParser parses "enum" values; i.e. a predefined set of strings to // predefined values. type EnumParser struct { Type string // type name; if not set, use type of first value added CaseMatch bool // if true, matching of strings is case-sensitive // PrefixMatch bool vals map[string]any } // AddVals adds strings and values to an EnumParser. func (ep *EnumParser) AddVals(vals map[string]any) { if ep.vals == nil { ep.vals = make(map[string]any) } for k, v := range vals { if ep.Type == "" { ep.Type = reflect.TypeOf(v).Name() } if !ep.CaseMatch { k = strings.ToLower(k) } ep.vals[k] = v } } // Parse parses the string and returns the value or an error. func (ep EnumParser) Parse(s string) (any, error) { if !ep.CaseMatch { s = strings.ToLower(s) } v, ok := ep.vals[s] if !ok { return false, fmt.Errorf("failed to parse %s %#q", ep.Type, s) } return v, nil } ================================================ FILE: modules/gcfg/types/enum_test.go ================================================ package types import ( "testing" ) func TestEnumParserBool(t *testing.T) { for _, tt := range []struct { val string res bool ok bool }{ {val: "tRuE", res: true, ok: true}, {val: "False", res: false, ok: true}, {val: "t", ok: false}, } { b, err := ParseBool(tt.val) switch { case tt.ok && err != nil: t.Errorf("%q: got error %v, want %v", tt.val, err, tt.res) case !tt.ok && err == nil: t.Errorf("%q: got %v, want error", tt.val, b) case tt.ok && b != tt.res: t.Errorf("%q: got %v, want %v", tt.val, b, tt.res) default: t.Logf("%q: got %v, %v", tt.val, b, err) } } } ================================================ FILE: modules/gcfg/types/int.go ================================================ package types import ( "errors" "fmt" "strings" ) var ( ErrAmbiguousInt = fmt.Errorf("ambiguous integer value; must include '0' prefix") ErrUnsupportedMode = errors.New("unsupported mode") ) // An IntMode is a mode for parsing integer values, representing a set of // accepted bases. type IntMode uint8 // IntMode values for ParseInt; can be combined using binary or. const ( Dec IntMode = 1 << iota Hex Oct ) // String returns a string representation of IntMode; e.g. `IntMode(Dec|Hex)`. func (m IntMode) String() string { var modes []string if m&Dec != 0 { modes = append(modes, "Dec") } if m&Hex != 0 { modes = append(modes, "Hex") } if m&Oct != 0 { modes = append(modes, "Oct") } return "IntMode(" + strings.Join(modes, "|") + ")" } func prefix0(val string) bool { return strings.HasPrefix(val, "0") || strings.HasPrefix(val, "-0") } func prefix0x(val string) bool { return strings.HasPrefix(val, "0x") || strings.HasPrefix(val, "-0x") } // ParseInt parses val using mode into intptr, which must be a pointer to an // integer kind type. Non-decimal value require prefix `0` or `0x` in the cases // when mode permits ambiguity of base; otherwise the prefix can be omitted. func ParseInt(intptr any, val string, mode IntMode) error { val = strings.TrimSpace(val) verb := byte(0) switch mode { case Dec: verb = 'd' case Dec + Hex: if prefix0x(val) { verb = 'v' } else { verb = 'd' } case Dec + Oct: if prefix0(val) && !prefix0x(val) { verb = 'v' } else { verb = 'd' } case Dec + Hex + Oct: verb = 'v' case Hex: if prefix0x(val) { verb = 'v' } else { verb = 'x' } case Oct: verb = 'o' case Hex + Oct: if prefix0(val) { verb = 'v' } else { return ErrAmbiguousInt } } if verb == 0 { return ErrUnsupportedMode } return ScanFully(intptr, val, verb) } ================================================ FILE: modules/gcfg/types/int_test.go ================================================ package types import ( "reflect" "testing" ) func elem(p any) any { return reflect.ValueOf(p).Elem().Interface() } func TestParseInt(t *testing.T) { for _, tt := range []struct { val string mode IntMode exp any ok bool }{ {"0", Dec, int(0), true}, {"10", Dec, int(10), true}, {"-10", Dec, int(-10), true}, {"x", Dec, int(0), false}, {"0xa", Hex, int(0xa), true}, {"a", Hex, int(0xa), true}, {"10", Hex, int(0x10), true}, {"-0xa", Hex, int(-0xa), true}, {"-a", Hex, int(-0xa), true}, {"-10", Hex, int(-0x10), true}, {"x", Hex, int(0), false}, {"10", Oct, int(010), true}, {"010", Oct, int(010), true}, {"-10", Oct, int(-010), true}, {"-010", Oct, int(-010), true}, {"10", Dec | Hex, int(10), true}, {"010", Dec | Hex, int(10), true}, {"0x10", Dec | Hex, int(0x10), true}, {"10", Dec | Oct, int(10), true}, {"010", Dec | Oct, int(010), true}, {"0x10", Dec | Oct, int(0), false}, {"10", Hex | Oct, int(0), false}, // need prefix to distinguish Hex/Oct {"010", Hex | Oct, int(010), true}, {"0x10", Hex | Oct, int(0x10), true}, {"10", Dec | Hex | Oct, int(10), true}, {"010", Dec | Hex | Oct, int(010), true}, {"0x10", Dec | Hex | Oct, int(0x10), true}, } { typ := reflect.TypeOf(tt.exp) res := reflect.New(typ).Interface() err := ParseInt(res, tt.val, tt.mode) switch { case tt.ok && err != nil: t.Errorf("ParseInt(%v, %#v, %v): fail; got error %v, want ok", typ, tt.val, tt.mode, err) case !tt.ok && err == nil: t.Errorf("ParseInt(%v, %#v, %v): fail; got %v, want error", typ, tt.val, tt.mode, elem(res)) case tt.ok && !reflect.DeepEqual(elem(res), tt.exp): t.Errorf("ParseInt(%v, %#v, %v): fail; got %v, want %v", typ, tt.val, tt.mode, elem(res), tt.exp) default: t.Logf("ParseInt(%v, %#v, %s): pass; got %v, error %v", typ, tt.val, tt.mode, elem(res), err) } } } ================================================ FILE: modules/gcfg/types/scan.go ================================================ package types import ( "errors" "fmt" "io" "reflect" ) // ScanFully uses fmt.Sscanf with verb to fully scan val into ptr. func ScanFully(ptr any, val string, verb byte) error { t := reflect.ValueOf(ptr).Elem().Type() // attempt to read extra bytes to make sure the value is consumed var b []byte n, err := fmt.Sscanf(val, "%"+string(verb)+"%s", ptr, &b) switch { case n < 1 || n == 1 && !errors.Is(err, io.EOF): return fmt.Errorf("failed to parse %q as %v: %w", val, t, err) case n > 1: return fmt.Errorf("failed to parse %q as %v: extra characters %q", val, t, string(b)) } // n == 1 && err == io.EOF return nil } ================================================ FILE: modules/gcfg/types/scan_test.go ================================================ package types import ( "reflect" "testing" ) func TestScanFully(t *testing.T) { for _, tt := range []struct { val string verb byte res any ok bool }{ {"a", 'v', int(0), false}, {"0x", 'd', int(0), false}, } { d := reflect.New(reflect.TypeOf(tt.res)).Interface() err := ScanFully(d, tt.val, tt.verb) switch { case tt.ok && err != nil: t.Errorf("ScanFully(%T, %q, '%c'): want ok, got error %v", d, tt.val, tt.verb, err) case !tt.ok && err == nil: t.Errorf("ScanFully(%T, %q, '%c'): want error, got %v", d, tt.val, tt.verb, elem(d)) case tt.ok && err == nil && !reflect.DeepEqual(tt.res, elem(d)): t.Errorf("ScanFully(%T, %q, '%c'): want %v, got %v", d, tt.val, tt.verb, tt.res, elem(d)) default: t.Logf("ScanFully(%T, %q, '%c') = %v; *ptr==%v", d, tt.val, tt.verb, err, elem(d)) } } } ================================================ FILE: modules/git/branch.go ================================================ package git import ( "bufio" "context" "errors" "fmt" "strings" "github.com/antgroup/hugescm/modules/command" ) func JoinBranchPrefix(b string) string { if strings.HasPrefix(b, refHeadPrefix) { return b } return refHeadPrefix + b } func JoinBranchRev(r string) string { if ValidateHexLax(r) { return r } if strings.HasPrefix(r, refPrefix) { return r } return refHeadPrefix + r } var ( ErrDetachedHEAD = errors.New("detached HEAD") ) // RevParseCurrentName: resolve the reference pointed to by HEAD func RevParseCurrentName(ctx context.Context, environ []string, repoPath string) (string, error) { // git symbolic-ref HEAD stderr := command.NewStderr() var stdout strings.Builder cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: repoPath, Environ: environ, Stderr: stderr, Stdout: &stdout, }, "git", "symbolic-ref", "HEAD") if err := cmd.Run(); err != nil { message := strings.TrimSpace(stderr.String()) if strings.Contains(message, "is not a symbolic ref") { return ReferenceNameDefault, ErrDetachedHEAD } if len(message) != 0 { err = errors.New(message) } return ReferenceNameDefault, err } symref, trailing, ok := strings.Cut(stdout.String(), "\n") if !ok { return ReferenceNameDefault, errors.New("expected symbolic reference to be terminated by newline") } if len(trailing) > 0 { return ReferenceNameDefault, errors.New("symbolic reference has trailing data") } return symref, nil } // RevParseCurrent parse HEAD return hash and refname func RevParseCurrent(ctx context.Context, environ []string, repoPath string) (refname string, hash string, err error) { if refname, err = RevParseCurrentName(ctx, environ, repoPath); err != nil { if !errors.Is(err, ErrDetachedHEAD) { return } refname = "HEAD" // git checkout commit } stderr := command.NewStderr() cmd := command.NewFromOptions(ctx, &command.RunOpts{RepoPath: repoPath, Environ: environ, Stderr: stderr}, "git", "rev-parse", "--verify", "--end-of-options", refname) if hash, err = cmd.OneLine(); err != nil { if message := strings.TrimSpace(stderr.String()); len(message) != 0 { err = errors.New(message) } return ReferenceNameDefault, "", err } return refname, hash, nil } // SymReferenceLink: Update default branch or current branch func SymReferenceLink(ctx context.Context, repoPath string, refname string) error { cmd := command.New(ctx, repoPath, "git", "symbolic-ref", "HEAD", refname) if err := cmd.RunEx(); err != nil { return err } return nil } func FindBranch(ctx context.Context, repoPath string, name string) (*Reference, error) { stderr := command.NewStderr() reader, err := NewReader(ctx, &command.RunOpts{RepoPath: repoPath, Stderr: stderr}, "branch", "-l", "--format", ReferenceLineFormat, "--", name) if err != nil { return nil, err } defer reader.Close() // nolint scanner := bufio.NewScanner(reader) if scanner.Scan() { return ParseOneReference(scanner.Text()) } return nil, NewBranchNotFound(name) } var BranchFormatFields = []string{ "%(refname)", "%(refname:short)", "%(objectname)", "%(tree)", "%(contents:subject)", "%(authorname)", "%(authoremail)", "%(authordate:iso-strict)", "%(committername)", "%(committeremail)", "%(committerdate:iso-strict)", } func ParseBranchLineEx(referenceLine string) (*ReferenceEx, error) { elements := strings.SplitN(referenceLine, "\x00", len(BranchFormatFields)) if len(elements) != len(BranchFormatFields) { return nil, fmt.Errorf("invalid output from git for-each-ref command: %v", referenceLine) } cc := &Commit{ Hash: elements[2], Tree: elements[3], Message: elements[4], Author: Signature{ Name: elements[5], Email: elements[6], When: PareTimeFallback(elements[7]), }, Committer: Signature{ Name: elements[8], Email: elements[9], When: PareTimeFallback(elements[10]), }, } return &ReferenceEx{ Name: ReferenceName(elements[0]), Commit: cc, ShortName: elements[1]}, nil } ================================================ FILE: modules/git/command.go ================================================ package git import ( "context" "errors" "io" "github.com/antgroup/hugescm/modules/command" ) type commandReader struct { cmd *command.Command reader io.ReadCloser } func (c *commandReader) Read(p []byte) (int, error) { if c.reader == nil { panic("command has no reader") } return c.reader.Read(p) } func (c *commandReader) Close() (err error) { if c.reader != nil { _ = c.reader.Close() } return c.cmd.Wait() } // NewReaderFromOptions new git command as a reader func NewReader(ctx context.Context, opt *command.RunOpts, arg ...string) (io.ReadCloser, error) { if opt.Stdout != nil { return nil, errors.New("exec: Stdout should be nil") } cmdArgs := append([]string{"--git-dir", opt.RepoPath}, arg...) cmd := command.NewFromOptions(ctx, opt, "git", cmdArgs...) stdout, err := cmd.StdoutPipe() if err != nil { return nil, err } if err := cmd.Start(); err != nil { _ = stdout.Close() return nil, err } return &commandReader{cmd: cmd, reader: stdout}, nil } ================================================ FILE: modules/git/commit.go ================================================ package git import ( "bufio" "context" "fmt" "io" "strconv" "strings" "github.com/antgroup/hugescm/modules/command" ) // ExtraHeader encapsulates a key-value pairing of header key to header value. // It is stored as a struct{string, string} in memory as opposed to a // map[string]string to maintain ordering in a byte-for-byte encode/decode round // trip. type ExtraHeader struct { // K is the header key, or the first run of bytes up until a ' ' (\x20) // character. K string `json:"k"` // V is the header value, or the remaining run of bytes in the line, // stripping off the above "K" field as a prefix. V string `json:"v"` } type Commit struct { // Hash of the commit object. Hash string `json:"hash"` // Tree is the hash of the root tree of the commit. Tree string `json:"tree"` // Parents are the hashes of the parent commits of the commit. Parents []string `json:"parents"` // Author is the original author of the commit. Author Signature `json:"author"` // Committer is the one performing the commit, might be different from // Author. Committer Signature `json:"committer"` // ExtraHeaders stores headers not listed above, for instance // "encoding", "gpgsig", or "mergetag" (among others). ExtraHeaders []*ExtraHeader `json:"extra_header,omitempty"` // Message is the commit message, contains arbitrary text. Message string `json:"message"` size int64 } func (c *Commit) Size() int64 { return c.size } func (c *Commit) Signature() string { for _, e := range c.ExtraHeaders { if e.K == "gpgsig" { return e.V } } return "" } // CommitGPGSignature represents a git commit signature part. type CommitGPGSignature struct { Signature string Payload string // TODO check if can be reconstruct from the rest of commit information to not have duplicate data } func (c *Commit) ExtractCommitGPGSignature() *CommitGPGSignature { var signature string for _, e := range c.ExtraHeaders { if e.K == "gpgsig" { signature = e.V } } if len(signature) == 0 { return nil } var w strings.Builder var err error if _, err = fmt.Fprintf(&w, "tree %s\n", c.Tree); err != nil { return nil } for _, parent := range c.Parents { if _, err = fmt.Fprintf(&w, "parent %s\n", parent); err != nil { return nil } } if _, err = fmt.Fprint(&w, "author "); err != nil { return nil } if err = c.Author.Encode(&w); err != nil { return nil } if _, err = fmt.Fprint(&w, "\ncommitter "); err != nil { return nil } if err = c.Committer.Encode(&w); err != nil { return nil } if _, err = fmt.Fprintf(&w, "\n\n%s", c.Message); err != nil { return nil } return &CommitGPGSignature{ Signature: signature, Payload: w.String()} } func (c *Commit) Decode(hash string, reader io.Reader, size int64) error { c.Hash = hash c.size = size r, ok := reader.(*bufio.Reader) if !ok { r = bufio.NewReader(reader) } var message strings.Builder var finishedHeaders bool for { line, readErr := r.ReadString('\n') if readErr != nil && readErr != io.EOF { return readErr } text := strings.TrimSuffix(line, "\n") if len(text) == 0 && !finishedHeaders { finishedHeaders = true continue } if !finishedHeaders { // Check if this is a continuation line (starts with space) // Do this before strings.Cut to avoid unnecessary parsing if len(text) > 0 && text[0] == ' ' && len(c.ExtraHeaders) != 0 { last := c.ExtraHeaders[len(c.ExtraHeaders)-1] last.V += "\n" + text[1:] continue } key, value, ok := strings.Cut(text, " ") switch key { case "tree": if !ok || len(value) == 0 { continue } c.Tree = value case "parent": if !ok || len(value) == 0 { continue } c.Parents = append(c.Parents, value) case "author": if !ok || len(value) == 0 { continue } c.Author.Decode([]byte(value)) case "committer": if !ok || len(value) == 0 { continue } c.Committer.Decode([]byte(value)) default: // Skip malformed header lines (no space separator) or empty key if !ok || len(key) == 0 { continue } // New header c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{ K: key, V: value, }) } } else { _, _ = message.WriteString(line) } if readErr == io.EOF { break } } c.Message = message.String() return nil } func (c *Commit) Subject() string { if i := strings.IndexAny(c.Message, "\r\n"); i != -1 { return c.Message[0:i] } return c.Message } func RevUniqueList(ctx context.Context, repoPath string, ours, theirs string) ([]string, error) { stderr := command.NewStderr() cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: repoPath, Stderr: stderr, }, "git", "rev-list", "--cherry-pick", "--right-only", "--no-merges", "--topo-order", "--reverse", fmt.Sprintf("%s...%s", ours, theirs), ) stdout, err := cmd.StdoutPipe() if err != nil { return nil, err } defer stdout.Close() // nolint if err := cmd.Start(); err != nil { return nil, err } var todoList []string scanner := bufio.NewScanner(stdout) for scanner.Scan() { todoList = append(todoList, strings.TrimSpace(scanner.Text())) } if err := cmd.Wait(); err != nil { return nil, fmt.Errorf("rev-list error: %w stderr: %v", err, stderr.String()) } if err := scanner.Err(); err != nil { return nil, fmt.Errorf("scanning rev-list output: %w", err) } return todoList, nil } func RevDivergingCount(ctx context.Context, repoPath string, from, to string) (int, int, error) { psArgs := []string{"rev-list", "--count", "--left-right"} psArgs = append(psArgs, fmt.Sprintf("%s...%s", from, to)) stderr := command.NewStderr() cmd := command.NewFromOptions(ctx, &command.RunOpts{ Stderr: stderr, RepoPath: repoPath, }, "git", psArgs...) line, err := cmd.OneLine() if err != nil { return 0, 0, err } counts := strings.Fields(line) if len(counts) != 2 { return 0, 0, fmt.Errorf("invalid output from git rev-list --left-right: %v", line) } left, err := strconv.ParseInt(counts[0], 10, 32) if err != nil { return 0, 0, fmt.Errorf("invalid left count value: %v", counts[0]) } right, err := strconv.ParseInt(counts[1], 10, 32) if err != nil { return 0, 0, fmt.Errorf("invalid right count value: %v", counts[1]) } return int(left), int(right), nil } ================================================ FILE: modules/git/commit_test.go ================================================ package git import ( "strings" "testing" ) // TestCommitDecodeWithMultipleParents tests decoding with multiple parents func TestCommitDecodeWithMultipleParents(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb parent a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2 parent b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3 parent c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4 author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 test message` commit := new(Commit) err := commit.Decode("test", strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Decode error: %v", err) } if len(commit.Parents) != 3 { t.Errorf("Expected 3 parents, got %d", len(commit.Parents)) } } // TestCommitDecodeWithSpecialCharacters tests decoding with special characters func TestCommitDecodeWithSpecialCharacters(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author 张三 1337892984 +0800 committer 张三 1337892984 +0800 custom value with spaces & special!@#$%^&*()_+-=[]{}|;':",./<>? test message with 中文 and 日本語` commit := new(Commit) err := commit.Decode("test", strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Error: %v", err) } if !strings.Contains(commit.Author.String(), "张三") { t.Errorf("Expected to contain '张三' in author") } if len(commit.ExtraHeaders) != 1 { t.Errorf("Expected %v, got %v", 1, len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "custom" { t.Errorf("Expected %v, got %v", "custom", commit.ExtraHeaders[0].K) } if commit.ExtraHeaders[0].V != "value with spaces & special!@#$%^&*()_+-=[]{}|;':\",./<>?" { t.Errorf("Expected %v, got %v", "value with spaces & special!@#$%^&*()_+-=[]{}|;':\",./<>?", commit.ExtraHeaders[0].V) } if !strings.Contains(commit.Message, "中文") { t.Errorf("Expected message to contain '中文'") } if !strings.Contains(commit.Message, "日本語") { t.Errorf("Expected message to contain '日本語'") } } // TestCommitDecodeWithExtraHeaderBeforeStandard tests extra header before standard headers func TestCommitDecodeWithExtraHeaderBeforeStandard(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb custom extra header before standard author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 test message` commit := new(Commit) err := commit.Decode("test", strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Error: %v", err) } if len(commit.ExtraHeaders) != 1 { t.Errorf("Expected %v, got %v", 1, len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "custom" { t.Errorf("Expected %v, got %v", "custom", commit.ExtraHeaders[0].K) } if commit.ExtraHeaders[0].V != "extra header before standard" { t.Errorf("Expected %v, got %v", "extra header before standard", commit.ExtraHeaders[0].V) } } // TestCommitDecodeWithComplexHeaders tests complex multi-line headers func TestCommitDecodeWithComplexHeaders(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb parent b343c8beec664ef6f0e9964d3001c7c7966331ae author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 mergetag object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd type commit tag random tagger J. Roe 1337889148 -0600 Random changes` commit := new(Commit) err := commit.Decode("test", strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Error: %v", err) } // Verify ExtraHeaders if len(commit.ExtraHeaders) != 1 { t.Fatalf("Expected %v, got %v", 1, len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "mergetag" { t.Fatalf("Expected %v, got %v", "mergetag", commit.ExtraHeaders[0].K) } if !strings.Contains(commit.ExtraHeaders[0].V, "object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd") { t.Errorf("Expected to contain 'object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd'") } if !strings.Contains(commit.ExtraHeaders[0].V, "type commit") { t.Errorf("Expected to contain 'type commit'") } if !strings.Contains(commit.ExtraHeaders[0].V, "tag random") { t.Errorf("Expected to contain 'tag random'") } if !strings.Contains(commit.ExtraHeaders[0].V, "tagger J. Roe 1337889148 -0600") { t.Errorf("Expected to contain 'tagger J. Roe 1337889148 -0600'") } } ================================================ FILE: modules/git/config/config.go ================================================ package config // New creates a new config instance. func New() *Config { return &Config{} } // Config contains all the sections, comments and includes from a config file. type Config struct { Comment *Comment Sections Sections Includes Includes } // Includes is a list of Includes in a config file. type Includes []*Include // Include is a reference to an included config file. type Include struct { Path string Config *Config } // Comment string without the prefix '#' or ';'. type Comment string const ( // NoSubsection token is passed to Config.Section and Config.SetSection to // represent the absence of a section. NoSubsection = "" ) // Section returns a existing section with the given name or creates a new one. func (c *Config) Section(name string) *Section { for i := len(c.Sections) - 1; i >= 0; i-- { s := c.Sections[i] if s.IsName(name) { return s } } s := &Section{Name: name} c.Sections = append(c.Sections, s) return s } // HasSection checks if the Config has a section with the specified name. func (c *Config) HasSection(name string) bool { for _, s := range c.Sections { if s.IsName(name) { return true } } return false } // RemoveSection removes a section from a config file. func (c *Config) RemoveSection(name string) *Config { result := Sections{} for _, s := range c.Sections { if !s.IsName(name) { result = append(result, s) } } c.Sections = result return c } // RemoveSubsection remove a subsection from a config file. func (c *Config) RemoveSubsection(section string, subsection string) *Config { for _, s := range c.Sections { if s.IsName(section) { result := Subsections{} for _, ss := range s.Subsections { if !ss.IsName(subsection) { result = append(result, ss) } } s.Subsections = result } } return c } // AddOption adds an option to a given section and subsection. Use the // NoSubsection constant for the subsection argument if no subsection is wanted. func (c *Config) AddOption(section string, subsection string, key string, value string) *Config { if subsection == "" { c.Section(section).AddOption(key, value) } else { c.Section(section).Subsection(subsection).AddOption(key, value) } return c } // SetOption sets an option to a given section and subsection. Use the // NoSubsection constant for the subsection argument if no subsection is wanted. func (c *Config) SetOption(section string, subsection string, key string, value string) *Config { if subsection == "" { c.Section(section).SetOption(key, value) } else { c.Section(section).Subsection(subsection).SetOption(key, value) } return c } func (c *Config) HashFormat() string { if c.HasSection("extensions") { if shaFormat := c.Section("extensions").Option("objectformat"); len(shaFormat) != 0 { return shaFormat } } return "sha1" } func (c *Config) ReferencesFormat() string { if c.HasSection("extensions") { if refFormat := c.Section("extensions").Option("refstorage"); len(refFormat) != 0 { return refFormat } } return "files" } ================================================ FILE: modules/git/config/decoder.go ================================================ package config import ( "io" "os" "path/filepath" "github.com/antgroup/hugescm/modules/gcfg" ) // A Decoder reads and decodes config files from an input stream. type Decoder struct { io.Reader } // NewDecoder returns a new decoder that reads from r. func NewDecoder(r io.Reader) *Decoder { return &Decoder{r} } // Decode reads the whole config from its input and stores it in the // value pointed to by config. func (d *Decoder) Decode(config *Config) error { cb := func(s string, ss string, k string, v string, bv bool) error { if ss == "" && k == "" { config.Section(s) return nil } if ss != "" && k == "" { config.Section(s).Subsection(ss) return nil } config.AddOption(s, ss, k, v) return nil } return gcfg.ReadWithCallback(d, cb) } func BareDecode(repoPath string) (*Config, error) { file := filepath.Join(repoPath, "config") fd, err := os.Open(file) if err != nil { return nil, err } defer fd.Close() // nolint cfg := New() if err := NewDecoder(fd).Decode(cfg); err != nil { return nil, err } return cfg, nil } ================================================ FILE: modules/git/config/option.go ================================================ package config import ( "fmt" "slices" "strings" ) // Option defines a key/value entity in a config file. type Option struct { // Key preserving original caseness. // Use IsKey instead to compare key regardless of caseness. Key string // Original value as string, could be not normalized. Value string } type Options []*Option // IsKey returns true if the given key matches // this option's key in a case-insensitive comparison. func (o *Option) IsKey(key string) bool { return strings.EqualFold(o.Key, key) } func (opts Options) GoString() string { var strs []string for _, opt := range opts { strs = append(strs, fmt.Sprintf("%#v", opt)) } return strings.Join(strs, ", ") } // Get gets the value for the given key if set, // otherwise it returns the empty string. // // # Note that there is no difference // // This matches git behaviour since git v1.8.1-rc1, // if there are multiple definitions of a key, the // last one wins. // // See: http://article.gmane.org/gmane.linux.kernel/1407184 // // In order to get all possible values for the same key, // use GetAll. func (opts Options) Get(key string) string { for i := len(opts) - 1; i >= 0; i-- { o := opts[i] if o.IsKey(key) { return o.Value } } return "" } // Has checks if an Option exist with the given key. func (opts Options) Has(key string) bool { for _, o := range opts { if o.IsKey(key) { return true } } return false } // GetAll returns all possible values for the same key. func (opts Options) GetAll(key string) []string { result := []string{} for _, o := range opts { if o.IsKey(key) { result = append(result, o.Value) } } return result } func (opts Options) withoutOption(key string) Options { result := Options{} for _, o := range opts { if !o.IsKey(key) { result = append(result, o) } } return result } func (opts Options) withAddedOption(key string, value string) Options { return append(opts, &Option{key, value}) } func (opts Options) withSettedOption(key string, values ...string) Options { var result Options var added []string for _, o := range opts { if !o.IsKey(key) { result = append(result, o) continue } if slices.Contains(values, o.Value) { added = append(added, o.Value) result = append(result, o) continue } } for _, value := range values { if slices.Contains(added, value) { continue } result = result.withAddedOption(key, value) } return result } ================================================ FILE: modules/git/config/section.go ================================================ package config import ( "fmt" "strings" ) // Section is the representation of a section inside git configuration files. // Each Section contains Options that are used by both the Git plumbing // and the porcelains. // Sections can be further divided into subsections. To begin a subsection // put its name in double quotes, separated by space from the section name, // in the section header, like in the example below: // // [section "subsection"] // // All the other lines (and the remainder of the line after the section header) // are recognized as option variables, in the form "name = value" (or just name, // which is a short-hand to say that the variable is the boolean "true"). // The variable names are case-insensitive, allow only alphanumeric characters // and -, and must start with an alphabetic character: // // [section "subsection1"] // option1 = value1 // option2 // [section "subsection2"] // option3 = value2 type Section struct { Name string Options Options Subsections Subsections } type Subsection struct { Name string Options Options } type Sections []*Section func (s Sections) GoString() string { var strs []string for _, ss := range s { strs = append(strs, fmt.Sprintf("%#v", ss)) } return strings.Join(strs, ", ") } type Subsections []*Subsection func (s Subsections) GoString() string { var strs []string for _, ss := range s { strs = append(strs, fmt.Sprintf("%#v", ss)) } return strings.Join(strs, ", ") } // IsName checks if the name provided is equals to the Section name, case insensitive. func (s *Section) IsName(name string) bool { return strings.EqualFold(s.Name, name) } // Subsection returns a Subsection from the specified Section. If the // Subsection does not exists, new one is created and added to Section. func (s *Section) Subsection(name string) *Subsection { for i := len(s.Subsections) - 1; i >= 0; i-- { ss := s.Subsections[i] if ss.IsName(name) { return ss } } ss := &Subsection{Name: name} s.Subsections = append(s.Subsections, ss) return ss } // HasSubsection checks if the Section has a Subsection with the specified name. func (s *Section) HasSubsection(name string) bool { for _, ss := range s.Subsections { if ss.IsName(name) { return true } } return false } // RemoveSubsection removes a subsection from a Section. func (s *Section) RemoveSubsection(name string) *Section { result := Subsections{} for _, s := range s.Subsections { if !s.IsName(name) { result = append(result, s) } } s.Subsections = result return s } // Option returns the value for the specified key. Empty string is returned if // key does not exists. func (s *Section) Option(key string) string { return s.Options.Get(key) } // OptionAll returns all possible values for an option with the specified key. // If the option does not exists, an empty slice will be returned. func (s *Section) OptionAll(key string) []string { return s.Options.GetAll(key) } // HasOption checks if the Section has an Option with the given key. func (s *Section) HasOption(key string) bool { return s.Options.Has(key) } // AddOption adds a new Option to the Section. The updated Section is returned. func (s *Section) AddOption(key string, value string) *Section { s.Options = s.Options.withAddedOption(key, value) return s } // SetOption adds a new Option to the Section. If the option already exists, is replaced. // The updated Section is returned. func (s *Section) SetOption(key string, value string) *Section { s.Options = s.Options.withSettedOption(key, value) return s } // Remove an option with the specified key. The updated Section is returned. func (s *Section) RemoveOption(key string) *Section { s.Options = s.Options.withoutOption(key) return s } // IsName checks if the name of the subsection is exactly the specified name. func (s *Subsection) IsName(name string) bool { return s.Name == name } // Option returns an option with the specified key. If the option does not exists, // empty spring will be returned. func (s *Subsection) Option(key string) string { return s.Options.Get(key) } // OptionAll returns all possible values for an option with the specified key. // If the option does not exists, an empty slice will be returned. func (s *Subsection) OptionAll(key string) []string { return s.Options.GetAll(key) } // HasOption checks if the Subsection has an Option with the given key. func (s *Subsection) HasOption(key string) bool { return s.Options.Has(key) } // AddOption adds a new Option to the Subsection. The updated Subsection is returned. func (s *Subsection) AddOption(key string, value string) *Subsection { s.Options = s.Options.withAddedOption(key, value) return s } // SetOption adds a new Option to the Subsection. If the option already exists, is replaced. // The updated Subsection is returned. func (s *Subsection) SetOption(key string, value ...string) *Subsection { s.Options = s.Options.withSettedOption(key, value...) return s } // RemoveOption removes the option with the specified key. The updated Subsection is returned. func (s *Subsection) RemoveOption(key string) *Subsection { s.Options = s.Options.withoutOption(key) return s } ================================================ FILE: modules/git/constant.go ================================================ package git import ( "crypto/sha1" "crypto/sha256" "fmt" "hash" ) const ( GIT_HASH_UNKNOWN = 0 GIT_HASH_SHA1 = 1 GIT_HASH_SHA256 = 2 GIT_SHA1_RAWSZ = 20 GIT_SHA1_HEXSZ = GIT_SHA1_RAWSZ * 2 GIT_SHA256_RAWSZ = 32 GIT_SHA256_HEXSZ = GIT_SHA256_RAWSZ * 2 GIT_MAX_RAWSZ = GIT_SHA256_RAWSZ GIT_MAX_HEXSZ = GIT_SHA256_HEXSZ GIT_SHA1_ZERO_HEX = "0000000000000000000000000000000000000000" GIT_SHA256_ZERO_HEX = "0000000000000000000000000000000000000000000000000000000000000000" GIT_SHA1_EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" GIT_SHA1_EMPTY_BLOB = "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391" GIT_SHA256_EMPTY_TREE = "6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321" GIT_SHA256_EMPTY_BLOB = "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813" GIT_SHA1_NAME = "sha1" GIT_SHA256_NAME = "sha256" HashKey = "hash-algo" ReferenceNameDefault = "refs/heads/master" ) const ( reverseHexTable = "" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ) // var ( // sha1Regex = regexp.MustCompile(`\A[0-9a-f]{40}\z`) // sha256Regex = regexp.MustCompile(`\A[0-9a-f]{64}\z`) // ) func ValidateHexLax(hs string) bool { bs := []byte(hs) if len(bs) < 5 || len(bs) > GIT_SHA256_HEXSZ { return false } for _, b := range bs { if c := reverseHexTable[b]; c > 0x0f { return false } } return true } func ValidateNumber(s string) bool { bs := []byte(s) for _, b := range bs { if c := reverseHexTable[b]; c > 0x9 { return false } } return true } func ValidateHex(hs string) error { bs := []byte(hs) if len(bs) != GIT_SHA1_HEXSZ && len(bs) != GIT_SHA256_HEXSZ { return fmt.Errorf("object id: %q was not a valid character hexadecimal, len=%d", hs, len(bs)) } for _, b := range bs { if c := reverseHexTable[b]; c > 0x0f { return fmt.Errorf("object id: %q was not a valid character hexadecimal", hs) } } return nil } func IsValidateSHA256(hs string) bool { if len(hs) != GIT_SHA256_HEXSZ { return false } bs := []byte(hs) for _, b := range bs { if c := reverseHexTable[b]; c > 0x0f { return false } } return true } func IsHashZero(hexOID string) bool { if len(hexOID) == GIT_SHA256_HEXSZ { return hexOID == GIT_SHA256_ZERO_HEX } return hexOID == GIT_SHA1_ZERO_HEX } func ConformingHashZero(hexOID string) string { if len(hexOID) == GIT_SHA256_HEXSZ { return GIT_SHA256_ZERO_HEX } return GIT_SHA1_ZERO_HEX } func ConformingEmptyTree(hexOID string) string { if len(hexOID) == GIT_SHA256_HEXSZ { return GIT_SHA256_EMPTY_TREE } return GIT_SHA1_EMPTY_TREE } func ConformingEmptyBlob(hexOID string) string { if len(hexOID) == GIT_SHA256_HEXSZ { return GIT_SHA256_EMPTY_BLOB } return GIT_SHA1_EMPTY_BLOB } // HashFormat: https://git-scm.com/docs/hash-function-transition/ type HashFormat int const ( HashUNKNOWN HashFormat = iota // UNKNOWN HashSHA1 // SHA1 HashSHA256 // SHA256 ) func (h HashFormat) String() string { switch h { case HashSHA1: return GIT_SHA1_NAME case HashSHA256: return GIT_SHA256_NAME } return "unknown" } // RawSize: raw length func (h HashFormat) RawSize() int { switch h { case HashSHA1: return GIT_SHA1_RAWSZ case HashSHA256: return GIT_SHA256_RAWSZ } return 0 } // HexSize: hex size func (h HashFormat) HexSize() int { switch h { case HashSHA1: return GIT_SHA1_HEXSZ case HashSHA256: return GIT_SHA256_HEXSZ } return 0 } func (h HashFormat) EmptyTreeID() string { switch h { case HashSHA1: return GIT_SHA1_EMPTY_TREE case HashSHA256: return GIT_SHA256_EMPTY_TREE } return "" } func (h HashFormat) EmptyBlobID() string { switch h { case HashSHA1: return GIT_SHA1_EMPTY_BLOB case HashSHA256: return GIT_SHA256_EMPTY_BLOB } return "" } func (h HashFormat) ZeroOID() string { switch h { case HashSHA1: return GIT_SHA1_ZERO_HEX case HashSHA256: return GIT_SHA256_ZERO_HEX } return "" } func (h HashFormat) Hasher() hash.Hash { switch h { case HashSHA1: return sha1.New() case HashSHA256: return sha256.New() } return sha1.New() } func HashFormatFromName(algo string) HashFormat { switch algo { case GIT_SHA1_NAME: return HashSHA1 case GIT_SHA256_NAME: return HashSHA256 } return HashSHA1 } func HashFormatFromSize(size int) HashFormat { switch size { case GIT_SHA1_HEXSZ: return HashSHA1 case GIT_SHA256_HEXSZ: return HashSHA256 } return HashUNKNOWN } func HashFormatFromBinarySize(bsize int) HashFormat { switch bsize { case GIT_SHA1_RAWSZ: return HashSHA1 case GIT_SHA256_RAWSZ: return HashSHA256 } return HashUNKNOWN } ================================================ FILE: modules/git/decode.go ================================================ package git import ( "bufio" "bytes" "context" "errors" "fmt" "io" "strconv" "strings" "github.com/antgroup/hugescm/modules/command" ) const ( // contentsCommand is the command expected by the `--batch-command` mode of git-cat-file(1) // for reading an objects contents. contentsCommand = "contents" // infoCommand is the command expected by the `--batch-command` mode of git-cat-file(1) // for reading an objects info. infoCommand = "info" // Used with --buffer to execute all preceding commands that were issued since the beginning or since the last flush was issued. // When --buffer is used, no output will come until a flush is issued. // When --buffer is not used, commands are flushed each time without issuing flush. flushCommand = "flush" ) type Decoder struct { stdout *bufio.Reader stdin *bufio.Writer cleanup func() } func NewDecoder(ctx context.Context, repoPath string) (*Decoder, error) { stderr := command.NewStderr() cmd := command.NewFromOptions(ctx, &command.RunOpts{Stderr: stderr}, "git", "--git-dir", repoPath, "cat-file", "--batch-command", "--buffer") stdout, err := cmd.StdoutPipe() if err != nil { return nil, err } stdin, err := cmd.StdinPipe() if err != nil { _ = stdout.Close() return nil, err } if err := cmd.Start(); err != nil { _ = stdout.Close() _ = stdin.Close() return nil, err } return &Decoder{ stdout: bufio.NewReader(stdout), stdin: bufio.NewWriter(stdin), cleanup: func() { _ = stdin.Close() _ = stdout.Close() _ = cmd.Wait() // if err := cmd.Wait(); err != nil { // logrus.Infof("stderr: %s", stderr.String()) // } }}, nil } func (d *Decoder) Close() error { if d.cleanup != nil { d.cleanup() } return nil } func (d *Decoder) flush() error { if _, err := d.stdin.WriteString(flushCommand); err != nil { return fmt.Errorf("writing flush command: %w", err) } if err := d.stdin.WriteByte('\n'); err != nil { return fmt.Errorf("terminating flush command: %w", err) } if err := d.stdin.Flush(); err != nil { return fmt.Errorf("flushing: %w", err) } return nil } func (d *Decoder) readObject(cmd, revision string) error { if strings.IndexByte(revision, '\n') != -1 { return NewObjectNotFound(revision) } if _, err := d.stdin.WriteString(cmd); err != nil { return fmt.Errorf("writing cmd request: %w", err) } if err := d.stdin.WriteByte(' '); err != nil { return fmt.Errorf("terminating object request: %w", err) } if _, err := d.stdin.WriteString(revision); err != nil { return fmt.Errorf("writing object request: %w", err) } if err := d.stdin.WriteByte('\n'); err != nil { return fmt.Errorf("terminating object request: %w", err) } return nil } const ( missingSuffix = " missing" ) // readBatchLine reads the header line from cat-file --batch-command -z --buffer // We expect: // SP SP LF // sha is a 40/64byte not 20/32byte here func (d *Decoder) readBatchLine() (string, string, int64, error) { line, err := d.stdout.ReadString('\n') if err != nil { return "", "", 0, err } if len(line) == 1 { if line, err = d.stdout.ReadString('\n'); err != nil { return "", "", 0, err } } line = strings.TrimSuffix(line, "\n") if strings.HasSuffix(line, missingSuffix) { return "", "", 0, NewObjectNotFound(line[0 : len(line)-len(missingSuffix)]) } before, after, ok := strings.Cut(line, " ") if !ok { return "", "", 0, NewObjectNotFound(line) } sha := before t, sizeSz, ok := strings.Cut(after, " ") if !ok { return "", "", 0, NewObjectNotFound(sha) } size, err := strconv.ParseInt(sizeSz, 10, 64) return sha, t, size, err } func (d *Decoder) Meta(objectKey string) (*Metadata, error) { if err := d.readObject(infoCommand, objectKey); err != nil { return nil, err } if err := d.flush(); err != nil { return nil, err } oid, objectType, size, err := d.readBatchLine() if err != nil { return nil, err } t, _ := ParseObjectType(objectType) return &Metadata{Hash: oid, Type: t, Size: size}, nil } func (d *Decoder) object(objectKey string) (*Object, error) { if err := d.readObject(contentsCommand, objectKey); err != nil { return nil, err } if err := d.flush(); err != nil { return nil, err } oid, objectType, size, err := d.readBatchLine() if err != nil { return nil, err } r := io.LimitReader(d.stdout, size) t, _ := ParseObjectType(objectType) return &Object{Hash: oid, Size: size, Type: t, dataReader: r}, nil } func (d *Decoder) ObjectReader(objectKey string) (*Object, error) { return d.object(objectKey) } func (d *Decoder) Object(objectKey string) (any, error) { o, err := d.object(objectKey) if err != nil { return nil, err } if o.Type == BlobObject { return o, nil } defer o.Discard() switch o.Type { case CommitObject: c := new(Commit) if err := c.Decode(o.Hash, o, o.Size); err != nil { return nil, err } return c, nil case TagObject: t := new(Tag) if err := t.Decode(o.Hash, o, o.Size); err != nil { return nil, err } return t, nil case TreeObject: t := new(Tree) if _, err := t.Decode(o.Hash, o, o.Size); err != nil { return nil, err } return t, nil default: } return nil, &ErrUnexpectedType{message: fmt.Sprintf("unexpected object '%s' type: %s", objectKey, o.Type)} } func (d *Decoder) Tree(objectKey string) (*Tree, error) { o, err := d.object(objectKey) if err != nil { return nil, err } defer o.Discard() if o.Type != TreeObject { return nil, &ErrUnexpectedType{message: fmt.Sprintf("object '%s' type is '%s' not tree", objectKey, o.Type)} } t := new(Tree) if _, err := t.Decode(o.Hash, o, o.Size); err != nil { return nil, err } t.size = o.Size return t, nil } func (d *Decoder) Commit(objectKey string) (*Commit, error) { o, err := d.object(objectKey) if err != nil { return nil, err } defer o.Discard() if o.Type != CommitObject { return nil, &ErrUnexpectedType{message: fmt.Sprintf("object '%s' type is '%s' not commit", objectKey, o.Type)} } c := new(Commit) if err := c.Decode(o.Hash, o, o.Size); err != nil { return nil, err } return c, nil } func (d *Decoder) Blob(objectKey string) (*Object, error) { o, err := d.object(objectKey) if err != nil { return nil, err } if o.Type != BlobObject { o.Discard() return nil, &ErrUnexpectedType{message: fmt.Sprintf("object '%s' type is '%s' not blob", objectKey, o.Type)} } return o, nil } func (d *Decoder) ReadOverflow(objectKey string, limit int64) (b []byte, err error) { br, err := d.Blob(objectKey) if err != nil { return nil, err } defer br.Discard() if limit > 0 && br.Size > limit { return nil, errors.New("reading file size limit exceeded") } b, err = io.ReadAll(br.dataReader) return } func (d *Decoder) BlobEntry(revision string, path string) (*Object, error) { return d.Blob(revision + ":" + path) } func (d *Decoder) ReadEntry(revision string, path string) (*Object, error) { return d.ObjectReader(revision + ":" + path) } // ParseRev resolve peeled commit func (d *Decoder) ParseRev(objectKey string) (*Commit, error) { oid := objectKey for { o, err := d.object(oid) if err != nil { return nil, err } switch o.Type { case CommitObject: c := new(Commit) if err := c.Decode(o.Hash, o, o.Size); err != nil { return nil, err } return c, nil case TagObject: t := new(Tag) if err := t.Decode(o.Hash, o, o.Size); err != nil { return nil, err } t.size = o.Size oid = t.Object default: o.Discard() return nil, &ErrUnexpectedType{message: fmt.Sprintf("object '%s' type is '%s' not commit", oid, o.Type)} } } } func (d *Decoder) ExhaustiveMeta(location string) (*Metadata, error) { bs := []byte(location) for i := 0; i < len(location); { pos := bytes.IndexByte(bs[i:], '/') if pos == -1 { return d.Meta(location) } bs[pos+i] = ':' m, err := d.Meta(string(bs)) if err == nil { return m, nil } if !IsErrNotExist(err) { return nil, err } bs[pos+i] = '/' i += pos + 1 } return nil, NewObjectNotFound(location) } // ExhaustiveObjectReader: Exhaustive read object // // Can two branches 'a' and 'a/b' exist at the same time in git? Normally, this is impossible, // but when we manually edit packed-refs, we can create 'a' and 'a/b' at the same time, // because packed-refs has no file system restrictions, of course this will Annoys git, // so it's not recommended, in the 'Exhaustive*' functions, we don't care about this unusual case. func (d *Decoder) ExhaustiveObjectReader(location string) (*Object, error) { bs := []byte(location) for i := 0; i < len(location); { pos := bytes.IndexByte(bs[i:], '/') if pos == -1 { return d.ObjectReader(location) } bs[pos+i] = ':' obj, err := d.ObjectReader(string(bs)) if err == nil { return obj, nil } if !IsErrNotExist(err) { return nil, err } bs[pos+i] = '/' i += pos + 1 } return nil, NewObjectNotFound(location) } func ParseRev(ctx context.Context, repoPath string, revision string) (*Commit, error) { d, err := NewDecoder(ctx, repoPath) if err != nil { return nil, err } defer d.Close() // nolint return d.ParseRev(revision) } ================================================ FILE: modules/git/error.go ================================================ package git import ( "errors" "fmt" "strings" ) // ErrNotExist commit not exist error type ErrNotExist struct { message string } // IsErrNotExist if some error is ErrNotExist func IsErrNotExist(err error) bool { var e *ErrNotExist return errors.As(err, &e) } func (err *ErrNotExist) Error() string { return err.message } func NewObjectNotFound(oid string) error { return &ErrNotExist{message: fmt.Sprintf("object '%s' does not exist", oid)} } func NewBranchNotFound(branch string) error { return &ErrNotExist{message: fmt.Sprintf("branch '%s' does not exist ", branch)} } var ( ErrNoBranches = NewBranchNotFound("HEAD") ) func NewTagNotFound(branch string) error { return &ErrNotExist{message: fmt.Sprintf("tag '%s' does not exist ", branch)} } func NewRevisionNotFound(branch string) error { return &ErrNotExist{message: fmt.Sprintf("revision '%s' does not exist ", branch)} } type ErrUnexpectedType struct { message string } func (e *ErrUnexpectedType) Error() string { return e.message } func IsErrUnexpectedType(err error) bool { var e *ErrUnexpectedType return errors.As(err, &e) } var ( notFoundPrefix = []string{ "fatal: ambiguous argument", "fatal: unable to read", "fatal: bad object", "fatal: bad revision", //"fatal: unable to read tree", } ) func ErrorIsNotFound(message string) bool { for _, s := range notFoundPrefix { if strings.HasPrefix(message, s) { return true } } return false } ================================================ FILE: modules/git/filemode.go ================================================ package git import ( "encoding/binary" "errors" "fmt" "os" "strconv" "strings" "github.com/antgroup/hugescm/modules/strengthen" ) // A FileMode represents the kind of tree entries used by git. It // resembles regular file systems modes, although FileModes are // considerably simpler (there are not so many), and there are some, // like Submodule that has no file system equivalent. type FileMode uint32 const ( // Empty is used as the FileMode of tree elements when comparing // trees in the following situations: // // - the mode of tree elements before their creation. - the mode of // tree elements after their deletion. - the mode of unmerged // elements when checking the index. // // Empty has no file system equivalent. As Empty is the zero value // of FileMode, it is also returned by New and // NewFromOsNewFromOSFileMode along with an error, when they fail. Empty FileMode = 0 // Dir represent a Directory. Dir FileMode = 0040000 // Regular represent non-executable files. Please note this is not // the same as golang regular files, which include executable files. Regular FileMode = 0100644 // Deprecated represent non-executable files with the group writable // bit set. This mode was supported by the first versions of git, // but it has been deprecated nowadays. This library uses them // internally, so you can read old packfiles, but will treat them as // Regulars when interfacing with the outside world. This is the // standard git behavior. Deprecated FileMode = 0100664 // Executable represents executable files. Executable FileMode = 0100755 // Symlink represents symbolic links to files. Symlink FileMode = 0120000 // Submodule represents git submodules. This mode has no file system // equivalent. Submodule FileMode = 0160000 ) // New takes the octal string representation of a FileMode and returns // the FileMode and a nil error. If the string can not be parsed to a // 32 bit unsigned octal number, it returns Empty and the parsing error. // // Example: "40000" means Dir, "100644" means Regular. // // Please note this function does not check if the returned FileMode // is valid in git or if it is malformed. For instance, "1" will // return the malformed FileMode(1) and a nil error. func New(s string) (FileMode, error) { n, err := strconv.ParseUint(s, 8, 32) if err != nil { return Empty, err } return FileMode(n), nil } // NewFromOS returns the FileMode used by git to represent // the provided file system modes and a nil error on success. If the // file system mode cannot be mapped to any valid git mode (as with // sockets or named pipes), it will return Empty and an error. // // Note that some git modes cannot be generated from os.FileModes, like // Deprecated and Submodule; while Empty will be returned, along with an // error, only when the method fails. func NewFromOS(m os.FileMode) (FileMode, error) { if m.IsRegular() { if isSetTemporary(m) { return Empty, fmt.Errorf("no equivalent git mode for %s", m) } if isSetCharDevice(m) { return Empty, fmt.Errorf("no equivalent git mode for %s", m) } if isSetUserExecutable(m) { return Executable, nil } return Regular, nil } if m.IsDir() { return Dir, nil } if isSetSymLink(m) { return Symlink, nil } return Empty, fmt.Errorf("no equivalent git mode for %s", m) } func isSetCharDevice(m os.FileMode) bool { return m&os.ModeCharDevice != 0 } func isSetTemporary(m os.FileMode) bool { return m&os.ModeTemporary != 0 } func isSetUserExecutable(m os.FileMode) bool { return m&0100 != 0 } func isSetSymLink(m os.FileMode) bool { return m&os.ModeSymlink != 0 } // Bytes return a slice of 4 bytes with the mode in little endian // encoding. func (m FileMode) Bytes() []byte { ret := make([]byte, 4) binary.LittleEndian.PutUint32(ret, uint32(m)) return ret } // IsMalformed returns if the FileMode should not appear in a git packfile, // this is: Empty and any other mode not mentioned as a constant in this // package. func (m FileMode) IsMalformed() bool { return m != Dir && m != Regular && m != Deprecated && m != Executable && m != Symlink && m != Submodule } // String returns the FileMode as a string in the standard git format, // this is, an octal number padded with ceros to 7 digits. Malformed // modes are printed in that same format, for easier debugging. // // Example: Regular is "0100644", Empty is "0000000". func (m FileMode) String() string { return fmt.Sprintf("%07o", uint32(m)) } // IsRegular returns if the FileMode represents that of a regular file, // this is, either Regular or Deprecated. Please note that Executable // are not regular even though in the UNIX tradition, they usually are: // See the IsFile method. func (m FileMode) IsRegular() bool { return m == Regular || m == Deprecated } // IsFile returns if the FileMode represents that of a file, this is, // Regular, Deprecated, Executable or Link. func (m FileMode) IsFile() bool { return m == Regular || m == Deprecated || m == Executable || m == Symlink } type ErrMalformedMode struct { m FileMode } func (e *ErrMalformedMode) Error() string { return fmt.Sprintf("malformed mode (%s)", e.m) } func IsErrMalformedMode(err error) bool { var e *ErrMalformedMode return errors.As(err, &e) } // ToOSFileMode returns the os.FileMode to be used when creating file // system elements with the given git mode and a nil error on success. // // When the provided mode cannot be mapped to a valid file system mode // (e.g. Submodule) it returns os.FileMode(0) and an error. // // The returned file mode does not take into account the umask. func (m FileMode) ToOSFileMode() (os.FileMode, error) { switch m { case Dir: return os.ModePerm | os.ModeDir, nil case Submodule: return os.ModePerm | os.ModeDir, nil case Regular: return os.FileMode(0644), nil // Deprecated is no longer allowed: treated as a Regular instead case Deprecated: return os.FileMode(0644), nil case Executable: return os.FileMode(0755), nil case Symlink: return os.ModePerm | os.ModeSymlink, nil } return os.FileMode(0), &ErrMalformedMode{m: m} } func (m FileMode) MarshalJSON() ([]byte, error) { return strengthen.BufferCat("\"", m.String(), "\""), nil } func (m *FileMode) UnmarshalJSON(b []byte) error { s := string(b) v, err := strconv.ParseInt(strings.TrimSuffix(strings.TrimPrefix(s, "\""), "\""), 8, 64) if err != nil { return err } *m = FileMode(v) return nil } ================================================ FILE: modules/git/gitobj/LICENSE.md ================================================ MIT License Copyright (c) 2017- GitHub, Inc. and Git LFS contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/git/gitobj/README.md ================================================ # object Port from [https://github.com/git-lfs/gitobj](https://github.com/git-lfs/gitobj) ## License MIT. [1]: https://git-scm.com/book/en/v2/Git-Internals-Packfiles ================================================ FILE: modules/git/gitobj/SECURITY.md ================================================ Please see [SECURITY.md](https://github.com/git-lfs/git-lfs/blob/master/SECURITY.md) in the main Git LFS repository for information on how to report security vulnerabilities in this package. ================================================ FILE: modules/git/gitobj/VERSION ================================================ https://github.com/git-lfs/gitobj b805ee788076aa592cd2f5e0e7d09d7efd38187a ================================================ FILE: modules/git/gitobj/backend.go ================================================ package gitobj import ( "bufio" "errors" "hash" "io" "os" "path" "regexp" "strconv" "strings" "github.com/antgroup/hugescm/modules/git/gitobj/pack" "github.com/antgroup/hugescm/modules/git/gitobj/storage" ) // NewFilesystemBackend initializes a new filesystem-based backend, // optionally with additional alternates as specified in the // `alternates` variable. The syntax is that of the Git environment variable // GIT_ALTERNATE_OBJECT_DIRECTORIES. The hash algorithm used is specified by // the algo parameter. func NewFilesystemBackend(root, tmp, alternates string, algo hash.Hash) (storage.Backend, error) { fo := newFileStorer(root, tmp) packs, err := pack.NewStorage(root, algo) if err != nil { return nil, err } storage, err := findAllBackends(fo, packs, root, algo) if err != nil { return nil, err } storage, err = addAlternatesFromEnvironment(storage, alternates, algo) if err != nil { return nil, err } return &filesystemBackend{ fs: fo, backends: storage, }, nil } func findAllBackends(mainLoose *fileStorer, mainPacked *pack.Storage, root string, algo hash.Hash) ([]storage.Storage, error) { storage := make([]storage.Storage, 2) storage[0] = mainLoose storage[1] = mainPacked f, err := os.Open(path.Join(root, "info", "alternates")) if err != nil { // No alternates file, no problem. if errors.Is(err, os.ErrNotExist) { return storage, nil } return nil, err } defer f.Close() // nolint scanner := bufio.NewScanner(f) for scanner.Scan() { storage, err = addAlternateDirectory(storage, scanner.Text(), algo) if err != nil { return nil, err } } if err := scanner.Err(); err != nil { return nil, err } return storage, nil } func addAlternateDirectory(s []storage.Storage, dir string, algo hash.Hash) ([]storage.Storage, error) { s = append(s, newFileStorer(dir, "")) pack, err := pack.NewStorage(dir, algo) if err != nil { return s, err } s = append(s, pack) return s, nil } func addAlternatesFromEnvironment(s []storage.Storage, env string, algo hash.Hash) ([]storage.Storage, error) { if len(env) == 0 { return s, nil } for _, dir := range splitAlternateString(env, alternatesSeparator) { var err error s, err = addAlternateDirectory(s, dir, algo) if err != nil { return nil, err } } return s, nil } var ( octalEscape = regexp.MustCompile(`\\[0-7]{1,3}`) hexEscape = regexp.MustCompile(`\\x[0-9a-fA-F]{2}`) replacements = []struct { olds string news string }{ {`\a`, "\a"}, {`\b`, "\b"}, {`\t`, "\t"}, {`\n`, "\n"}, {`\v`, "\v"}, {`\f`, "\f"}, {`\r`, "\r"}, {`\\`, "\\"}, {`\"`, "\""}, {`\'`, "'"}, } ) func splitAlternateString(env string, separator string) []string { dirs := strings.Split(env, separator) for i, s := range dirs { if !strings.HasPrefix(s, `"`) || !strings.HasSuffix(s, `"`) { continue } // Strip leading and trailing quotation marks s = s[1 : len(s)-1] for _, repl := range replacements { s = strings.ReplaceAll(s, repl.olds, repl.news) } s = octalEscape.ReplaceAllStringFunc(s, func(inp string) string { val, _ := strconv.ParseUint(inp[1:], 8, 64) return string([]byte{byte(val)}) }) s = hexEscape.ReplaceAllStringFunc(s, func(inp string) string { val, _ := strconv.ParseUint(inp[2:], 16, 64) return string([]byte{byte(val)}) }) dirs[i] = s } return dirs } // NewMemoryBackend initializes a new memory-based backend. // // A value of "nil" is acceptable and indicates that no entries should be added // to the memory backend at construction time. func NewMemoryBackend(m map[string]io.ReadWriter) (storage.Backend, error) { return &memoryBackend{ms: newMemoryStorer(m)}, nil } type filesystemBackend struct { fs *fileStorer backends []storage.Storage } func (b *filesystemBackend) Storage() (storage.Storage, storage.WritableStorage) { return storage.MultiStorage(b.backends...), b.fs } type memoryBackend struct { ms *memoryStorer } func (b *memoryBackend) Storage() (storage.Storage, storage.WritableStorage) { return b.ms, b.ms } ================================================ FILE: modules/git/gitobj/backend_nix.go ================================================ //go:build !windows package gitobj const alternatesSeparator = ":" ================================================ FILE: modules/git/gitobj/backend_test.go ================================================ package gitobj import ( "bytes" "encoding/hex" "io" "reflect" "testing" ) func TestNewMemoryBackend(t *testing.T) { backend, err := NewMemoryBackend(nil) if err != nil { t.Errorf("Expected nil, got %v", err) } ro, rw := backend.Storage() if ro != rw { t.Errorf("Expected %v, got %v", ro, rw) } if ro.(*memoryStorer) == nil { t.Errorf("Expected non-nil") } } func TestNewMemoryBackendWithReadOnlyData(t *testing.T) { sha := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" oid, err := hex.DecodeString(sha) if err != nil { t.Errorf("Expected nil, got %v", err) } m := map[string]io.ReadWriter{ sha: bytes.NewBuffer([]byte{0x1}), } backend, err := NewMemoryBackend(m) if err != nil { t.Errorf("Expected nil, got %v", err) } ro, _ := backend.Storage() reader, err := ro.Open(oid) if err != nil { t.Errorf("Expected nil, got %v", err) } contents, err := io.ReadAll(reader) if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte{0x1}, contents) { t.Errorf("Expected %v, got %v", []byte{0x1}, contents) } } func TestNewMemoryBackendWithWritableData(t *testing.T) { sha := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" oid, err := hex.DecodeString(sha) if err != nil { t.Errorf("Expected nil, got %v", err) } backend, err := NewMemoryBackend(make(map[string]io.ReadWriter)) if err != nil { t.Errorf("Expected nil, got %v", err) } buf := bytes.NewBuffer([]byte{0x1}) ro, rw := backend.Storage() _, _ = rw.Store(oid, buf) reader, err := ro.Open(oid) if err != nil { t.Errorf("Expected nil, got %v", err) } contents, err := io.ReadAll(reader) if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte{0x1}, contents) { t.Errorf("Expected %v, got %v", []byte{0x1}, contents) } } func TestSplitAlternatesString(t *testing.T) { testCases := []struct { input string expected []string }{ {"abc", []string{"abc"}}, {"abc:def", []string{"abc", "def"}}, {`"abc":def`, []string{"abc", "def"}}, {`"i\alike\bcomplicated\tstrings":def`, []string{"i\alike\bcomplicated\tstrings", "def"}}, {`abc:"i\nlike\vcomplicated\fstrings\r":def`, []string{"abc", "i\nlike\vcomplicated\fstrings\r", "def"}}, {`abc:"uni\xc2\xa9ode":def`, []string{"abc", "uni©ode", "def"}}, {`abc:"uni\302\251ode\10\0":def`, []string{"abc", "uni©ode\x08\x00", "def"}}, {`abc:"cookie\\monster\"":def`, []string{"abc", "cookie\\monster\"", "def"}}, } for _, test := range testCases { actual := splitAlternateString(test.input, ":") if !reflect.DeepEqual(actual, test.expected) { t.Errorf("unexpected output for %q: got %v, expected %v", test.input, actual, test.expected) } } } ================================================ FILE: modules/git/gitobj/backend_windows.go ================================================ //go:build windows package gitobj const alternatesSeparator = ";" ================================================ FILE: modules/git/gitobj/blob.go ================================================ package gitobj import ( "bytes" "fmt" "hash" "io" "os" ) // Blob represents a Git object of type "blob". type Blob struct { // Size is the total uncompressed size of the blob's contents. Size int64 // Contents is a reader that yields the uncompressed blob contents. It // may only be read once. It may or may not implement io.ReadSeeker. Contents io.Reader // closeFn is a function that is called to free any resources held by // the Blob. In particular, this will close a file, if the Blob is // being read from a file on disk. closeFn func() error } // NewBlobFromBytes returns a new *Blob that yields the data given. func NewBlobFromBytes(contents []byte) *Blob { return &Blob{ Contents: bytes.NewReader(contents), Size: int64(len(contents)), } } // NewBlobFromFile returns a new *Blob that contains the contents of the file // at location "path" on disk. NewBlobFromFile does not read the file ahead of // time, and instead defers this task until encoding the blob to the object // database. // // If the file cannot be opened or stat(1)-ed, an error will be returned. // // When the blob receives a function call Close(), the file will also be closed, // and any error encountered in doing so will be returned from Close(). func NewBlobFromFile(path string) (*Blob, error) { f, err := os.Open(path) if err != nil { return nil, fmt.Errorf("git/object: could not open: %s: %w", path, err) } stat, err := f.Stat() if err != nil { _ = f.Close() return nil, fmt.Errorf("git/object: could not stat %s: %w", path, err) } return &Blob{ Contents: f, Size: stat.Size(), closeFn: func() error { if err := f.Close(); err != nil { return fmt.Errorf("git/object: could not close %s: %w", path, err) } return nil }, }, nil } // Type implements Object.ObjectType by returning the correct object type for // Blobs, BlobObjectType. func (b *Blob) Type() ObjectType { return BlobObjectType } // Decode implements Object.Decode and decodes the uncompressed blob contents // being read. It returns the number of bytes that it consumed off of the // stream, which is always zero. // // If any errors are encountered while reading the blob, they will be returned. func (b *Blob) Decode(hash hash.Hash, r io.Reader, size int64) (n int, err error) { b.Size = size b.Contents = io.LimitReader(r, size) b.closeFn = func() error { if closer, ok := r.(io.Closer); ok { return closer.Close() } return nil } return 0, nil } // Encode encodes the blob's contents to the given io.Writer, "w". If there was // any error copying the blob's contents, that error will be returned. // // Otherwise, the number of bytes written will be returned. func (b *Blob) Encode(to io.Writer) (n int, err error) { nn, err := io.Copy(to, b.Contents) return int(nn), err } // Closes closes any resources held by the open Blob, or returns nil if there // were no errors. func (b *Blob) Close() error { if b.closeFn == nil { return nil } return b.closeFn() } // Equal returns whether the receiving and given blobs are equal, or in other // words, whether they are represented by the same SHA-1 when saved to the // object database. func (b *Blob) Equal(other *Blob) bool { if (b == nil) != (other == nil) { return false } if b != nil { return b.Contents == other.Contents && b.Size == other.Size } return true } ================================================ FILE: modules/git/gitobj/blob_test.go ================================================ package gitobj import ( "bytes" "crypto/sha1" "errors" "io" "strings" "sync/atomic" "testing" ) func TestBlobReturnsCorrectObjectType(t *testing.T) { if BlobObjectType != new(Blob).Type() { t.Errorf("Expected %v, got %v", BlobObjectType, new(Blob).Type()) } } func TestBlobFromString(t *testing.T) { given := []byte("example") glen := len(given) b := NewBlobFromBytes(given) if uint64(glen) != uint64(b.Size) { t.Errorf("Expected %v, got %v", glen, b.Size) } contents, err := io.ReadAll(b.Contents) if err != nil { t.Fatalf("ReadAll error: %v", err) } if !bytes.Equal(given, contents) { t.Errorf("Expected %v, got %v", given, contents) } } func TestBlobEncoding(t *testing.T) { const contents = "Hello, world!\n" b := &Blob{ Size: int64(len(contents)), Contents: strings.NewReader(contents), } var buf bytes.Buffer if _, err := b.Encode(&buf); err != nil { t.Fatal(err.Error()) } if contents != (&buf).String() { t.Errorf("Expected %v, got %v", contents, (&buf).String()) } } func TestBlobDecoding(t *testing.T) { const contents = "Hello, world!\n" from := strings.NewReader(contents) b := new(Blob) n, err := b.Decode(sha1.New(), from, int64(len(contents))) if n != 0 { t.Errorf("Expected %v, got %v", 0, n) } if err != nil { t.Errorf("Expected nil, got %v", err) } if uint64(len(contents)) != uint64(b.Size) { t.Errorf("Expected %v, got %v", len(contents), b.Size) } got, err := io.ReadAll(b.Contents) if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte(contents), got) { t.Errorf("Expected %v, got %v", []byte(contents), got) } } func TestBlobCallCloseFn(t *testing.T) { var calls uint32 expected := errors.New("some close error") b := &Blob{ closeFn: func() error { atomic.AddUint32(&calls, 1) return expected }, } got := b.Close() if !errors.Is(got, expected) { t.Errorf("Expected %v, got %v", expected, got) } if uint32(1) != calls { t.Errorf("Expected %v, got %v", 1, calls) } } func TestBlobCanCloseWithoutCloseFn(t *testing.T) { b := &Blob{ closeFn: nil, } if b.Close() != nil { t.Errorf("Expected nil, got %v", b.Close()) } } func TestBlobEqualReturnsTrueWithUnchangedContents(t *testing.T) { c := strings.NewReader("Hello, world!") b1 := &Blob{Size: int64(c.Len()), Contents: c} b2 := &Blob{Size: int64(c.Len()), Contents: c} if !b1.Equal(b2) { t.Errorf("Expected true") } } func TestBlobEqualReturnsFalseWithChangedContents(t *testing.T) { c1 := strings.NewReader("Hello, world!") c2 := strings.NewReader("Goodbye, world!") b1 := &Blob{Size: int64(c1.Len()), Contents: c1} b2 := &Blob{Size: int64(c2.Len()), Contents: c2} if b1.Equal(b2) { t.Errorf("Expected false") } } func TestBlobEqualReturnsTrueWhenOneBlobIsNil(t *testing.T) { b1 := &Blob{Size: 1, Contents: bytes.NewReader([]byte{0xa})} b2 := (*Blob)(nil) if b1.Equal(b2) { t.Errorf("Expected false") } if b2.Equal(b1) { t.Errorf("Expected false") } } func TestBlobEqualReturnsTrueWhenBothBlobsAreNil(t *testing.T) { b1 := (*Blob)(nil) b2 := (*Blob)(nil) if !b1.Equal(b2) { t.Errorf("Expected true") } } ================================================ FILE: modules/git/gitobj/commit.go ================================================ package gitobj import ( "bufio" "bytes" "encoding/hex" "fmt" "hash" "io" "strings" "time" ) // Signature represents a commit signature, which can represent either // committership or authorship of the commit that this signature belongs to. It // specifies a name, email, and time that the signature was created. // // NOTE: this type is _not_ used by the `*Commit` instance, as it does not // preserve cruft bytes. It is kept as a convenience type to test with. type Signature struct { // Name is the first and last name of the individual holding this // signature. Name string // Email is the email address of the individual holding this signature. Email string // When is the instant in time when the signature was created. When time.Time } const ( formatTimeZoneOnly = "-0700" ) // String implements the fmt.Stringer interface and formats a Signature as // expected in the Git commit internal object format. For instance: // // Taylor Blau 1494258422 -0600 func (s *Signature) String() string { at := s.When.Unix() zone := s.When.Format(formatTimeZoneOnly) return fmt.Sprintf("%s <%s> %d %s", s.Name, s.Email, at, zone) } // ExtraHeader encapsulates a key-value pairing of header key to header value. // It is stored as a struct{string, string} in memory as opposed to a // map[string]string to maintain ordering in a byte-for-byte encode/decode round // trip. type ExtraHeader struct { // K is the header key, or the first run of bytes up until a ' ' (\x20) // character. K string // V is the header value, or the remaining run of bytes in the line, // stripping off the above "K" field as a prefix. V string } // Commit encapsulates a Git commit entry. type Commit struct { // Author is the Author this commit, or the original writer of the // contents. // // NOTE: this field is stored as a string to ensure any extra "cruft" // bytes are preserved through migration. Author string // Committer is the individual or entity that added this commit to the // history. // // NOTE: this field is stored as a string to ensure any extra "cruft" // bytes are preserved through migration. Committer string // ParentIDs are the IDs of all parents for which this commit is a // linear child. ParentIDs [][]byte // TreeID is the root Tree associated with this commit. TreeID []byte // ExtraHeaders stores headers not listed above, for instance // "encoding", "gpgsig", or "mergetag" (among others). ExtraHeaders []*ExtraHeader // Message is the commit message, including any signing information // associated with this commit. Message string } // Type implements Object.ObjectType by returning the correct object type for // Commits, CommitObjectType. func (c *Commit) Type() ObjectType { return CommitObjectType } // Decode implements Object.Decode and decodes the uncompressed commit being // read. It returns the number of uncompressed bytes being consumed off of the // stream, which should be strictly equal to the size given. // // If any error was encountered along the way, that will be returned, along with // the number of bytes read up to that point. func (c *Commit) Decode(hash hash.Hash, from io.Reader, size int64) (n int, err error) { var finishedHeaders bool r := bufio.NewReader(io.LimitReader(from, size)) var message strings.Builder for { line, readErr := r.ReadString('\n') if readErr != nil && readErr != io.EOF { return 0, readErr } text := strings.TrimSuffix(line, "\n") n += len(line) if len(text) == 0 && !finishedHeaders { finishedHeaders = true continue } if !finishedHeaders { // Check if this is a continuation line (starts with space) // Do this before strings.Cut to avoid unnecessary parsing if len(text) > 0 && text[0] == ' ' && len(c.ExtraHeaders) != 0 { last := c.ExtraHeaders[len(c.ExtraHeaders)-1] last.V += "\n" + text[1:] continue } key, value, ok := strings.Cut(text, " ") switch key { case "tree": if !ok || len(value) == 0 { continue } id, err := hex.DecodeString(value) if err != nil { return n, fmt.Errorf("error parsing tree: %w", err) } c.TreeID = id case "parent": if !ok || len(value) == 0 { continue } id, err := hex.DecodeString(value) if err != nil { return n, fmt.Errorf("error parsing parent: %w", err) } c.ParentIDs = append(c.ParentIDs, id) case "author": if !ok || len(value) == 0 { continue } c.Author = value case "committer": if !ok || len(value) == 0 { continue } c.Committer = value default: // Skip malformed header lines (no space separator) or empty key if !ok || len(key) == 0 { continue } // New header c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{ K: key, V: value, }) } } else { _, _ = message.WriteString(line) } if readErr == io.EOF { break } } c.Message = message.String() return n, err } // Encode encodes the commit's contents to the given io.Writer, "w". If there was // any error copying the commit's contents, that error will be returned. // // Otherwise, the number of bytes written will be returned. func (c *Commit) Encode(to io.Writer) (n int, err error) { n, err = fmt.Fprintf(to, "tree %s\n", hex.EncodeToString(c.TreeID)) if err != nil { return n, err } for _, pid := range c.ParentIDs { n1, err := fmt.Fprintf(to, "parent %s\n", hex.EncodeToString(pid)) if err != nil { return n, err } n += n1 } n2, err := fmt.Fprintf(to, "author %s\ncommitter %s\n", c.Author, c.Committer) if err != nil { return n, err } n += n2 for _, hdr := range c.ExtraHeaders { n3, err := fmt.Fprintf(to, "%s %s\n", hdr.K, strings.ReplaceAll(hdr.V, "\n", "\n ")) if err != nil { return n, err } n += n3 } // c.Message is built from messageParts in the Decode() function. // // Since each entry in messageParts _does not_ contain its trailing LF, // append an empty string to capture the final newline. n4, err := fmt.Fprintf(to, "\n%s", c.Message) if err != nil { return n, err } return n + n4, err } // Equal returns whether the receiving and given commits are equal, or in other // words, whether they are represented by the same SHA-1 when saved to the // object database. func (c *Commit) Equal(other *Commit) bool { if (c == nil) != (other == nil) { return false } if c != nil { if len(c.ParentIDs) != len(other.ParentIDs) { return false } for i := range c.ParentIDs { p1 := c.ParentIDs[i] p2 := other.ParentIDs[i] if !bytes.Equal(p1, p2) { return false } } if len(c.ExtraHeaders) != len(other.ExtraHeaders) { return false } for i := range c.ExtraHeaders { e1 := c.ExtraHeaders[i] e2 := other.ExtraHeaders[i] if e1.K != e2.K || e1.V != e2.V { return false } } return c.Author == other.Author && c.Committer == other.Committer && c.Message == other.Message && bytes.Equal(c.TreeID, other.TreeID) } return true } func (c *Commit) Subject() string { if i := strings.Index(c.Message, "\n"); i != -1 { return c.Message[0:i] } return c.Message } ================================================ FILE: modules/git/gitobj/commit_test.go ================================================ package gitobj import ( "bytes" "crypto/sha1" "encoding/hex" "fmt" "io" "os" "strings" "testing" "time" ) func TestCommitReturnsCorrectObjectType(t *testing.T) { if new(Commit).Type() != CommitObjectType { t.Errorf("Expected CommitObjectType, got %v", new(Commit).Type()) } } func TestCommitEncoding(t *testing.T) { author := &Signature{Name: "John Doe", Email: "john@example.com", When: time.Now()} committer := &Signature{Name: "Jane Doe", Email: "jane@example.com", When: time.Now()} sig := "-----BEGIN PGP SIGNATURE-----\n\n-----END PGP SIGNATURE-----" c := &Commit{ Author: author.String(), Committer: committer.String(), ParentIDs: [][]byte{ []byte("aaaaaaaaaaaaaaaaaaaa"), []byte("bbbbbbbbbbbbbbbbbbbb"), }, TreeID: []byte("cccccccccccccccccccc"), ExtraHeaders: []*ExtraHeader{ {"foo", "bar"}, {"gpgsig", sig}, }, Message: "initial commit", } buf := new(bytes.Buffer) _, err := c.Encode(buf) if err != nil { t.Fatalf("Encode error: %v", err) } assertLine(t, buf, "tree 6363636363636363636363636363636363636363") assertLine(t, buf, "parent 6161616161616161616161616161616161616161") assertLine(t, buf, "parent 6262626262626262626262626262626262626262") assertLine(t, buf, "author %s", author.String()) assertLine(t, buf, "committer %s", committer.String()) assertLine(t, buf, "foo bar") assertLine(t, buf, "gpgsig -----BEGIN PGP SIGNATURE-----") assertLine(t, buf, " ") assertLine(t, buf, " -----END PGP SIGNATURE-----") assertLine(t, buf, "") assertLine(t, buf, "initial commit") if buf.Len() != 0 { t.Errorf("Expected buffer length 0, got %d", buf.Len()) } } func TestCommitDecoding(t *testing.T) { author := &Signature{Name: "John Doe", Email: "john@example.com", When: time.Now()} committer := &Signature{Name: "Jane Doe", Email: "jane@example.com", When: time.Now()} p1 := []byte("aaaaaaaaaaaaaaaaaaaa") p2 := []byte("bbbbbbbbbbbbbbbbbbbb") treeId := []byte("cccccccccccccccccccc") from := new(bytes.Buffer) fmt.Fprintf(from, "author %s\n", author) fmt.Fprintf(from, "committer %s\n", committer) fmt.Fprintf(from, "parent %s\n", hex.EncodeToString(p1)) fmt.Fprintf(from, "parent %s\n", hex.EncodeToString(p2)) fmt.Fprintf(from, "foo bar\n") fmt.Fprintf(from, "tree %s\n", hex.EncodeToString(treeId)) fmt.Fprintf(from, "\ninitial commit") flen := from.Len() commit := new(Commit) n, err := commit.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Fatalf("Decode error: %v", err) } if flen != n { t.Errorf("Expected %d, got %d", flen, n) } if author.String() != commit.Author { t.Errorf("Expected author %s, got %s", author.String(), commit.Author) } if committer.String() != commit.Committer { t.Errorf("Expected committer %s, got %s", committer.String(), commit.Committer) } if len(commit.ParentIDs) != 2 { t.Error("Expected 2 parent IDs") } if !bytes.Equal(p1, commit.ParentIDs[0]) { t.Error("First parent ID does not match") } if !bytes.Equal(p2, commit.ParentIDs[1]) { t.Error("Second parent ID does not match") } if len(commit.ExtraHeaders) != 1 { t.Errorf("Expected 1 extra header, got %d", len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "foo" { t.Errorf("Expected key 'foo', got %s", commit.ExtraHeaders[0].K) } if commit.ExtraHeaders[0].V != "bar" { t.Errorf("Expected value 'bar', got %s", commit.ExtraHeaders[0].V) } if commit.Message != "initial commit" { t.Errorf("Expected 'initial commit', got %s", commit.Message) } } func TestCommitDecodingWithEmptyName(t *testing.T) { author := &Signature{Name: "", Email: "john@example.com", When: time.Now()} committer := &Signature{Name: "", Email: "jane@example.com", When: time.Now()} treeId := []byte("cccccccccccccccccccc") from := new(bytes.Buffer) fmt.Fprintf(from, "author %s\n", author) fmt.Fprintf(from, "committer %s\n", committer) fmt.Fprintf(from, "tree %s\n", hex.EncodeToString(treeId)) fmt.Fprintf(from, "\ninitial commit") flen := from.Len() commit := new(Commit) n, err := commit.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Fatalf("Unexpected non-nil value") } if flen != n { t.Errorf("Expected %v, got %v", flen, n) } if author.String() != commit.Author { t.Errorf("Expected %v, got %v", author.String(), commit.Author) } if committer.String() != commit.Committer { t.Errorf("Expected %v, got %v", committer.String(), commit.Committer) } if commit.Message != "initial commit" { t.Errorf("Expected %v, got %v", "initial commit", commit.Message) } } func TestCommitDecodingWithLargeCommitMessage(t *testing.T) { message := "This message text is, with newline, exactly 64 characters long. " // This message will be exactly 10 MiB in size when part of the commit. longMessage := strings.Repeat(message, (10*1024*1024/64)-1) longMessage += strings.TrimSpace(message) author := &Signature{Name: "", Email: "john@example.com", When: time.Now()} committer := &Signature{Name: "", Email: "jane@example.com", When: time.Now()} treeId := []byte("cccccccccccccccccccc") from := new(bytes.Buffer) fmt.Fprintf(from, "author %s\n", author) fmt.Fprintf(from, "committer %s\n", committer) fmt.Fprintf(from, "tree %s\n", hex.EncodeToString(treeId)) fmt.Fprintf(from, "\n%s", longMessage) flen := from.Len() commit := new(Commit) n, err := commit.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Fatalf("Unexpected non-nil value") } if flen != n { t.Errorf("Expected %v, got %v", flen, n) } if author.String() != commit.Author { t.Errorf("Expected %v, got %v", author.String(), commit.Author) } if committer.String() != commit.Committer { t.Errorf("Expected %v, got %v", committer.String(), commit.Committer) } if longMessage != commit.Message { t.Errorf("Expected %v, got %v", longMessage, commit.Message) } } func TestCommitDecodingWithMessageKeywordPrefix(t *testing.T) { author := &Signature{Name: "John Doe", Email: "john@example.com", When: time.Now()} committer := &Signature{Name: "Jane Doe", Email: "jane@example.com", When: time.Now()} treeId := []byte("aaaaaaaaaaaaaaaaaaaa") treeIdAscii := hex.EncodeToString(treeId) from := new(bytes.Buffer) fmt.Fprintf(from, "author %s\n", author) fmt.Fprintf(from, "committer %s\n", committer) fmt.Fprintf(from, "tree %s\n", hex.EncodeToString(treeId)) fmt.Fprintf(from, "\nfirst line\n\nsecond line") flen := from.Len() commit := new(Commit) n, err := commit.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Errorf("Unexpected error: %v", err) } if flen != n { t.Errorf("Expected %v, got %v", flen, n) } if author.String() != commit.Author { t.Errorf("Expected %v, got %v", author.String(), commit.Author) } if committer.String() != commit.Committer { t.Errorf("Expected %v, got %v", committer.String(), commit.Committer) } if treeIdAscii != hex.EncodeToString(commit.TreeID) { t.Errorf("Expected %v, got %v", treeIdAscii, hex.EncodeToString(commit.TreeID)) } if commit.Message != "first line\n\nsecond line" { t.Errorf("Expected %v, got %v", "first line\n\nsecond line", commit.Message) } } func TestCommitDecodingWithWhitespace(t *testing.T) { author := &Signature{Name: "John Doe", Email: "john@example.com", When: time.Now()} committer := &Signature{Name: "Jane Doe", Email: "jane@example.com", When: time.Now()} treeId := []byte("aaaaaaaaaaaaaaaaaaaa") treeIdAscii := hex.EncodeToString(treeId) from := new(bytes.Buffer) fmt.Fprintf(from, "author %s\n", author) fmt.Fprintf(from, "committer %s\n", committer) fmt.Fprintf(from, "tree %s\n", hex.EncodeToString(treeId)) fmt.Fprintf(from, "\ntree <- initial commit") flen := from.Len() commit := new(Commit) n, err := commit.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Errorf("Unexpected error: %v", err) } if flen != n { t.Errorf("Expected %v, got %v", flen, n) } if author.String() != commit.Author { t.Errorf("Expected %v, got %v", author.String(), commit.Author) } if committer.String() != commit.Committer { t.Errorf("Expected %v, got %v", committer.String(), commit.Committer) } if treeIdAscii != hex.EncodeToString(commit.TreeID) { t.Errorf("Expected %v, got %v", treeIdAscii, hex.EncodeToString(commit.TreeID)) } if commit.Message != "tree <- initial commit" { t.Errorf("Expected %v, got %v", "tree <- initial commit", commit.Message) } } func TestCommitDecodingMultilineHeader(t *testing.T) { author := &Signature{Name: "", Email: "john@example.com", When: time.Now()} committer := &Signature{Name: "", Email: "jane@example.com", When: time.Now()} treeId := []byte("cccccccccccccccccccc") from := new(bytes.Buffer) fmt.Fprintf(from, "author %s\n", author) fmt.Fprintf(from, "committer %s\n", committer) fmt.Fprintf(from, "tree %s\n", hex.EncodeToString(treeId)) fmt.Fprintf(from, "gpgsig -----BEGIN PGP SIGNATURE-----\n") fmt.Fprintf(from, " \n") fmt.Fprintf(from, " -----END PGP SIGNATURE-----\n") fmt.Fprintf(from, "\ninitial commit") flen := from.Len() commit := new(Commit) n, err := commit.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Fatalf("Decode error: %v", err) } if flen != n { t.Errorf("Expected %d, got %d", flen, n) } if len(commit.ExtraHeaders) != 1 { t.Fatalf("Expected 1 extra header, got %d", len(commit.ExtraHeaders)) } hdr := commit.ExtraHeaders[0] if hdr.K != "gpgsig" { t.Errorf("Expected key 'gpgsig', got %s", hdr.K) } expectedLines := []string{ "-----BEGIN PGP SIGNATURE-----", "", "-----END PGP SIGNATURE-----"} actualLines := strings.Split(hdr.V, "\n") if !equalStringSlices(expectedLines, actualLines) { t.Errorf("Expected %v, got %v", expectedLines, actualLines) } } // Helper function to compare string slices func equalStringSlices(a, b []string) bool { if len(a) != len(b) { return false } for i := range a { if a[i] != b[i] { return false } } return true } func TestCommitDecodingBadMessageWithLineStartingWithTree(t *testing.T) { from := new(bytes.Buffer) // The tricky part here that we're testing is the "tree support" in the // `mergetag` header, which we should not try to parse as a tree header. // Note also that this entry contains trailing whitespace which must not // be trimmed. fmt.Fprintf(from, `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb parent b343c8beec664ef6f0e9964d3001c7c7966331ae parent author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 mergetag object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd type commit tag random tagger J. Roe 1337889148 -0600 Random changes This text contains some tree support code. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Not a real signature -----END PGP SIGNATURE----- Merge tag 'random' of git://git.example.ca/git/ `) flen := from.Len() commit := new(Commit) n, err := commit.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Fatalf("Decode error: %v", err) } if flen != n { t.Errorf("Expected %d, got %d", flen, n) } expectedHeaders := []*ExtraHeader{ { K: "mergetag", V: `object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd type commit tag random tagger J. Roe 1337889148 -0600 Random changes This text contains some tree support code. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Not a real signature -----END PGP SIGNATURE-----`}, } if !equalExtraHeaders(commit.ExtraHeaders, expectedHeaders) { t.Error("ExtraHeaders do not match") } if commit.Message != "Merge tag 'random' of git://git.example.ca/git/\n" { t.Errorf("Unexpected message: %s", commit.Message) } } // Helper function to compare ExtraHeader slices func equalExtraHeaders(a, b []*ExtraHeader) bool { if len(a) != len(b) { return false } for i := range a { if a[i].K != b[i].K || a[i].V != b[i].V { return false } } return true } func TestCommitDecodingMessageWithLineStartingWithTree(t *testing.T) { from := new(bytes.Buffer) // The tricky part here that we're testing is the "tree support" in the // `mergetag` header, which we should not try to parse as a tree header. // Note also that this entry contains trailing whitespace which must not // be trimmed. fmt.Fprintf(from, `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb parent b343c8beec664ef6f0e9964d3001c7c7966331ae parent 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 mergetag object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd type commit tag random tagger J. Roe 1337889148 -0600 Random changes This text contains some tree support code. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Not a real signature -----END PGP SIGNATURE----- Merge tag 'random' of git://git.example.ca/git/ `) flen := from.Len() commit := new(Commit) n, err := commit.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Fatalf("Unexpected non-nil value: %v", err) } if flen != n { t.Fatalf("Expected %v, got %v", flen, n) } if len(commit.ExtraHeaders) != 1 { t.Fatalf("Expected 1 extra header, got %d", len(commit.ExtraHeaders)) } h := commit.ExtraHeaders[0] if h.K != "mergetag" { t.Errorf("Expected key %v, got %v", "mergetag", h.K) } expectedV := `object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd type commit tag random tagger J. Roe 1337889148 -0600 Random changes This text contains some tree support code. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Not a real signature -----END PGP SIGNATURE-----` if h.V != expectedV { t.Errorf("Expected value %v, got %v", expectedV, h.V) } if commit.Message != "Merge tag 'random' of git://git.example.ca/git/\n" { t.Fatalf("Expected %v, got %v", commit.Message, "Merge tag 'random' of git://git.example.ca/git/\n") } } func assertLine(t *testing.T, buf *bytes.Buffer, wanted string, args ...any) { got, err := buf.ReadString('\n') if err == io.EOF { err = nil } if err != nil { t.Fatalf("Unexpected non-nil value") } if fmt.Sprintf(wanted, args...) != strings.TrimSuffix(got, "\n") { t.Errorf("Expected %v, got %v", fmt.Sprintf(wanted, args...), strings.TrimSuffix(got, "\n")) } } func TestCommitEqualReturnsTrueWithIdenticalCommits(t *testing.T) { c1 := &Commit{ Author: "Jane Doe 1503956287 -0400", Committer: "Jane Doe 1503956287 -0400", ParentIDs: [][]byte{make([]byte, 20)}, TreeID: make([]byte, 20), ExtraHeaders: []*ExtraHeader{ {K: "Signed-off-by", V: "Joe Smith"}, }, Message: "initial commit", } c2 := &Commit{ Author: "Jane Doe 1503956287 -0400", Committer: "Jane Doe 1503956287 -0400", ParentIDs: [][]byte{make([]byte, 20)}, TreeID: make([]byte, 20), ExtraHeaders: []*ExtraHeader{ {K: "Signed-off-by", V: "Joe Smith"}, }, Message: "initial commit", } if !c1.Equal(c2) { t.Error("Expected true") } } func TestCommitEqualReturnsFalseWithDifferentParentCounts(t *testing.T) { c1 := &Commit{ ParentIDs: [][]byte{make([]byte, 20), make([]byte, 20)}, } c2 := &Commit{ ParentIDs: [][]byte{make([]byte, 20)}, } if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsFalseWithDifferentParentsIds(t *testing.T) { c1 := &Commit{ ParentIDs: [][]byte{make([]byte, 20)}, } c2 := &Commit{ ParentIDs: [][]byte{make([]byte, 20)}, } c1.ParentIDs[0][1] = 0x1 if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsFalseWithDifferentHeaderCounts(t *testing.T) { c1 := &Commit{ ExtraHeaders: []*ExtraHeader{ {K: "Signed-off-by", V: "Joe Smith"}, {K: "GPG-Signature", V: "..."}, }, } c2 := &Commit{ ExtraHeaders: []*ExtraHeader{ {K: "Signed-off-by", V: "Joe Smith"}, }, } if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsFalseWithDifferentHeaders(t *testing.T) { c1 := &Commit{ ExtraHeaders: []*ExtraHeader{ {K: "Signed-off-by", V: "Joe Smith"}, }, } c2 := &Commit{ ExtraHeaders: []*ExtraHeader{ {K: "Signed-off-by", V: "Jane Smith"}, }, } if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsFalseWithDifferentAuthors(t *testing.T) { c1 := &Commit{ Author: "Jane Doe 1503956287 -0400", } c2 := &Commit{ Author: "John Doe 1503956287 -0400", } if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsFalseWithDifferentCommitters(t *testing.T) { c1 := &Commit{ Committer: "Jane Doe 1503956287 -0400", } c2 := &Commit{ Committer: "John Doe 1503956287 -0400", } if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsFalseWithDifferentMessages(t *testing.T) { c1 := &Commit{ Message: "initial commit", } c2 := &Commit{ Message: "not the initial commit", } if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsFalseWithDifferentTreeIDs(t *testing.T) { c1 := &Commit{ TreeID: make([]byte, 20), } c2 := &Commit{ TreeID: make([]byte, 20), } c1.TreeID[0] = 0x1 if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsFalseWhenOneCommitIsNil(t *testing.T) { c1 := &Commit{ Author: "Jane Doe 1503956287 -0400", Committer: "Jane Doe 1503956287 -0400", ParentIDs: [][]byte{make([]byte, 20)}, TreeID: make([]byte, 20), ExtraHeaders: []*ExtraHeader{ {K: "Signed-off-by", V: "Joe Smith"}, }, Message: "initial commit", } c2 := (*Commit)(nil) if c1.Equal(c2) { t.Error("Expected false") } } func TestCommitEqualReturnsTrueWhenBothCommitsAreNil(t *testing.T) { c1 := (*Commit)(nil) c2 := (*Commit)(nil) if !c1.Equal(c2) { t.Error("Expected true") } } func TestBadCommit(t *testing.T) { cc := `tree 2aedfd35087c75d17bdbaf4dd56069d44fc75b71 parent 75158117eb8efe60453f8c077527ac3530c81e38 author Credit Card Account 1722305889 +0800 committer \346\244\260\346\235\215 1722305889 +0800 Credit Card Account` var c Commit _, err := c.Decode(sha1.New(), strings.NewReader(cc), int64(len(cc))) if err != nil { fmt.Fprintf(os.Stderr, "bad commit: '%v'\n", err) return } fmt.Fprintf(os.Stderr, "%v\n", c) } func TestBad2Commit(t *testing.T) { cc := `tree 2aedfd35087c75d17bdbaf4dd56069d44fc75b71 parent 75158117eb8efe60453f8c077527ac3530c81e38 author Credit Card Account 1722305889 +0800 committer Credit Card Account 1722305889 +0800 V D ---` var c Commit _, err := c.Decode(sha1.New(), strings.NewReader(cc), int64(len(cc))) if err != nil { fmt.Fprintf(os.Stderr, "bad commit: '%v'\n", err) return } fmt.Fprintf(os.Stderr, "%v\n", c) } // TestCommitDecodeWithLeadingWhitespaceWithoutPreviousHeader // Tests handling lines starting with space after standard headers but before empty line // This test verifies the code does not panic and handles this case correctly func TestCommitDecodeWithLeadingWhitespaceWithoutPreviousHeader(t *testing.T) { cc := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 extra line without previous header test message` flen := len(cc) commit := new(Commit) // This call should not panic n, err := commit.Decode(sha1.New(), strings.NewReader(cc), int64(flen)) // May return error or success, but should not panic _ = n _ = err } // TestCommitDecodePanicOnContinuationWithoutPreviousHeader // Attempts to trigger commit.go:119 panic: when encountering blank line without previous header func TestCommitDecodePanicOnContinuationWithoutPreviousHeader(t *testing.T) { cc := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 first continuation line before any extra header test message` flen := len(cc) commit := new(Commit) // Try to see if it will panic n, err := commit.Decode(sha1.New(), strings.NewReader(cc), int64(flen)) fmt.Printf("Result: n=%d, err=%v\n", n, err) fmt.Printf("Commit: %+v\n", commit) } // TestSplitBehavior // Directly tests strings.Split behavior to confirm if it can return empty array func TestSplitBehavior(t *testing.T) { testCases := []struct { input string sep string expect int }{ {"", " ", 1}, {" ", " ", 2}, {" ", " ", 3}, {"\t", " ", 1}, {"\n", " ", 1}, {"\r\n", " ", 1}, {"\u0000", " ", 1}, } for _, tc := range testCases { fields := strings.Split(tc.input, tc.sep) fmt.Printf("Split(%q, %q): len=%d\n", tc.input, tc.sep, len(fields)) if len(fields) == 0 { fmt.Printf(" >>> EMPTY ARRAY! <<<\n") } if tc.expect != len(fields) { t.Errorf("Expected %v, got %v", tc.expect, len(fields)) } } } // TestCommitDecodePanicOnEmptyFields // 测试是否能触发 len(fields) == 0 的情况 func TestCommitDecodePanicOnEmptyFields(t *testing.T) { // 尝试构造特殊输入 testCases := []string{ `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 `, // 在 header 区域结尾只有空行 `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 message`, } for i, cc := range testCases { fmt.Printf("\n=== Test case %d ===\n", i) fmt.Printf("Input:\n%s\n", cc) flen := len(cc) commit := new(Commit) // Check if it will panic func() { defer func() { if r := recover(); r != nil { fmt.Printf("PANIC CAUGHT: %v\n", r) } }() n, err := commit.Decode(sha1.New(), strings.NewReader(cc), int64(flen)) fmt.Printf("Result: n=%d, err=%v\n", n, err) fmt.Printf("Commit: %+v\n", commit) }() } } // TestCommitDecodePanicWithMalformedInput // Attempts to trigger panic using various malformed inputs func TestCommitDecodePanicWithMalformedInput(t *testing.T) { testCases := []struct { name string input string }{ { name: "Extra header followed by pure space line", input: `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 custom value message`, }, { name: "Multiple spaces line after extra header", input: `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 custom value message`, }, { name: "Only tab after extra header", input: `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 custom value message`, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { fmt.Printf("\n=== %s ===\n", tc.name) fmt.Printf("Input:\n%s\n", tc.input) commit := new(Commit) flen := len(tc.input) // 使用 recover 捕获 panic defer func() { if r := recover(); r != nil { t.Logf(">>> PANIC CAUGHT: %v <<<", r) t.Logf("This proves the panic can be triggered!") t.FailNow() } }() n, err := commit.Decode(sha1.New(), strings.NewReader(tc.input), int64(flen)) t.Logf("Result: n=%d, err=%v", n, err) t.Logf("ExtraHeaders count: %d", len(commit.ExtraHeaders)) if len(commit.ExtraHeaders) > 0 { for i, h := range commit.ExtraHeaders { t.Logf(" [%d] K=%q, V=%q", i, h.K, h.V) } } }) } } // TestCommitDecodeWithEmptyAuthor tests decoding with empty author func TestCommitDecodeWithEmptyAuthor(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author committer Pat Doe 1337892984 -0700 test message` commit := new(Commit) _, err := commit.Decode(sha1.New(), strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Unexpected error: %v", err) } if commit.Author != "" { t.Errorf("Expected %v, got %v", "", commit.Author) } if commit.Committer != "Pat Doe 1337892984 -0700" { t.Errorf("Expected %v, got %v", "Pat Doe 1337892984 -0700", commit.Committer) } } // TestCommitDecodeWithEmptyCommitter tests decoding with empty committer func TestCommitDecodeWithEmptyCommitter(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer test message` commit := new(Commit) _, err := commit.Decode(sha1.New(), strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Unexpected error: %v", err) } if commit.Author != "Pat Doe 1337892984 -0700" { t.Errorf("Expected %v, got %v", "Pat Doe 1337892984 -0700", commit.Author) } if commit.Committer != "" { t.Errorf("Expected %v, got %v", "", commit.Committer) } } // TestCommitDecodeWithMultipleParents tests decoding with multiple parents func TestCommitDecodeWithMultipleParents(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb parent a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2 parent b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3 parent c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4 author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 test message` commit := new(Commit) _, err := commit.Decode(sha1.New(), strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(commit.ParentIDs) != 3 { t.Errorf("Expected %v, got %v", 3, len(commit.ParentIDs)) } if hex.EncodeToString(commit.ParentIDs[0]) != "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2" { t.Errorf("Expected %v, got %v", "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2", hex.EncodeToString(commit.ParentIDs[0])) } if hex.EncodeToString(commit.ParentIDs[1]) != "b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3" { t.Errorf("Expected %v, got %v", "b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3", hex.EncodeToString(commit.ParentIDs[1])) } if hex.EncodeToString(commit.ParentIDs[2]) != "c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4" { t.Errorf("Expected %v, got %v", "c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4", hex.EncodeToString(commit.ParentIDs[2])) } } // TestCommitDecodeWithSpecialCharacters tests decoding with special characters func TestCommitDecodeWithSpecialCharacters(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author 张三 1337892984 +0800 committer 张三 1337892984 +0800 custom value with spaces & special!@#$%^&*()_+-=[]{}|;':",./<>? test message with 中文 and 日本語` commit := new(Commit) _, err := commit.Decode(sha1.New(), strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Unexpected error: %v", err) } if !strings.Contains(commit.Author, "张三") { t.Errorf("Expected to contain %v", "张三") } if len(commit.ExtraHeaders) != 1 { t.Errorf("Expected %v, got %v", 1, len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "custom" { t.Errorf("Expected %v, got %v", "custom", commit.ExtraHeaders[0].K) } if commit.ExtraHeaders[0].V != "value with spaces & special!@#$%^&*()_+-=[]{}|;':\",./<>?" { t.Errorf("Expected %v, got %v", "value with spaces & special!@#$%^&*()_+-=[]{}|;':\",./<>?", commit.ExtraHeaders[0].V) } if !strings.Contains(commit.Message, "中文") { t.Errorf("Expected to contain %v", "中文") } if !strings.Contains(commit.Message, "日本語") { t.Errorf("Expected to contain %v", "日本語") } } // TestCommitDecodeWithExtraHeaderBeforeStandard tests extra header before standard headers func TestCommitDecodeWithExtraHeaderBeforeStandard(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb custom extra header before standard author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 test message` commit := new(Commit) _, err := commit.Decode(sha1.New(), strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(commit.ExtraHeaders) != 1 { t.Errorf("Expected %v, got %v", 1, len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "custom" { t.Errorf("Expected %v, got %v", "custom", commit.ExtraHeaders[0].K) } if commit.ExtraHeaders[0].V != "extra header before standard" { t.Errorf("Expected %v, got %v", "extra header before standard", commit.ExtraHeaders[0].V) } } // TestCommitDecodeMultilineExtraHeaders tests correct parsing of multi-line extra headers // This is a test case for fixing multi-line header bug func TestCommitDecodeMultilineExtraHeaders(t *testing.T) { // Construct a commit with multi-line GPG signature // Note: In Git format, leading spaces in multi-line header continuation are removed input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 gpgsig -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) iQIcBAABAgAGBQJR9JqnAAoJEJyGw4i5t8hW3KUP/0XuWjE4kM6G8J7E6H4P2J8 =i9Jh -----END PGP SIGNATURE----- test message` commit := new(Commit) n, err := commit.Decode(sha1.New(), strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(input) != n { t.Fatalf("Expected %v, got %v", len(input), n) } if len(commit.ExtraHeaders) != 1 { t.Fatalf("Expected %v, got %v", 1, len(commit.ExtraHeaders)) } // Verify multi-line header value is correctly concatenated // Note: Leading spaces are removed, but empty lines in continuation are preserved gpgsig := commit.ExtraHeaders[0] if gpgsig.K != "gpgsig" { t.Errorf("Expected %v, got %v", "gpgsig", gpgsig.K) } expectedValue := "-----BEGIN PGP SIGNATURE-----\n" + "Version: GnuPG v1.4.11 (GNU/Linux)\n" + "iQIcBAABAgAGBQJR9JqnAAoJEJyGw4i5t8hW3KUP/0XuWjE4kM6G8J7E6H4P2J8\n" + "=i9Jh\n" + "-----END PGP SIGNATURE-----" if gpgsig.V != expectedValue { t.Errorf("Expected %v, got %v", expectedValue, gpgsig.V) } if commit.Message != "test message" { t.Errorf("Expected %v, got %v", "test message", commit.Message) } } // TestCommitDecodeMultipleExtraHeaders tests multiple extra headers func TestCommitDecodeMultipleExtraHeaders(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 encoding utf-8 gpgsig -----BEGIN PGP SIGNATURE----- signature -----END PGP SIGNATURE----- custom value1 custom value2 test message` commit := new(Commit) n, err := commit.Decode(sha1.New(), strings.NewReader(input), int64(len(input))) if err != nil { t.Fatalf("Unexpected error: %v", err) } if len(input) != n { t.Fatalf("Expected %v, got %v", len(input), n) } if len(commit.ExtraHeaders) != 4 { t.Fatalf("Expected %v, got %v", 4, len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "encoding" { t.Errorf("Expected %v, got %v", "encoding", commit.ExtraHeaders[0].K) } if commit.ExtraHeaders[0].V != "utf-8" { t.Errorf("Expected %v, got %v", "utf-8", commit.ExtraHeaders[0].V) } if commit.ExtraHeaders[1].K != "gpgsig" { t.Errorf("Expected %v, got %v", "gpgsig", commit.ExtraHeaders[1].K) } if !strings.Contains(commit.ExtraHeaders[1].V, "-----BEGIN PGP SIGNATURE-----") { t.Errorf("Expected to contain %v", "-----BEGIN PGP SIGNATURE-----") } if !strings.Contains(commit.ExtraHeaders[1].V, "signature") { t.Errorf("Expected to contain %v", "signature") } if !strings.Contains(commit.ExtraHeaders[1].V, "-----END PGP SIGNATURE-----") { t.Errorf("Expected to contain %v", "-----END PGP SIGNATURE-----") } if commit.ExtraHeaders[2].K != "custom" { t.Errorf("Expected %v, got %v", "custom", commit.ExtraHeaders[2].K) } if commit.ExtraHeaders[2].V != "value1" { t.Errorf("Expected %v, got %v", "value1", commit.ExtraHeaders[2].V) } if commit.ExtraHeaders[3].K != "custom" { t.Errorf("Expected %v, got %v", "custom", commit.ExtraHeaders[3].K) } if commit.ExtraHeaders[3].V != "value2" { t.Errorf("Expected %v, got %v", "value2", commit.ExtraHeaders[3].V) } if commit.Message != "test message" { t.Errorf("Expected %v, got %v", "test message", commit.Message) } } // TestCommitDecodeWithStringsCut validates correct usage of strings.Cut func TestCommitDecodeWithStringsCut(t *testing.T) { tests := []struct { name string input string wantTree string wantErr bool }{ { name: "standard commit", input: "tree abc123\nauthor test\n\nmsg", wantTree: "abc123", wantErr: false, }, { name: "tree with value", input: "tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb\nauthor test\n\nmsg", wantTree: "e8ad84c41c2acde27c77fa212b8865cd3acfe6fb", wantErr: false, }, { name: "tree without value (should be skipped)", input: "tree\ntree abc123\nauthor test\n\nmsg", wantTree: "abc123", wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { commit := new(Commit) _, err := commit.Decode(sha1.New(), strings.NewReader(tt.input), int64(len(tt.input))) if tt.wantErr { if err == nil { t.Error("Expected error") } } else { if err != nil { t.Errorf("Unexpected error: %v", err) } if tt.wantTree != hex.EncodeToString(commit.TreeID) { t.Errorf("Expected %v, got %v", tt.wantTree, hex.EncodeToString(commit.TreeID)) } } }) } } ================================================ FILE: modules/git/gitobj/errors/errors.go ================================================ package errors import ( "errors" "fmt" ) // noSuchObject is an error type that occurs when no object with a given object // ID is available. type noSuchObject struct { oid []byte } // Error implements the error.Error() function. func (e *noSuchObject) Error() string { return fmt.Sprintf("git/object: no such object: %x", e.oid) } // NoSuchObject creates a new error representing a missing object with a given // object ID. func NoSuchObject(oid []byte) error { return &noSuchObject{oid: oid} } // IsNoSuchObject indicates whether an error is a noSuchObject and is non-nil. func IsNoSuchObject(err error) bool { var e *noSuchObject return errors.As(err, &e) } ================================================ FILE: modules/git/gitobj/errors/errors_test.go ================================================ package errors import ( "encoding/hex" "testing" ) func TestNoSuchObjectTypeErrFormatting(t *testing.T) { sha := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" oid, err := hex.DecodeString(sha) if err != nil { t.Errorf("Expected nil, got %v", err) } err = NoSuchObject(oid) if err.Error() != "git/object: no such object: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" { t.Errorf("Expected %v, got %v", "git/object: no such object: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", err.Error()) } if IsNoSuchObject(err) != true { t.Errorf("Expected %v, got %v", IsNoSuchObject(err), true) } } func TestIsNoSuchObjectNilHandling(t *testing.T) { if IsNoSuchObject((*noSuchObject)(nil)) != false { t.Errorf("Expected %v, got %v", IsNoSuchObject((*noSuchObject)(nil)), false) } if IsNoSuchObject(nil) != false { t.Errorf("Expected %v, got %v", IsNoSuchObject(nil), false) } } ================================================ FILE: modules/git/gitobj/errors.go ================================================ package gitobj import "fmt" // UnexpectedObjectType is an error type that represents a scenario where an // object was requested of a given type "Wanted", and received as a different // _other_ type, "Wanted". type UnexpectedObjectType struct { // Got was the object type requested. Got ObjectType // Wanted was the object type received. Wanted ObjectType } // Error implements the error.Error() function. func (e *UnexpectedObjectType) Error() string { return fmt.Sprintf("git/object: unexpected object type, got: %q, wanted: %q", e.Got, e.Wanted) } ================================================ FILE: modules/git/gitobj/errors_test.go ================================================ package gitobj import ( "testing" ) func TestUnexpectedObjectTypeErrFormatting(t *testing.T) { err := &UnexpectedObjectType{ Got: TreeObjectType, Wanted: BlobObjectType, } expected := "git/object: unexpected object type, got: \"tree\", wanted: \"blob\"" if expected != err.Error() { t.Errorf("Expected %v, got %v", expected, err.Error()) } } ================================================ FILE: modules/git/gitobj/file_storer.go ================================================ package gitobj import ( "encoding/hex" "fmt" "io" "os" "path/filepath" "github.com/antgroup/hugescm/modules/git/gitobj/errors" "github.com/antgroup/hugescm/modules/strengthen" ) // fileStorer implements the storer interface by writing to the .git/objects // directory on disc. type fileStorer struct { // root is the top level /objects directory's path on disc. root string // temp directory, defaults to os.TempDir tmp string } // NewFileStorer returns a new fileStorer instance with the given root. func newFileStorer(root, tmp string) *fileStorer { return &fileStorer{ root: root, tmp: tmp, } } // Open implements the storer.Open function, and returns a io.ReadCloser // for the given SHA. If the file does not exist, or if there was any other // error in opening the file, an error will be returned. // // It is the caller's responsibility to close the given file "f" after its use // is complete. func (fs *fileStorer) Open(sha []byte) (f io.ReadCloser, err error) { f, err = fs.open(fs.path(sha), os.O_RDONLY) if os.IsNotExist(err) { return nil, errors.NoSuchObject(sha) } return f, err } // Store implements the storer.Store function and returns the number of bytes // written, along with any error encountered in copying the given io.Reader, "r" // into the object database on disk at a path given by "sha". // // If the file could not be created, or opened, an error will be returned. func (fs *fileStorer) Store(sha []byte, r io.Reader) (n int64, err error) { path := fs.path(sha) dir := filepath.Dir(path) if fd, ok := r.(*os.File); ok { // Since .git/objects partitions objects based on the first two // characters of their ASCII-encoded SHA1 object ID, ensure that // the directory exists before copying a file into it. if err = os.MkdirAll(dir, 0755); err != nil { return n, err } if err = strengthen.FinalizeObject(fd.Name(), path); err != nil { return n, err } return n, nil } if stat, err := os.Stat(path); stat != nil || os.IsExist(err) { // If the file already exists, there is no work left for us to // do, since the object already exists (or there is a SHA1 // collision). _, err = io.Copy(io.Discard, r) if err != nil { return 0, fmt.Errorf("discard pre-existing object data: %w", err) } return 0, nil } tmp, err := os.CreateTemp(fs.tmp, "") if err != nil { return 0, err } n, _ = io.Copy(tmp, r) if err = tmp.Close(); err != nil { return n, err } // Since .git/objects partitions objects based on the first two // characters of their ASCII-encoded SHA1 object ID, ensure that // the directory exists before copying a file into it. if err = os.MkdirAll(dir, 0755); err != nil { return n, err } if err = strengthen.FinalizeObject(tmp.Name(), path); err != nil { return n, err } return n, nil } // Root gives the absolute (fully-qualified) path to the file storer on disk. func (fs *fileStorer) Root() string { return fs.root } // Close closes the file storer. func (fs *fileStorer) Close() error { return nil } // IsCompressed returns true, because the file storer returns compressed data. func (fs *fileStorer) IsCompressed() bool { return true } // open opens a given file. func (fs *fileStorer) open(path string, flag int) (*os.File, error) { return os.OpenFile(path, flag, 0) } // path returns an absolute path on disk to the object given by the OID "sha". func (fs *fileStorer) path(sha []byte) string { encoded := hex.EncodeToString(sha) return filepath.Join(fs.root, encoded[:2], encoded[2:]) } ================================================ FILE: modules/git/gitobj/memory_storer.go ================================================ package gitobj import ( "bytes" "fmt" "io" "sync" "github.com/antgroup/hugescm/modules/git/gitobj/errors" ) // memoryStorer is an implementation of the storer interface that holds data for // the object database in memory. type memoryStorer struct { // mu guards reads and writes to the map "fs" below. mu *sync.Mutex // fs maps a hex-encoded SHA to a bytes.Buffer wrapped in a no-op closer // type. fs map[string]*bufCloser } // newMemoryStorer initializes a new memoryStorer instance with the given // initial set. // // A value of "nil" is acceptable and indicates that no entries shall be added // to the memory storer at/during construction time. func newMemoryStorer(m map[string]io.ReadWriter) *memoryStorer { fs := make(map[string]*bufCloser, len(m)) for n, rw := range m { fs[n] = &bufCloser{rw} } return &memoryStorer{ mu: new(sync.Mutex), fs: fs, } } // Store implements the storer.Store function and copies the data given in "r" // into an object entry in the memory. If an object given by that SHA "sha" is // already indexed in the database, Store will panic(). func (ms *memoryStorer) Store(sha []byte, r io.Reader) (n int64, err error) { ms.mu.Lock() defer ms.mu.Unlock() key := fmt.Sprintf("%x", sha) ms.fs[key] = &bufCloser{new(bytes.Buffer)} return io.Copy(ms.fs[key], r) } // Open implements the storer.Open function, and returns a io.ReadWriteCloser // for the given SHA. If a reader for the given SHA does not exist an error will // be returned. func (ms *memoryStorer) Open(sha []byte) (f io.ReadCloser, err error) { ms.mu.Lock() defer ms.mu.Unlock() key := fmt.Sprintf("%x", sha) if _, ok := ms.fs[key]; !ok { return nil, errors.NoSuchObject(sha) } return ms.fs[key], nil } // Close closes the memory storer. func (ms *memoryStorer) Close() error { return nil } // IsCompressed returns true, because the memory storer returns compressed data. func (ms *memoryStorer) IsCompressed() bool { return true } // bufCloser wraps a type satisfying the io.ReadWriter interface with a no-op // Close() function, thus implementing the io.ReadWriteCloser composite // interface. type bufCloser struct { io.ReadWriter } // Close implements io.Closer, and returns nothing. func (b *bufCloser) Close() error { return nil } ================================================ FILE: modules/git/gitobj/memory_storer_test.go ================================================ package gitobj import ( "bytes" "encoding/hex" "io" "strings" "testing" "github.com/antgroup/hugescm/modules/git/gitobj/errors" ) func TestMemoryStorerIncludesGivenEntries(t *testing.T) { sha := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" hex, err := hex.DecodeString(sha) if err != nil { t.Errorf("Expected nil, got %v", err) } ms := newMemoryStorer(map[string]io.ReadWriter{ sha: bytes.NewBuffer([]byte{0x1}), }) buf, err := ms.Open(hex) if err != nil { t.Errorf("Expected nil, got %v", err) } contents, err := io.ReadAll(buf) if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte{0x1}, contents) { t.Errorf("Expected %v, got %v", []byte{0x1}, contents) } } func TestMemoryStorerAcceptsNilEntries(t *testing.T) { ms := newMemoryStorer(nil) if len(ms.fs) != 0 { t.Errorf("Expected 0, got %v", len(ms.fs)) } if ms.Close() != nil { t.Errorf("Expected nil, got %v", ms.Close()) } } func TestMemoryStorerDoesntOpenMissingEntries(t *testing.T) { sha := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" hex, err := hex.DecodeString(sha) if err != nil { t.Errorf("Expected nil, got %v", err) } ms := newMemoryStorer(nil) f, err := ms.Open(hex) if !errors.IsNoSuchObject(err) { t.Errorf("Expected NoSuchObject error, got %v", err) } if f != nil { t.Errorf("Expected nil, got %v", f) } } func TestMemoryStorerStoresNewEntries(t *testing.T) { hex, err := hex.DecodeString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") if err != nil { t.Errorf("Expected nil, got %v", err) } ms := newMemoryStorer(nil) if len(ms.fs) != 0 { t.Errorf("Expected 0, got %v", len(ms.fs)) } _, err = ms.Store(hex, strings.NewReader("hello")) if err != nil { t.Errorf("Expected nil, got %v", err) } if len(ms.fs) != 1 { t.Errorf("Expected 1, got %v", len(ms.fs)) } got, err := ms.Open(hex) if err != nil { t.Errorf("Expected nil, got %v", err) } contents, err := io.ReadAll(got) if err != nil { t.Errorf("Expected nil, got %v", err) } if string(contents) != "hello" { t.Errorf("Expected %v, got %v", "hello", string(contents)) } } func TestMemoryStorerStoresExistingEntries(t *testing.T) { hex, err := hex.DecodeString("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") if err != nil { t.Errorf("Expected nil, got %v", err) } ms := newMemoryStorer(nil) if len(ms.fs) != 0 { t.Errorf("Expected 0, got %v", len(ms.fs)) } _, err = ms.Store(hex, new(bytes.Buffer)) if err != nil { t.Errorf("Expected nil, got %v", err) } if len(ms.fs) != 1 { t.Errorf("Expected 1, got %v", len(ms.fs)) } n, err := ms.Store(hex, new(bytes.Buffer)) if err != nil { t.Errorf("Expected nil, got %v", err) } if int64(0) != n { t.Errorf("Expected %v, got %v", 0, n) } } ================================================ FILE: modules/git/gitobj/object.go ================================================ package gitobj import ( "hash" "io" ) // Object is an interface satisfied by any concrete type that represents a loose // Git object. type Object interface { // Encode takes an io.Writer, "to", and encodes an uncompressed // Git-compatible representation of itself to that stream. // // It must return "n", the number of uncompressed bytes written to that // stream, along with "err", any error that was encountered during the // write. // // Any error that was encountered should be treated as "fatal-local", // meaning that a particular invocation of Encode() cannot progress, and // an accurate number "n" of bytes written up that point should be // returned. Encode(to io.Writer) (n int, err error) // Decode takes an io.Reader, "from" as well as a size "size" (the // number of uncompressed bytes on the stream that represent the object // trying to be decoded) and decodes the encoded object onto itself, // as a mutative transaction. // // It returns the number of uncompressed bytes "n" that an invocation // of this function has advanced the io.Reader, "from", as well as any // error that was encountered along the way. // // If an(y) error was encountered, it should be returned immediately, // along with the number of bytes read up to that point. Decode(hash hash.Hash, from io.Reader, size int64) (n int, err error) // Type returns the ObjectType constant that represents an instance of // the implementing type. Type() ObjectType } ================================================ FILE: modules/git/gitobj/object_db.go ================================================ package gitobj import ( "bytes" "crypto/sha1" "crypto/sha256" "errors" "fmt" "hash" "io" "os" "sync/atomic" "github.com/antgroup/hugescm/modules/git/gitobj/storage" ) // Database enables the reading and writing of objects against a storage // backend. type Database struct { // members managed via sync/atomic must be aligned at the top of this // structure (see: https://github.com/git-lfs/git-lfs/pull/2880). // closed is a uint32 managed by sync/atomic's Uint32 methods. It // yields a value of 0 if the *Database it is stored upon is open, // and a value of 1 if it is closed. closed uint32 // ro is the locations from which we can read objects. ro storage.Storage // rw is the location to which we write objects. rw storage.WritableStorage // temp directory, defaults to os.TempDir tmp string // objectFormat is the object format (hash algorithm) objectFormat ObjectFormatAlgorithm } type options struct { alternates string objectFormat ObjectFormatAlgorithm } type Option func(*options) type ObjectFormatAlgorithm string const ( ObjectFormatSHA1 = ObjectFormatAlgorithm("sha1") ObjectFormatSHA256 = ObjectFormatAlgorithm("sha256") ) // Alternates is an Option to specify the string of alternate repositories that // are searched for objects. The format is the same as for // GIT_ALTERNATE_OBJECT_DIRECTORIES. func Alternates(alternates string) Option { return func(args *options) { args.alternates = alternates } } // ObjectFormat is an Option to specify the hash algorithm (object format) in // use in Git. If not specified, it defaults to ObjectFormatSHA1. func ObjectFormat(algo ObjectFormatAlgorithm) Option { return func(args *options) { args.objectFormat = algo } } // NewDatabase constructs an *Database instance that is backed by a // directory on the filesystem. Specifically, this should point to: // // /absolute/repo/path/.git/objects func NewDatabase(root, tmp string, setters ...Option) (*Database, error) { args := &options{objectFormat: ObjectFormatSHA1} for _, setter := range setters { setter(args) } b, err := NewFilesystemBackend(root, tmp, args.alternates, hasher(args.objectFormat)) if err != nil { return nil, err } odb, err := FromBackend(b, setters...) if err != nil { return nil, err } odb.tmp = tmp return odb, nil } func FromBackend(b storage.Backend, setters ...Option) (*Database, error) { args := &options{objectFormat: ObjectFormatSHA1} for _, setter := range setters { setter(args) } ro, rw := b.Storage() odb := &Database{ ro: ro, rw: rw, objectFormat: args.objectFormat, } return odb, nil } // Close closes the *Database, freeing any open resources (namely: the // `*git.ObjectScanner instance), and returning any errors encountered in // closing them. // // If Close() has already been called, this function will return an error. func (d *Database) Close() error { if !atomic.CompareAndSwapUint32(&d.closed, 0, 1) { return errors.New("git/object: *Database already closed") } if err := d.ro.Close(); err != nil { return err } if err := d.rw.Close(); err != nil { return err } return nil } // Object returns an Object (of unknown implementation) satisfying the type // associated with the object named "sha". // // If the object could not be opened, is of unknown type, or could not be // decoded, than an appropriate error is returned instead. func (d *Database) Object(sha []byte) (Object, error) { r, err := d.open(sha) if err != nil { return nil, err } typ, _, err := r.Header() if err != nil { return nil, err } var into Object switch typ { case BlobObjectType: into = new(Blob) case TreeObjectType: into = new(Tree) case CommitObjectType: into = new(Commit) case TagObjectType: into = new(Tag) default: return nil, fmt.Errorf("git/object: unknown object type: %s", typ) } return into, d.decode(r, into) } // Blob returns a *Blob as identified by the SHA given, or an error if one was // encountered. func (d *Database) Blob(sha []byte) (*Blob, error) { var b Blob if err := d.openDecode(sha, &b); err != nil { return nil, err } return &b, nil } // Tree returns a *Tree as identified by the SHA given, or an error if one was // encountered. func (d *Database) Tree(sha []byte) (*Tree, error) { var t Tree if err := d.openDecode(sha, &t); err != nil { return nil, err } return &t, nil } // Commit returns a *Commit as identified by the SHA given, or an error if one // was encountered. func (o *Database) Commit(sha []byte) (*Commit, error) { var c Commit if err := o.openDecode(sha, &c); err != nil { return nil, err } return &c, nil } // Tag returns a *Tag as identified by the SHA given, or an error if one was // encountered. func (d *Database) Tag(sha []byte) (*Tag, error) { var t Tag if err := d.openDecode(sha, &t); err != nil { return nil, err } return &t, nil } // WriteBlob stores a *Blob on disk and returns the SHA it is uniquely // identified by, or an error if one was encountered. func (d *Database) WriteBlob(b *Blob) ([]byte, error) { tmp, err := os.CreateTemp(d.tmp, "") if err != nil { return nil, err } defer d.cleanup(tmp) to := NewObjectWriter(tmp, d.Hasher()) if _, err = to.WriteHeader(b.Type(), b.Size); err != nil { return nil, err } if err = b.Close(); err != nil { return nil, err } if _, err = io.Copy(to, b.Contents); err != nil { return nil, err } if err = to.Close(); err != nil { return nil, err } if _, err := tmp.Seek(0, io.SeekStart); err != nil { return nil, err } sha, _, err := d.save(to.Sha(), tmp) return sha, err } // WriteTree stores a *Tree on disk and returns the SHA it is uniquely // identified by, or an error if one was encountered. func (o *Database) WriteTree(t *Tree) ([]byte, error) { sha, _, err := o.encode(t) if err != nil { return nil, err } return sha, nil } // WriteCommit stores a *Commit on disk and returns the SHA it is uniquely // identified by, or an error if one was encountered. func (o *Database) WriteCommit(c *Commit) ([]byte, error) { sha, _, err := o.encode(c) if err != nil { return nil, err } return sha, nil } // WriteTag stores a *Tag on disk and returns the SHA it is uniquely identified // by, or an error if one was encountered. func (o *Database) WriteTag(t *Tag) ([]byte, error) { sha, _, err := o.encode(t) if err != nil { return nil, err } return sha, nil } // Root returns the filesystem root that this *Database works within, if // backed by a fileStorer (constructed by FromFilesystem). If so, it returns // the fully-qualified path on a disk and a value of true. // // Otherwise, it returns empty-string and a value of false. func (o *Database) Root() (string, bool) { type rooter interface { Root() string } if root, ok := o.rw.(rooter); ok { return root.Root(), true } return "", false } // Hasher returns a new hash instance suitable for this object database. func (o *Database) Hasher() hash.Hash { return hasher(o.objectFormat) } // encode encodes and saves an object to the storage backend and uses an // in-memory buffer to calculate the object's encoded body. func (d *Database) encode(object Object) (sha []byte, n int64, err error) { return d.encodeBuffer(object, bytes.NewBuffer(nil)) } // encodeBuffer encodes and saves an object to the storage backend by using the // given buffer to calculate and store the object's encoded body. func (d *Database) encodeBuffer(object Object, buf io.ReadWriter) (sha []byte, n int64, err error) { cn, err := object.Encode(buf) if err != nil { return nil, 0, err } tmp, err := os.CreateTemp(d.tmp, "") if err != nil { return nil, 0, err } defer d.cleanup(tmp) to := NewObjectWriter(tmp, d.Hasher()) if _, err = to.WriteHeader(object.Type(), int64(cn)); err != nil { return nil, 0, err } if seek, ok := buf.(io.Seeker); ok { if _, err = seek.Seek(0, io.SeekStart); err != nil { return nil, 0, err } } if _, err = io.Copy(to, buf); err != nil { return nil, 0, err } if err = to.Close(); err != nil { return nil, 0, err } if _, err := tmp.Seek(0, io.SeekStart); err != nil { return nil, 0, err } return d.save(to.Sha(), tmp) } // save writes the given buffer to the location given by the storer "o.s" as // identified by the sha []byte. func (o *Database) save(sha []byte, buf io.Reader) ([]byte, int64, error) { n, err := o.rw.Store(sha, buf) return sha, n, err } // open gives an `*ObjectReader` for the given loose object keyed by the given // "sha" []byte, or an error. func (o *Database) open(sha []byte) (*ObjectReader, error) { if atomic.LoadUint32(&o.closed) == 1 { return nil, errors.New("git/object: cannot use closed *pack.Set") } f, err := o.ro.Open(sha) if err != nil { return nil, err } if o.ro.IsCompressed() { return NewObjectReadCloser(f) } return NewUncompressedObjectReadCloser(f) } // openDecode calls decode (see: below) on the object named "sha" after openin // it. func (o *Database) openDecode(sha []byte, into Object) error { r, err := o.open(sha) if err != nil { return err } return o.decode(r, into) } // decode decodes an object given by the sha "sha []byte" into the given object // "into", or returns an error if one was encountered. // // Ordinarily, it closes the object's underlying io.ReadCloser (if it implements // the `io.Closer` interface), but skips this if the "into" Object is of type // BlobObjectType. Blob's don't exhaust the buffer completely (they instead // maintain a handle on the blob's contents via an io.LimitedReader) and // therefore cannot be closed until signaled explicitly by object.Blob.Close(). func (o *Database) decode(r *ObjectReader, into Object) error { typ, size, err := r.Header() if err != nil { return err } else if typ != into.Type() { return &UnexpectedObjectType{Got: typ, Wanted: into.Type()} } if _, err = into.Decode(o.Hasher(), r, size); err != nil { return err } if into.Type() == BlobObjectType { return nil } return r.Close() } func (o *Database) cleanup(f *os.File) { _ = f.Close() _ = os.Remove(f.Name()) } func hasher(algo ObjectFormatAlgorithm) hash.Hash { switch algo { case ObjectFormatSHA1: return sha1.New() case ObjectFormatSHA256: return sha256.New() default: return nil } } ================================================ FILE: modules/git/gitobj/object_db_test.go ================================================ package gitobj import ( "bytes" "compress/zlib" "crypto/sha1" "encoding/hex" "fmt" "io" "strings" "testing" "time" ) const roundTripCommitSha string = `561ed224a6bd39232d902ad8023c0ebe44fbf6c5` const roundTripCommit string = `tree f2ebdf9c967f69d57b370901f9344596ec47e51c parent fe8fbf7de1cd9f08ae642e502bf5de94e523cc08 author brian m. carlson 1543506816 +0000 committer brian m. carlson 1543506816 +0000 gpgsig -----BEGIN PGP SIGNATURE----- Version: GnuPG/MacGPG2 v2.2.9 (Darwin) iQIGBAABCgAwFiEETbktHYzuflTwZxNFLQybwS+Cs6EFAlwAC4cSHGJrMjIwNEBn aXRodWIuY29tAAoJEC0Mm8EvgrOhiRMN/2rTxkBb5BeQQeq7rPiIW8+29FzuvPeD /DhxlRKwKut9h4qhtxNQszTezxhP4PLOkuMvUax2pGXCQ8cjkSswagmycev+AB4d s0loG4SrEwvH8nAdr6qfNx4ZproRJ8QaEJqyN9SqF7PCWrUAoJKehdgA38WtYFws ON+nIwzDIvgpoNI+DzgWrx16SOTp87xt8RaJOVK9JNZQk8zBh7rR2viS9CWLysmz wOh3j4XI1TZ5IFJfpCxZzUDFgb6K3wpAX6Vux5F1f3cN5MsJn6WUJCmYCvwofeeZ 6LMqKgry7EA12l7Tv/JtmMeh+rbT5WLdMIsjascUaHRhpJDNqqHCKMEj1zh3QZNY Hycdcs24JouVAtPwg07f1ncPU3aE624LnNRA9A6Ih6SkkKE4tgMVA5qkObDfwzLE lWyBj2QKySaIdSlU2EcoH3UK33v/ofrRr3+bUkDgxdqeV/RkBVvfpeMwFVSFWseE bCcotryLCZF7vBQU+pKC+EaZxQV9L5+McGzcDYxUmqrhwtR+azRBYFOw+lOT4sYD FxdLFWCtmDhKPX5Ajci2gmyfgCwdIeDhSuOf2iQQGRpE6y7aka4AlaE= =UyqL -----END PGP SIGNATURE----- pack/set: ignore packs without indices When we look for packs to read, we look for a pack file, and then an index, and fail if either one is missing. When Git looks for packs to read, it looks only for indices and then checks if the pack is present. The Git approach handles the case when there is an extra pack that lacks an index, while our approach does not. Consequently, we can get various errors (showing up so far only on Windows) when an index is missing. If the index file cannot be read for any reason, simply skip the entire pack altogether and continue on. This leaves us no more or less functional than Git in terms of discovering objects and makes our error handling more robust. ` func TestDecodeObject(t *testing.T) { testCases := []struct { options []Option sha string }{ { []Option{}, "af5626b4a114abcb82d63db7c8082c3c4756e51b", }, { []Option{ObjectFormat(ObjectFormatSHA256)}, "7506cbcf4c572be9e06a1fed35ac5b1df8b5a74d26c07f022648e5d95a9f6f2a", }, } for _, test := range testCases { contents := "Hello, world!\n" var buf bytes.Buffer zw := zlib.NewWriter(&buf) _, _ = fmt.Fprintf(zw, "blob 14\x00%s", contents) zw.Close() // nolint b, err := NewMemoryBackend(map[string]io.ReadWriter{ test.sha: &buf, }) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b, test.options...) if err != nil { t.Fatalf("Error: %v", err) } shaHex, _ := hex.DecodeString(test.sha) obj, err := odb.Object(shaHex) blob, ok := obj.(*Blob) if err != nil { t.Fatalf("Error: %v", err) } if !ok { t.Fatalf("Expected true") } got, err := io.ReadAll(blob.Contents) if err != nil { t.Errorf("Expected nil, got %v", err) } if contents != string(got) { t.Errorf("Expected %v, got %v", contents, string(got)) } } } func TestDecodeBlob(t *testing.T) { testCases := []struct { options []Option sha string }{ { []Option{}, "af5626b4a114abcb82d63db7c8082c3c4756e51b", }, { []Option{ObjectFormat(ObjectFormatSHA256)}, "7506cbcf4c572be9e06a1fed35ac5b1df8b5a74d26c07f022648e5d95a9f6f2a", }, } for _, test := range testCases { contents := "Hello, world!\n" var buf bytes.Buffer zw := zlib.NewWriter(&buf) _, _ = fmt.Fprintf(zw, "blob 14\x00%s", contents) zw.Close() // nolint b, err := NewMemoryBackend(map[string]io.ReadWriter{ test.sha: &buf, }) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b, test.options...) if err != nil { t.Fatalf("Error: %v", err) } shaHex, _ := hex.DecodeString(test.sha) blob, err := odb.Blob(shaHex) if err != nil { t.Errorf("Expected nil, got %v", err) } if blob.Size != 14 { t.Errorf("Expected %v, got %v", 14, blob.Size) } got, err := io.ReadAll(blob.Contents) if err != nil { t.Errorf("Expected nil, got %v", err) } if contents != string(got) { t.Errorf("Expected %v, got %v", contents, string(got)) } } } func TestDecodeTree(t *testing.T) { testCases := []struct { options []Option size int64 treeSha string blobSha string }{ { []Option{}, 37, "fcb545d5746547a597811b7441ed8eba307be1ff", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", }, { []Option{ObjectFormat(ObjectFormatSHA256)}, 49, "eeea12da3c10b7ff20f96530ca613674f0b3292cb524c1b317b80e045adde0b6", "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813", }, } for _, test := range testCases { hexSha, err := hex.DecodeString(test.treeSha) if err != nil { t.Fatalf("Expected nil") } hexBlobSha, err := hex.DecodeString(test.blobSha) if err != nil { t.Fatalf("Expected nil") } var buf bytes.Buffer zw := zlib.NewWriter(&buf) _, _ = fmt.Fprintf(zw, "tree %d\x00", test.size) _, _ = fmt.Fprintf(zw, "100644 hello.txt\x00") _, _ = zw.Write(hexBlobSha) zw.Close() // nolint b, err := NewMemoryBackend(map[string]io.ReadWriter{ test.treeSha: &buf, }) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b, test.options...) if err != nil { t.Fatalf("Error: %v", err) } tree, err := odb.Tree(hexSha) if err != nil { t.Errorf("Expected nil, got %v", err) } if len(tree.Entries) != 1 { t.Fatalf("Expected %v, got %v", 1, len(tree.Entries)) } entry := tree.Entries[0] if entry.Name != "hello.txt" { t.Fatalf("Expected Name %v, got %v", "hello.txt", entry.Name) } if !bytes.Equal(entry.Oid, hexBlobSha) { t.Fatalf("Expected Oid %v, got %v", hexBlobSha, entry.Oid) } if entry.Filemode != 0100644 { t.Fatalf("Expected Filemode %v, got %v", 0100644, entry.Filemode) } } } func TestDecodeCommit(t *testing.T) { testCases := []struct { options []Option size int64 treeSha string commitSha string }{ { []Option{}, 173, "fcb545d5746547a597811b7441ed8eba307be1ff", "d7283480bb6dc90be621252e1001a93871dcf511", }, { []Option{ObjectFormat(ObjectFormatSHA256)}, 197, "eeea12da3c10b7ff20f96530ca613674f0b3292cb524c1b317b80e045adde0b6", "9b03a791a98a2c35621ea6870061fb17299b22e2bb5e9f6a7d5afd7dc0c23915", }, } for _, test := range testCases { commitShaHex, err := hex.DecodeString(test.commitSha) if err != nil { t.Errorf("Expected nil, got %v", err) } var buf bytes.Buffer zw := zlib.NewWriter(&buf) _, _ = fmt.Fprintf(zw, "commit %d\x00", test.size) _, _ = fmt.Fprintf(zw, "tree %s\n", test.treeSha) _, _ = fmt.Fprintf(zw, "author Taylor Blau 1494620424 -0600\n") _, _ = fmt.Fprintf(zw, "committer Taylor Blau 1494620424 -0600\n") _, _ = fmt.Fprintf(zw, "\ninitial commit") zw.Close() // nolint b, err := NewMemoryBackend(map[string]io.ReadWriter{ test.commitSha: &buf, }) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b, test.options...) if err != nil { t.Fatalf("Error: %v", err) } commit, err := odb.Commit(commitShaHex) if err != nil { t.Errorf("Expected nil, got %v", err) } if commit.Author != "Taylor Blau 1494620424 -0600" { t.Errorf("Expected %v, got %v", "Taylor Blau 1494620424 -0600", commit.Author) } if commit.Committer != "Taylor Blau 1494620424 -0600" { t.Errorf("Expected %v, got %v", "Taylor Blau 1494620424 -0600", commit.Committer) } if commit.Message != "initial commit" { t.Errorf("Expected %v, got %v", "initial commit", commit.Message) } if len(commit.ParentIDs) != 0 { t.Errorf("Expected %v, got %v", 0, len(commit.ParentIDs)) } if test.treeSha != hex.EncodeToString(commit.TreeID) { t.Errorf("Expected %v, got %v", test.treeSha, hex.EncodeToString(commit.TreeID)) } } } func TestWriteBlob(t *testing.T) { testCases := []struct { options []Option sha string }{ { []Option{}, "af5626b4a114abcb82d63db7c8082c3c4756e51b", }, { []Option{ObjectFormat(ObjectFormatSHA256)}, "7506cbcf4c572be9e06a1fed35ac5b1df8b5a74d26c07f022648e5d95a9f6f2a", }, } for _, test := range testCases { b, err := NewMemoryBackend(nil) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b, test.options...) if err != nil { t.Fatalf("Error: %v", err) } sha, err := odb.WriteBlob(&Blob{ Size: 14, Contents: strings.NewReader("Hello, world!\n"), }) _, s := b.Storage() if err != nil { t.Errorf("Expected nil, got %v", err) } if test.sha != hex.EncodeToString(sha) { t.Errorf("Expected %v, got %v", test.sha, hex.EncodeToString(sha)) } if s.(*memoryStorer) == nil { t.Errorf("Expected non-nil") } _ = s.(*memoryStorer).fs[hex.EncodeToString(sha)] } } func TestWriteTree(t *testing.T) { testCases := []struct { options []Option treeSha string blobSha string }{ { []Option{}, "fcb545d5746547a597811b7441ed8eba307be1ff", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", }, { []Option{ObjectFormat(ObjectFormatSHA256)}, "eeea12da3c10b7ff20f96530ca613674f0b3292cb524c1b317b80e045adde0b6", "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813", }, } for _, test := range testCases { b, err := NewMemoryBackend(nil) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b, test.options...) if err != nil { t.Fatalf("Error: %v", err) } hexBlobSha, err := hex.DecodeString(test.blobSha) if err != nil { t.Fatalf("Expected nil") } sha, err := odb.WriteTree(&Tree{Entries: []*TreeEntry{ { Name: "hello.txt", Oid: hexBlobSha, Filemode: 0100644, }, }}) _, s := b.Storage() if err != nil { t.Errorf("Expected nil, got %v", err) } if test.treeSha != hex.EncodeToString(sha) { t.Errorf("Expected %v, got %v", test.treeSha, hex.EncodeToString(sha)) } if s.(*memoryStorer) == nil { t.Errorf("Expected non-nil") } _ = s.(*memoryStorer).fs[hex.EncodeToString(sha)] } } func TestWriteCommit(t *testing.T) { testCases := []struct { options []Option treeSha string commitSha string }{ { []Option{}, "fcb545d5746547a597811b7441ed8eba307be1ff", "77a746376fdb591a44a4848b5ba308b2d3e2a90c", }, { []Option{ObjectFormat(ObjectFormatSHA256)}, "eeea12da3c10b7ff20f96530ca613674f0b3292cb524c1b317b80e045adde0b6", "e75fcf742b1e2d55358cf7e96257634979390f9772e24909bb96b41521bdaee0", }, } for _, test := range testCases { b, err := NewMemoryBackend(nil) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b, test.options...) if err != nil { t.Fatalf("Error: %v", err) } when := time.Unix(1257894000, 0).UTC() author := &Signature{Name: "John Doe", Email: "john@example.com", When: when} committer := &Signature{Name: "Jane Doe", Email: "jane@example.com", When: when} treeHex, err := hex.DecodeString(test.treeSha) if err != nil { t.Errorf("Expected nil, got %v", err) } sha, err := odb.WriteCommit(&Commit{ Author: author.String(), Committer: committer.String(), TreeID: treeHex, Message: "initial commit", }) _, s := b.Storage() if err != nil { t.Errorf("Expected nil, got %v", err) } if test.commitSha != hex.EncodeToString(sha) { t.Errorf("Expected %v, got %v", test.commitSha, hex.EncodeToString(sha)) } if s.(*memoryStorer) == nil { t.Errorf("Expected non-nil") } _ = s.(*memoryStorer).fs[hex.EncodeToString(sha)] } } func TestWriteCommitWithGPGSignature(t *testing.T) { b, err := NewMemoryBackend(nil) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b) if err != nil { t.Fatalf("Error: %v", err) } commit := new(Commit) _, err = commit.Decode( sha1.New(), strings.NewReader(roundTripCommit), int64(len(roundTripCommit))) if err != nil { t.Fatalf("Error: %v", err) } buf := new(bytes.Buffer) _, _ = commit.Encode(buf) if roundTripCommit != buf.String() { t.Errorf("Expected %v, got %v", roundTripCommit, buf.String()) } sha, err := odb.WriteCommit(commit) if err != nil { t.Errorf("Expected nil, got %v", err) } if roundTripCommitSha != hex.EncodeToString(sha) { t.Errorf("Expected %v, got %v", roundTripCommitSha, hex.EncodeToString(sha)) } } func TestDecodeTag(t *testing.T) { const sha = "7639ba293cd2c457070e8446ecdea56682af0f48" tagShaHex, _ := hex.DecodeString(sha) var buf bytes.Buffer zw := zlib.NewWriter(&buf) _, _ = fmt.Fprintf(zw, "tag 165\x00") _, _ = fmt.Fprintf(zw, "object 6161616161616161616161616161616161616161\n") _, _ = fmt.Fprintf(zw, "type commit\n") _, _ = fmt.Fprintf(zw, "tag v2.4.0\n") _, _ = fmt.Fprintf(zw, "tagger A U Thor \n") _, _ = fmt.Fprintf(zw, "\n") _, _ = fmt.Fprintf(zw, "The quick brown fox jumps over the lazy dog.\n") zw.Close() // nolint b, err := NewMemoryBackend(map[string]io.ReadWriter{ sha: &buf, }) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b) if err != nil { t.Fatalf("Error: %v", err) } tag, err := odb.Tag(tagShaHex) if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte("aaaaaaaaaaaaaaaaaaaa"), tag.Object) { t.Errorf("Expected %v, got %v", []byte("aaaaaaaaaaaaaaaaaaaa"), tag.Object) } if CommitObjectType != tag.ObjectType { t.Errorf("Expected %v, got %v", CommitObjectType, tag.ObjectType) } if tag.Name != "v2.4.0" { t.Errorf("Expected %v, got %v", "v2.4.0", tag.Name) } if tag.Tagger != "A U Thor " { t.Errorf("Expected %v, got %v", "A U Thor ", tag.Tagger) } if tag.Message != "The quick brown fox jumps over the lazy dog.\n" { t.Errorf("Expected %v, got %v", "The quick brown fox jumps over the lazy dog.\n", tag.Message) } } func TestWriteTag(t *testing.T) { testCases := []struct { options []Option tagSha string commitSha []byte }{ { []Option{}, "e614dda21829f4176d3db27fe62fb4aee2e2475d", []byte("aaaaaaaaaaaaaaaaaaaa"), }, { []Option{ObjectFormat(ObjectFormatSHA256)}, "a297d8b92e8be21fbe1c96a64acc596f26c8b204eb291c71e371c832d3584651", []byte("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), }, } for _, test := range testCases { b, err := NewMemoryBackend(nil) if err != nil { t.Fatalf("Error: %v", err) } odb, err := FromBackend(b, test.options...) if err != nil { t.Fatalf("Error: %v", err) } sha, err := odb.WriteTag(&Tag{ Object: test.commitSha, ObjectType: CommitObjectType, Name: "v2.4.0", Tagger: "A U Thor ", Message: "The quick brown fox jumps over the lazy dog.", }) _, s := b.Storage() if err != nil { t.Errorf("Expected nil, got %v", err) } if test.tagSha != hex.EncodeToString(sha) { t.Errorf("Expected %v, got %v", test.tagSha, hex.EncodeToString(sha)) } if s.(*memoryStorer) == nil { t.Errorf("Expected non-nil") } _ = s.(*memoryStorer).fs[hex.EncodeToString(sha)] } } func TestReadingAMissingObjectAfterClose(t *testing.T) { sha, _ := hex.DecodeString("af5626b4a114abcb82d63db7c8082c3c4756e51b") b, err := NewMemoryBackend(nil) if err != nil { t.Fatalf("Error: %v", err) } ro, rw := b.Storage() db := &Database{ ro: ro, rw: rw, closed: 1, } blob, err := db.Blob(sha) if err == nil { t.Fatalf("Expected error, got nil") } if err.Error() != "git/object: cannot use closed *pack.Set" { t.Errorf("Expected error message %v, got %v", "git/object: cannot use closed *pack.Set", err.Error()) } if blob != nil { t.Errorf("Expected nil, got %v", blob) } } func TestClosingAnDatabaseMoreThanOnce(t *testing.T) { db, err := NewDatabase("/tmp", "") if err != nil { t.Errorf("Expected nil, got %v", err) } if db.Close() != nil { t.Errorf("Expected nil, got %v", db.Close()) } if db.Close() == nil || db.Close().Error() != "git/object: *Database already closed" { t.Errorf("Expected 'git/object: *Database already closed', got %v", db.Close()) } } func TestDatabaseRootWithRoot(t *testing.T) { db, err := NewDatabase("/foo/bar/baz", "") if err != nil { t.Errorf("Expected nil, got %v", err) } root, ok := db.Root() if root != "/foo/bar/baz" { t.Errorf("Expected %v, got %v", "/foo/bar/baz", root) } if !ok { t.Errorf("Expected true") } } func TestDatabaseRootWithoutRoot(t *testing.T) { root, ok := new(Database).Root() if root != "" { t.Errorf("Expected %v, got %v", "", root) } if ok { t.Errorf("Expected false") } } ================================================ FILE: modules/git/gitobj/object_reader.go ================================================ package gitobj import ( "bufio" "compress/zlib" "errors" "io" "strconv" "strings" ) // ObjectReader provides an io.Reader implementation that can read Git object // headers, as well as provide an uncompressed view into the object contents // itself. type ObjectReader struct { // header is the object header type header *struct { // typ is the ObjectType encoded in the header pointed at by // this reader. typ ObjectType // size is the number of uncompressed bytes following the header // that encodes the object. size int64 } // r is the underling uncompressed reader. r *bufio.Reader // closeFn supplies an optional function that, when called, frees an // resources (open files, memory, etc) held by this instance of the // *ObjectReader. // // closeFn returns any error encountered when closing/freeing resources // held. // // It is allowed to be nil. closeFn func() error } // NewObjectReader takes a given io.Reader that yields zlib-compressed data, and // returns an *ObjectReader wrapping it, or an error if one occurred during // construction time. func NewObjectReader(r io.Reader) (*ObjectReader, error) { return NewObjectReadCloser(io.NopCloser(r)) } // NewObjectReader takes a given io.Reader that yields uncompressed data and // returns an *ObjectReader wrapping it, or an error if one occurred during // construction time. func NewUncompressedObjectReader(r io.Reader) (*ObjectReader, error) { return NewUncompressedObjectReadCloser(io.NopCloser(r)) } // NewObjectReadCloser takes a given io.Reader that yields zlib-compressed data, and // returns an *ObjectReader wrapping it, or an error if one occurred during // construction time. // // It also calls the Close() function given by the implementation "r" of the // type io.Closer. func NewObjectReadCloser(r io.ReadCloser) (*ObjectReader, error) { zr, err := zlib.NewReader(r) if err != nil { return nil, err } return &ObjectReader{ r: bufio.NewReader(zr), closeFn: func() error { if err := zr.Close(); err != nil { return err } if err := r.Close(); err != nil { return err } return nil }, }, nil } // NewUncompressObjectReadCloser takes a given io.Reader that yields // uncompressed data, and returns an *ObjectReader wrapping it, or an error if // one occurred during construction time. // // It also calls the Close() function given by the implementation "r" of the // type io.Closer. func NewUncompressedObjectReadCloser(r io.ReadCloser) (*ObjectReader, error) { return &ObjectReader{ r: bufio.NewReader(r), closeFn: r.Close, }, nil } // Header returns information about the Object's header, or an error if one // occurred while reading the data. // // Header information is cached, so this function is safe to call at any point // during the object read, and can be called more than once. func (r *ObjectReader) Header() (typ ObjectType, size int64, err error) { if r.header != nil { return r.header.typ, r.header.size, nil } typs, err := r.r.ReadString(' ') if err != nil { return UnknownObjectType, 0, err } if len(typs) == 0 { return UnknownObjectType, 0, errors.New("git/object: object type must not be empty") } typs = strings.TrimSuffix(typs, " ") sizeStr, err := r.r.ReadString('\x00') if err != nil { return UnknownObjectType, 0, err } sizeStr = strings.TrimSuffix(sizeStr, "\x00") size, err = strconv.ParseInt(sizeStr, 10, 64) if err != nil { return UnknownObjectType, 0, err } r.header = &struct { typ ObjectType size int64 }{ ObjectTypeFromString(typs), size, } return r.header.typ, r.header.size, nil } // Read reads uncompressed bytes into the buffer "p", and returns the number of // uncompressed bytes read. Otherwise, it returns any error encountered along // the way. // // This function is safe to call before reading the Header information, as any // call to Read() will ensure that read has been called at least once. func (r *ObjectReader) Read(p []byte) (n int, err error) { if _, _, err = r.Header(); err != nil { return 0, err } return r.r.Read(p) } // Close frees any resources held by the ObjectReader and must be called before // disposing of this instance. // // It returns any error encountered by the *ObjectReader during close. func (r *ObjectReader) Close() error { if r.closeFn == nil { return nil } return r.closeFn() } ================================================ FILE: modules/git/gitobj/object_reader_test.go ================================================ package gitobj import ( "bytes" "compress/zlib" "errors" "io" "sync/atomic" "testing" ) func TestObjectReaderReadsHeaders(t *testing.T) { var compressed bytes.Buffer zw := zlib.NewWriter(&compressed) _, _ = zw.Write([]byte("blob 1\x00")) _ = zw.Close() or, err := NewObjectReader(&compressed) if err != nil { t.Errorf("Expected nil, got %v", err) } typ, size, err := or.Header() if err != nil { t.Errorf("Expected nil, got %v", err) } if size != 1 { t.Errorf("Expected %v, got %v", 1, size) } if BlobObjectType != typ { t.Errorf("Expected %v, got %v", BlobObjectType, typ) } } func TestObjectReaderConsumesHeaderBeforeReads(t *testing.T) { var compressed bytes.Buffer zw := zlib.NewWriter(&compressed) _, _ = zw.Write([]byte("blob 1\x00asdf")) _ = zw.Close() or, err := NewObjectReader(&compressed) if err != nil { t.Errorf("Expected nil, got %v", err) } var buf [4]byte n, err := or.Read(buf[:]) if n != 4 { t.Errorf("Expected %v, got %v", 4, n) } if !bytes.Equal([]byte{'a', 's', 'd', 'f'}, buf[:]) { t.Errorf("Expected %v, got %v", []byte{'a', 's', 'd', 'f'}, buf[:]) } if err != nil { t.Errorf("Expected nil, got %v", err) } } type ReadCloserFn struct { io.Reader closeFn func() error } func (r *ReadCloserFn) Close() error { return r.closeFn() } func TestObjectReaderCallsClose(t *testing.T) { var calls uint32 expected := errors.New("expected") or, err := NewObjectReadCloser(&ReadCloserFn{ Reader: bytes.NewBuffer([]byte{0x78, 0x01}), closeFn: func() error { atomic.AddUint32(&calls, 1) return expected }, }) if err != nil { t.Errorf("Expected nil, got %v", err) } got := or.Close() if !errors.Is(got, expected) { t.Errorf("Expected %v, got %v", expected, got) } if atomic.LoadUint32(&calls) != 1 { t.Errorf("Expected %v, got %v", 1, atomic.LoadUint32(&calls)) } } ================================================ FILE: modules/git/gitobj/object_type.go ================================================ package gitobj import "strings" // ObjectType is a constant enumeration type for identifying the kind of object // type an implementing instance of the Object interface is. type ObjectType uint8 const ( UnknownObjectType ObjectType = iota BlobObjectType TreeObjectType CommitObjectType TagObjectType ) // ObjectTypeFromString converts from a given string to an ObjectType // enumeration instance. func ObjectTypeFromString(s string) ObjectType { switch strings.ToLower(s) { case "blob": return BlobObjectType case "tree": return TreeObjectType case "commit": return CommitObjectType case "tag": return TagObjectType default: return UnknownObjectType } } // String implements the fmt.Stringer interface and returns a string // representation of the ObjectType enumeration instance. func (t ObjectType) String() string { switch t { case UnknownObjectType: return "unknown" case BlobObjectType: return "blob" case TreeObjectType: return "tree" case CommitObjectType: return "commit" case TagObjectType: return "tag" } return "" } ================================================ FILE: modules/git/gitobj/object_type_test.go ================================================ package gitobj import ( "math" "testing" ) func TestObjectTypeFromString(t *testing.T) { for str, typ := range map[string]ObjectType{ "blob": BlobObjectType, "tree": TreeObjectType, "commit": CommitObjectType, "tag": TagObjectType, "something else": UnknownObjectType, } { t.Run(str, func(t *testing.T) { if typ != ObjectTypeFromString(str) { t.Errorf("Expected %v, got %v", typ, ObjectTypeFromString(str)) } }) } } func TestObjectTypeToString(t *testing.T) { for typ, str := range map[ObjectType]string{ BlobObjectType: "blob", TreeObjectType: "tree", CommitObjectType: "commit", TagObjectType: "tag", UnknownObjectType: "unknown", ObjectType(math.MaxUint8): "", } { t.Run(str, func(t *testing.T) { if str != typ.String() { t.Errorf("Expected %v, got %v", str, typ.String()) } }) } } ================================================ FILE: modules/git/gitobj/object_writer.go ================================================ package gitobj import ( "compress/zlib" "errors" "fmt" "hash" "io" "sync/atomic" ) // ObjectWriter provides an implementation of io.Writer that compresses and // writes data given to it, and keeps track of the SHA1 hash of the data as it // is written. type ObjectWriter struct { // members managed via sync/atomic must be aligned at the top of this // structure (see: https://github.com/git-lfs/git-lfs/pull/2880). // wroteHeader is a uint32 managed by the sync/atomic package. It is 1 // if the header was written, and 0 otherwise. wroteHeader uint32 // w is the underling writer that this ObjectWriter is writing to. w io.Writer // sum is the in-progress hash calculation. sum hash.Hash // closeFn supplies an optional function that, when called, frees an // resources (open files, memory, etc) held by this instance of the // *ObjectWriter. // // closeFn returns any error encountered when closing/freeing resources // held. // // It is allowed to be nil. closeFn func() error } // nopCloser provides a no-op implementation of the io.WriteCloser interface by // taking an io.Writer and wrapping it with a Close() method that returns nil. type nopCloser struct { // Writer is an embedded io.Writer that receives the Write() method // call. io.Writer } // Close implements the io.Closer interface by returning nil. func (n *nopCloser) Close() error { return nil } // NewObjectWriter returns a new *ObjectWriter instance that drains incoming // writes into the io.Writer given, "w". "hash" is a hash instance from the // Database'e Hash method. func NewObjectWriter(w io.Writer, hash hash.Hash) *ObjectWriter { return NewObjectWriteCloser(&nopCloser{w}, hash) } // NewObjectWriter returns a new *ObjectWriter instance that drains incoming // writes into the io.Writer given, "w". "sum" is a hash instance from the // Database'e Hash method. // // Upon closing, it calls the given Close() function of the io.WriteCloser. func NewObjectWriteCloser(w io.WriteCloser, sum hash.Hash) *ObjectWriter { zw := zlib.NewWriter(w) sum.Reset() return &ObjectWriter{ w: io.MultiWriter(zw, sum), sum: sum, closeFn: func() error { if err := zw.Close(); err != nil { return err } if err := w.Close(); err != nil { return err } return nil }, } } // WriteHeader writes object header information and returns the number of // uncompressed bytes written, or any error that was encountered along the way. // // WriteHeader MUST be called only once, or a panic() will occur. func (w *ObjectWriter) WriteHeader(typ ObjectType, length int64) (n int, err error) { if !atomic.CompareAndSwapUint32(&w.wroteHeader, 0, 1) { return 0, errors.New("git/object: cannot write headers more than once") } return fmt.Fprintf(w, "%s %d\x00", typ, length) } // Write writes the given buffer "p" of uncompressed bytes into the underlying // data-stream, returning the number of uncompressed bytes written, along with // any error encountered along the way. // // A call to WriteHeaders MUST occur before calling Write, or a panic() will // occur. func (w *ObjectWriter) Write(p []byte) (n int, err error) { if atomic.LoadUint32(&w.wroteHeader) != 1 { return 0, errors.New("git/object: cannot write data without header") } return w.w.Write(p) } // Sha returns the in-progress SHA1 of the compressed object contents. func (w *ObjectWriter) Sha() []byte { return w.sum.Sum(nil) } // Close closes the ObjectWriter and frees any resources held by it, including // flushing the zlib-compressed content to the underling writer. It must be // called before discarding of the Writer instance. // // If any error occurred while calling close, it will be returned immediately, // otherwise nil. func (w *ObjectWriter) Close() error { if w.closeFn == nil { return nil } return w.closeFn() } ================================================ FILE: modules/git/gitobj/object_writer_test.go ================================================ package gitobj import ( "bytes" "compress/zlib" "crypto/sha1" "crypto/sha256" "encoding/hex" "errors" "hash" "io" "sync/atomic" "testing" ) func TestObjectWriterWritesHeaders(t *testing.T) { var buf bytes.Buffer w := NewObjectWriter(&buf, sha1.New()) n, err := w.WriteHeader(BlobObjectType, 1) if n != 7 { t.Errorf("Expected %v, got %v", 7, n) } if err != nil { t.Errorf("Expected nil, got %v", err) } if w.Close() != nil { t.Errorf("Expected nil, got %v", w.Close()) } r, err := zlib.NewReader(&buf) if err != nil { t.Errorf("Expected nil, got %v", err) } all, err := io.ReadAll(r) if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte("blob 1\x00"), all) { t.Errorf("Expected %v, got %v", []byte("blob 1\x00"), all) } if r.Close() != nil { t.Errorf("Expected nil, got %v", r.Close()) } } func TestObjectWriterWritesData(t *testing.T) { testCases := []struct { h hash.Hash sha string }{ { sha1.New(), "56a6051ca2b02b04ef92d5150c9ef600403cb1de", }, { sha256.New(), "36456d9b87f21fc54ed5babf1222a9ab0fbbd0c4ad239a7933522d5e4447049c", }, } for _, test := range testCases { var buf bytes.Buffer w := NewObjectWriter(&buf, test.h) _, _ = w.WriteHeader(BlobObjectType, 1) n, err := w.Write([]byte{0x31}) if n != 1 { t.Errorf("Expected %v, got %v", 1, n) } if err != nil { t.Errorf("Expected nil, got %v", err) } if w.Close() != nil { t.Errorf("Expected nil, got %v", w.Close()) } r, err := zlib.NewReader(&buf) if err != nil { t.Errorf("Expected nil, got %v", err) } all, err := io.ReadAll(r) if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte("blob 1\x001"), all) { t.Errorf("Expected %v, got %v", []byte("blob 1\x001"), all) } if r.Close() != nil { t.Errorf("Expected nil, got %v", r.Close()) } if test.sha != hex.EncodeToString(w.Sha()) { t.Errorf("Expected %v, got %v", test.sha, hex.EncodeToString(w.Sha())) } } } func TestObjectWriterKeepsTrackOfHash(t *testing.T) { w := NewObjectWriter(new(bytes.Buffer), sha1.New()) n, err := w.WriteHeader(BlobObjectType, 1) if err != nil { t.Errorf("Expected nil, got %v", err) } if n != 7 { t.Errorf("Expected %v, got %v", 7, n) } if hex.EncodeToString(w.Sha()) != "bb6ca78b66403a67c6281df142de5ef472186283" { t.Errorf("Expected %v, got %v", "bb6ca78b66403a67c6281df142de5ef472186283", hex.EncodeToString(w.Sha())) } w = NewObjectWriter(new(bytes.Buffer), sha256.New()) n, err = w.WriteHeader(BlobObjectType, 1) if err != nil { t.Errorf("Expected nil, got %v", err) } if n != 7 { t.Errorf("Expected %v, got %v", 7, n) } if hex.EncodeToString(w.Sha()) != "3a68c454a6eb75cc55bda147a53756f0f581497eb80b9b67156fb8a8d3931cd7" { t.Errorf("Expected %v, got %v", "3a68c454a6eb75cc55bda147a53756f0f581497eb80b9b67156fb8a8d3931cd7", hex.EncodeToString(w.Sha())) } } type WriteCloserFn struct { io.Writer closeFn func() error } func (r *WriteCloserFn) Close() error { return r.closeFn() } func TestObjectWriterCallsClose(t *testing.T) { var calls uint32 expected := errors.New("close error") w := NewObjectWriteCloser(&WriteCloserFn{ Writer: new(bytes.Buffer), closeFn: func() error { atomic.AddUint32(&calls, 1) return expected }, }, sha1.New()) got := w.Close() if calls != 1 { t.Errorf("Expected %v, got %v", 1, calls) } if !errors.Is(got, expected) { t.Errorf("Expected %v, got %v", expected, got) } } ================================================ FILE: modules/git/gitobj/pack/bounds.go ================================================ package pack import "fmt" // bounds encapsulates the window of search for a single iteration of binary // search. // // Callers may choose to treat the return values from Left() and Right() as // inclusive or exclusive. *bounds makes no assumptions on the inclusivity of // those values. // // See: *git/object/pack:.Index for more. type bounds struct { // left is the left or lower bound of the bounds. left int64 // right is the rightmost or upper bound of the bounds. right int64 } // newBounds returns a new *bounds instance with the given left and right // values. func newBounds(left, right int64) *bounds { return &bounds{ left: left, right: right, } } // Left returns the leftmost value or lower bound of this *bounds instance. func (b *bounds) Left() int64 { return b.left } // right returns the rightmost value or upper bound of this *bounds instance. func (b *bounds) Right() int64 { return b.right } // WithLeft returns a new copy of this *bounds instance, replacing the left // value with the given argument. func (b *bounds) WithLeft(v int64) *bounds { return &bounds{ left: v, right: b.right, } } // WithRight returns a new copy of this *bounds instance, replacing the right // value with the given argument. func (b *bounds) WithRight(v int64) *bounds { return &bounds{ left: b.left, right: v, } } // Equal returns whether or not the receiving *bounds instance is equal to the // given one: // // - If both the argument and receiver are nil, they are given to be equal. // - If both the argument and receiver are not nil, and they share the same // Left() and Right() values, they are equal. // - If both the argument and receiver are not nil, but they do not share the // same Left() and Right() values, they are not equal. // - If either the argument or receiver is nil, but the other is not, they are // not equal. func (b *bounds) Equal(other *bounds) bool { if b == nil { return other == nil } if other == nil { return false } return b.left == other.left && b.right == other.right } // String returns a string representation of this bounds instance, given as: // // [,] func (b *bounds) String() string { return fmt.Sprintf("[%d,%d]", b.Left(), b.Right()) } ================================================ FILE: modules/git/gitobj/pack/bounds_test.go ================================================ package pack import ( "testing" ) func TestBoundsLeft(t *testing.T) { if newBounds(1, 2).Left() != 1 { t.Errorf("Expected %v, got %v", 1, newBounds(1, 2).Left()) } } func TestBoundsRight(t *testing.T) { if newBounds(1, 2).Right() != 2 { t.Errorf("Expected %v, got %v", 2, newBounds(1, 2).Right()) } } func TestBoundsWithLeftReturnsNewBounds(t *testing.T) { b1 := newBounds(1, 2) b2 := b1.WithLeft(3) if b1.Left() != 1 { t.Errorf("Expected %v, got %v", 1, b1.Left()) } if b1.Right() != 2 { t.Errorf("Expected %v, got %v", 2, b1.Right()) } if b2.Left() != 3 { t.Errorf("Expected %v, got %v", 3, b2.Left()) } if b2.Right() != 2 { t.Errorf("Expected %v, got %v", 2, b2.Right()) } } func TestBoundsWithRightReturnsNewBounds(t *testing.T) { b1 := newBounds(1, 2) b2 := b1.WithRight(3) if b1.Left() != 1 { t.Errorf("Expected %v, got %v", 1, b1.Left()) } if b1.Right() != 2 { t.Errorf("Expected %v, got %v", 2, b1.Right()) } if b2.Left() != 1 { t.Errorf("Expected %v, got %v", 1, b2.Left()) } if b2.Right() != 3 { t.Errorf("Expected %v, got %v", 3, b2.Right()) } } func TestBoundsEqualWithIdenticalBounds(t *testing.T) { b1 := newBounds(1, 2) b2 := newBounds(1, 2) if !b1.Equal(b2) { t.Errorf("Expected true") } } func TestBoundsEqualWithDifferentBounds(t *testing.T) { b1 := newBounds(1, 2) b2 := newBounds(3, 4) if b1.Equal(b2) { t.Errorf("Expected false") } } func TestBoundsEqualWithNilReceiver(t *testing.T) { bnil := (*bounds)(nil) b2 := newBounds(1, 2) if bnil.Equal(b2) { t.Errorf("Expected false") } } func TestBoundsEqualWithNilArgument(t *testing.T) { b1 := newBounds(1, 2) bnil := (*bounds)(nil) if b1.Equal(bnil) { t.Errorf("Expected false") } } func TestBoundsEqualWithNilArgumentAndReceiver(t *testing.T) { b1 := (*bounds)(nil) b2 := (*bounds)(nil) if !b1.Equal(b2) { t.Errorf("Expected true") } } func TestBoundsString(t *testing.T) { b1 := newBounds(1, 2) if b1.String() != "[1,2]" { t.Errorf("Expected [1,2], got %v", b1.String()) } } ================================================ FILE: modules/git/gitobj/pack/chain.go ================================================ package pack // Chain represents an element in the delta-base chain corresponding to a packed // object. type Chain interface { // Unpack unpacks the data encoded in the delta-base chain up to and // including the receiving Chain implementation by applying the // delta-base chain successively to itself. // // If there was an error in the delta-base resolution, i.e., the chain // is malformed, has a bad instruction, or there was a file read error, this // function is expected to return that error. // // In the event that a non-nil error is returned, it is assumed that the // unpacked data this function returns is malformed, or otherwise // corrupt. Unpack() ([]byte, error) // Type returns the type of the receiving chain element. Type() PackedObjectType } ================================================ FILE: modules/git/gitobj/pack/chain_base.go ================================================ package pack import ( "compress/zlib" "io" ) // ChainBase represents the "base" component of a delta-base chain. type ChainBase struct { // offset returns the offset into the given io.ReaderAt where the read // will begin. offset int64 // size is the total uncompressed size of the data in the base chain. size int64 // typ is the type of data that this *ChainBase encodes. typ PackedObjectType // r is the io.ReaderAt yielding a stream of zlib-compressed data. r io.ReaderAt } // Unpack inflates and returns the uncompressed data encoded in the base // element. // // If there was any error in reading the compressed data (invalid headers, // etc.), it will be returned immediately. func (b *ChainBase) Unpack() ([]byte, error) { zr, err := zlib.NewReader(&OffsetReaderAt{ r: b.r, o: b.offset, }) if err != nil { return nil, err } defer zr.Close() // nolint buf := make([]byte, b.size) if _, err := io.ReadFull(zr, buf); err != nil { return nil, err } return buf, nil } // ChainBase returns the type of the object it encodes. func (b *ChainBase) Type() PackedObjectType { return b.typ } ================================================ FILE: modules/git/gitobj/pack/chain_base_test.go ================================================ package pack import ( "bytes" "compress/zlib" "testing" ) func TestChainBaseDecompressesData(t *testing.T) { const contents = "Hello, world!\n" compressed, err := compress(contents) if err != nil { t.Errorf("Expected nil, got %v", err) } var buf bytes.Buffer _, err = buf.Write([]byte{0x0, 0x0, 0x0, 0x0}) if err != nil { t.Errorf("Expected nil, got %v", err) } _, err = buf.Write(compressed) if err != nil { t.Errorf("Expected nil, got %v", err) } _, err = buf.Write([]byte{0x0, 0x0, 0x0, 0x0}) if err != nil { t.Errorf("Expected nil, got %v", err) } base := &ChainBase{ offset: 4, size: int64(len(contents)), r: bytes.NewReader(buf.Bytes()), } unpacked, err := base.Unpack() if err != nil { t.Errorf("Expected nil, got %v", err) } if contents != string(unpacked) { t.Errorf("Expected %v, got %v", contents, string(unpacked)) } } func TestChainBaseTypeReturnsType(t *testing.T) { b := &ChainBase{ typ: TypeCommit, } if TypeCommit != b.Type() { t.Errorf("Expected %v, got %v", TypeCommit, b.Type()) } } func compress(base string) ([]byte, error) { var buf bytes.Buffer zw := zlib.NewWriter(&buf) if _, err := zw.Write([]byte(base)); err != nil { return nil, err } if err := zw.Close(); err != nil { return nil, err } return buf.Bytes(), nil } ================================================ FILE: modules/git/gitobj/pack/chain_delta.go ================================================ package pack import ( "errors" "fmt" ) // ChainDelta represents a "delta" component of a delta-base chain. type ChainDelta struct { // Base is the base delta-base chain that this delta should be applied // to. It can be a ChainBase in the simple case, or it can itself be a // ChainDelta, which resolves against another ChainBase, when the // delta-base chain is of length greater than 2. base Chain // delta is the set of copy/add instructions to apply on top of the // base. delta []byte } // Unpack applies the delta operation to the previous delta-base chain, "base". // // If any of the delta-base instructions were invalid, an error will be // returned. func (d *ChainDelta) Unpack() ([]byte, error) { base, err := d.base.Unpack() if err != nil { return nil, err } return patch(base, d.delta) } // Type returns the type of the base of the delta-base chain. func (d *ChainDelta) Type() PackedObjectType { return d.base.Type() } // patch applies the delta instructions in "delta" to the base given as "base". // It returns the result of applying those patch instructions to base, but does // not modify base itself. // // If any of the delta instructions were malformed, or otherwise could not be // applied to the given base, an error will returned, along with an empty set of // data. func patch(base, delta []byte) ([]byte, error) { srcSize, pos := patchDeltaHeader(delta, 0) if srcSize != int64(len(base)) { // The header of the delta gives the size of the source contents // that it is a patch over. // // If this does not match with the srcSize, return an error // early so as to avoid a possible bounds error below. return nil, errors.New("git/object/pack: invalid delta data") } // The remainder of the delta header contains the destination size, and // moves the "pos" offset to the correct position to begin the set of // delta instructions. destSize, pos := patchDeltaHeader(delta, pos) dest := make([]byte, 0, destSize) for pos < len(delta) { c := int(delta[pos]) pos += 1 if c&0x80 != 0 { // If the most significant bit (MSB, at position 0x80) // is set, this is a copy instruction. Advance the // position one byte backwards, and initialize variables // for the copy offset and size instructions. pos -= 1 var co, cs int // The lower-half of "c" (0000 1111) defines a "bitmask" // for the copy offset. if c&0x1 != 0 { pos += 1 co = int(delta[pos]) } if c&0x2 != 0 { pos += 1 co |= (int(delta[pos]) << 8) } if c&0x4 != 0 { pos += 1 co |= (int(delta[pos]) << 16) } if c&0x8 != 0 { pos += 1 co |= (int(delta[pos]) << 24) } // The upper-half of "c" (1111 0000) defines a "bitmask" // for the size of the copy instruction. if c&0x10 != 0 { pos += 1 cs = int(delta[pos]) } if c&0x20 != 0 { pos += 1 cs |= (int(delta[pos]) << 8) } if c&0x40 != 0 { pos += 1 cs |= (int(delta[pos]) << 16) } if cs == 0 { // If the copy size is zero, we assume that it // is the next whole number after the max uint32 // value. cs = 0x10000 } pos += 1 // Once we have the copy offset and length defined, copy // that number of bytes from the base into the // destination. Since we are copying from the base and // not the delta, the position into the delta ("pos") // need not be updated. dest = append(dest, base[co:co+cs]...) } else if c != 0 { // If the most significant bit (MSB) is _not_ set, we // instead process a copy instruction, where "c" is the // number of successive bytes in the delta patch to add // to the output. // // Copy the bytes and increment the read pointer // forward. dest = append(dest, delta[pos:pos+c]...) pos += c } else { // Otherwise, "c" is 0, and is an invalid delta // instruction. // // Return immediately. return nil, fmt.Errorf( "git/object/pack:: invalid delta data") } } if destSize != int64(len(dest)) { // If after patching the delta against the base, the destination // size is different than the expected destination size, we have // an invalid set of patch instructions. // // Return immediately. return nil, errors.New("git/object/pack: invalid delta data") } return dest, nil } // patchDeltaHeader examines the header within delta at the given offset, and // returns the size encoded within it, as well as the ending offset where begins // the next header, or the patch instructions. func patchDeltaHeader(delta []byte, pos int) (size int64, end int) { var shift uint var c int64 for shift == 0 || c&0x80 != 0 { if len(delta) <= pos { //panic("git/object/pack:: invalid delta header") return } c = int64(delta[pos]) pos++ size |= (c & 0x7f) << shift shift += 7 } return size, pos } ================================================ FILE: modules/git/gitobj/pack/chain_delta_test.go ================================================ package pack import ( "bytes" "testing" ) func TestChainDeltaUnpackCopiesFromBase(t *testing.T) { c := &ChainDelta{ base: &ChainSimple{ X: []byte{0x0, 0x1, 0x2, 0x3}, }, delta: []byte{ 0x04, // Source size: 4. 0x03, // Destination size: 3. 0x80 | 0x01 | 0x10, // Copy, omask=0001, smask=0001. 0x1, // Offset: 1. 0x3, // Size: 3. }, } data, err := c.Unpack() if err != nil { t.Errorf("Expected nil, got %v", err) } expected := []byte{0x1, 0x2, 0x3} if !bytes.Equal(expected, data) { t.Errorf("Expected %v, got %v", expected, data) } } func TestChainDeltaUnpackAddsToBase(t *testing.T) { c := &ChainDelta{ base: &ChainSimple{ X: make([]byte, 0), }, delta: []byte{ 0x0, // Source size: 0. 0x3, // Destination size: 3. 0x3, // Add, size=3. 0x1, 0x2, 0x3, // Contents: ... }, } data, err := c.Unpack() if err != nil { t.Errorf("Expected nil, got %v", err) } expected := []byte{0x1, 0x2, 0x3} if !bytes.Equal(expected, data) { t.Errorf("Expected %v, got %v", expected, data) } } func TestChainDeltaWithMultipleInstructions(t *testing.T) { c := &ChainDelta{ base: &ChainSimple{ X: []byte{'H', 'e', 'l', 'l', 'o', '!', '\n'}, }, delta: []byte{ 0x07, // Source size: 7. 0x0e, // Destination size: 14. 0x80 | 0x01 | 0x10, // Copy, omask=0001, smask=0001. 0x0, // Offset: 1. 0x5, // Size: 5. 0x7, // Add, size=7. ',', ' ', 'w', 'o', 'r', 'l', 'd', // Contents: ... 0x80 | 0x01 | 0x10, // Copy, omask=0001, smask=0001. 0x05, // Offset: 5. 0x02, // Size: 2. }, } data, err := c.Unpack() if err != nil { t.Errorf("Expected nil, got %v", err) } expected := []byte("Hello, world!\n") if !bytes.Equal(expected, data) { t.Errorf("Expected %v, got %v", expected, data) } } func TestChainDeltaWithInvalidDeltaInstruction(t *testing.T) { c := &ChainDelta{ base: &ChainSimple{ X: make([]byte, 0), }, delta: []byte{ 0x0, // Source size: 0. 0x1, // Destination size: 3. 0x0, // Invalid instruction. }, } data, err := c.Unpack() if err == nil || (err.Error() != "git/object/pack:: invalid delta data" && err.Error() != "git/object/pack: invalid delta data") { t.Errorf("Expected 'git/object/pack:: invalid delta data' or 'git/object/pack: invalid delta data', got %v", err) } if data != nil { t.Errorf("Expected nil, got %v", data) } } func TestChainDeltaWithExtraInstructions(t *testing.T) { c := &ChainDelta{ base: &ChainSimple{ X: make([]byte, 0), }, delta: []byte{ 0x0, // Source size: 0. 0x3, // Destination size: 3. 0x4, // Add, size=4 (invalid). 0x1, 0x2, 0x3, 0x4, // Contents: ... }, } data, err := c.Unpack() errMsg := "" if err != nil { errMsg = err.Error() } if errMsg != "git/object/pack:: invalid delta data" && errMsg != "git/object/pack: invalid delta data" { t.Errorf("Expected 'git/object/pack:: invalid delta data' or 'git/object/pack: invalid delta data', got %v", err) } if data != nil { t.Errorf("Expected nil, got %v", data) } } ================================================ FILE: modules/git/gitobj/pack/chain_test.go ================================================ package pack type ChainSimple struct { X []byte Err error } func (c *ChainSimple) Unpack() ([]byte, error) { return c.X, c.Err } func (c *ChainSimple) Type() PackedObjectType { return TypeNone } ================================================ FILE: modules/git/gitobj/pack/delayed_object.go ================================================ package pack import ( "bytes" "compress/zlib" "fmt" "io" "strings" ) // delayedObjectReader provides an interface for reading from an Object while // loading object data into memory only on demand. It implements io.ReadCloser. type delayedObjectReader struct { obj *Object mr io.Reader closeFn func() error } func (d *delayedObjectReader) makeReader() (err error) { if b, ok := d.obj.data.(*ChainBase); ok { zr, err := zlib.NewReader(&OffsetReaderAt{ r: b.r, o: b.offset, }) if err != nil { return err } d.mr = io.MultiReader( // Git object header: strings.NewReader(fmt.Sprintf("%s %d\x00", b.typ.String(), b.size, )), // Git object (uncompressed) contents: io.LimitReader(zr, b.size), ) d.closeFn = func() error { return zr.Close() } return nil } data, err := d.obj.Unpack() if err != nil { return err } d.mr = io.MultiReader( // Git object header: strings.NewReader(fmt.Sprintf("%s %d\x00", d.obj.Type(), len(data), )), // Git object (uncompressed) contents: bytes.NewReader(data), ) return } // Read implements the io.Reader method by instantiating a new underlying reader // only on demand. func (d *delayedObjectReader) Read(b []byte) (int, error) { if d.mr == nil { if err := d.makeReader(); err != nil { return 0, err } } return d.mr.Read(b) } // Close implements the io.Closer interface. func (d *delayedObjectReader) Close() error { if d.closeFn != nil { return d.closeFn() } return nil } ================================================ FILE: modules/git/gitobj/pack/errors.go ================================================ package pack import "fmt" // UnsupportedVersionErr is a type implementing 'error' which indicates a // the presence of an unsupported packfile version. type UnsupportedVersionErr struct { // Got is the unsupported version that was detected. Got uint32 } // Error implements 'error.Error()'. func (u *UnsupportedVersionErr) Error() string { return fmt.Sprintf("git/object/pack:: unsupported version: %d", u.Got) } ================================================ FILE: modules/git/gitobj/pack/errors_test.go ================================================ package pack import ( "testing" ) func TestUnsupportedVersionErr(t *testing.T) { u := &UnsupportedVersionErr{Got: 3} if u.Error() != "git/object/pack:: unsupported version: 3" { t.Errorf("Expected 'git/object/pack:: unsupported version: 3', got %v", u.Error()) } } ================================================ FILE: modules/git/gitobj/pack/index.go ================================================ package pack import ( "bytes" "crypto/sha256" "errors" "io" ) const MaxHashSize = sha256.Size // Index stores information about the location of objects in a corresponding // packfile. type Index struct { // version is the encoding version used by this index. // // Currently, versions 1 and 2 are supported. version IndexVersion // fanout is the L1 fanout table stored in this index. For a given index // "i" into the array, the value stored at that index specifies the // number of objects in the packfile/index that are lexicographically // less than or equal to that index. // // See: https://github.com/git/git/blob/v2.13.0/Documentation/technical/pack-format.txt#L41-L45 fanout []uint32 // r is the underlying set of encoded data comprising this index file. r io.ReaderAt } // Count returns the number of objects in the packfile. func (i *Index) Count() int { return int(i.fanout[255]) } // Close closes the packfile index if the underlying data stream is closeable. // If so, it returns any error involved in closing. func (i *Index) Close() error { if c, ok := i.r.(io.Closer); ok { return c.Close() } return nil } var ( // errNotFound is an error returned by Index.Entry() (see: below) when // an object cannot be found in the index. errNotFound = errors.New("git/object/pack:: object not found in index") ) // IsNotFound returns whether a given error represents a missing object in the // index. func IsNotFound(err error) bool { return errors.Is(err, errNotFound) } // Entry returns an entry containing the offset of a given SHA1 "name". // // Entry operates in O(log(n))-time in the worst case, where "n" is the number // of objects that begin with the first byte of "name". // // If the entry cannot be found, (nil, ErrNotFound) will be returned. If there // was an error searching for or parsing an entry, it will be returned as (nil, // err). // // Otherwise, (entry, nil) will be returned. func (i *Index) Entry(name []byte) (*IndexEntry, error) { var last *bounds bounds := i.bounds(name) for bounds.Left() < bounds.Right() { if last.Equal(bounds) { // If the bounds are unchanged, that means either that // the object does not exist in the packfile, or the // fanout table is corrupt. // // Either way, we won't be able to find the object. // Return immediately to prevent infinite looping. return nil, errNotFound } last = bounds // Find the midpoint between the upper and lower bounds. mid := bounds.Left() + ((bounds.Right() - bounds.Left()) / 2) got, err := i.version.Name(i, mid) if err != nil { return nil, err } if cmp := bytes.Compare(name, got); cmp == 0 { // If "cmp" is zero, that means the object at that index // "at" had a SHA equal to the one given by name, and we // are done. return i.version.Entry(i, mid) } else if cmp < 0 { // If the comparison is less than 0, we searched past // the desired object, so limit the upper bound of the // search to the midpoint. bounds = bounds.WithRight(mid) } else if cmp > 0 { // Likewise, if the comparison is greater than 0, we // searched below the desired object. Modify the bounds // accordingly. bounds = bounds.WithLeft(mid) } } return nil, errNotFound } // readAt is a convenience method that allow reading into the underlying data // source from other callers within this package. func (i *Index) readAt(p []byte, at int64) (n int, err error) { return i.r.ReadAt(p, at) } // bounds returns the initial bounds for a given name using the fanout table to // limit search results. func (i *Index) bounds(name []byte) *bounds { var left, right int64 if name[0] == 0 { // If the lower bound is 0, there are no objects before it, // start at the beginning of the index file. left = 0 } else { // Otherwise, make the lower bound the slot before the given // object. left = int64(i.fanout[name[0]-1]) } if name[0] == 255 { // As above, if the upper bound is the max byte value, make the // upper bound the last object in the list. right = int64(i.Count()) } else { // Otherwise, make the upper bound the first object which is not // within the given slot. right = int64(i.fanout[name[0]+1]) } return newBounds(left, right) } ================================================ FILE: modules/git/gitobj/pack/index_decode.go ================================================ package pack import ( "bytes" "encoding/binary" "errors" "hash" "io" ) const ( // indexMagicWidth is the width of the magic header of packfiles version // 2 and newer. indexMagicWidth = 4 // indexVersionWidth is the width of the version following the magic // header. indexVersionWidth = 4 // indexV2Width is the total width of the header in V2. indexV2Width = indexMagicWidth + indexVersionWidth // indexV1Width is the total width of the header in V1. indexV1Width = 0 // indexFanoutEntries is the number of entries in the fanout table. indexFanoutEntries = 256 // indexFanoutEntryWidth is the width of each entry in the fanout table. indexFanoutEntryWidth = 4 // indexFanoutWidth is the width of the entire fanout table. indexFanoutWidth = indexFanoutEntries * indexFanoutEntryWidth // indexOffsetV1Start is the location of the first object outside of the // V1 header. indexOffsetV1Start = indexV1Width + indexFanoutWidth // indexOffsetV2Start is the location of the first object outside of the // V2 header. indexOffsetV2Start = indexV2Width + indexFanoutWidth // indexObjectCRCWidth is the width of the CRC accompanying each object // in V2. indexObjectCRCWidth = 4 // indexObjectSmallOffsetWidth is the width of the small offset encoded // into each object. indexObjectSmallOffsetWidth = 4 // indexObjectLargeOffsetWidth is the width of the optional large offset // encoded into the small offset. indexObjectLargeOffsetWidth = 8 ) var ( // ErrShortFanout is an error representing situations where the entire // fanout table could not be read, and is thus too short. ErrShortFanout = errors.New("git/object/pack: too short fanout table") // indexHeader is the first four "magic" bytes of index files version 2 // or newer. indexHeader = []byte{0xff, 0x74, 0x4f, 0x63} ) // DecodeIndex decodes an index whose underlying data is supplied by "r". // // DecodeIndex reads only the header and fanout table, and does not eagerly // parse index entries. // // If there was an error parsing, it will be returned immediately. func DecodeIndex(r io.ReaderAt, hash hash.Hash) (*Index, error) { version, err := decodeIndexHeader(r, hash) if err != nil { return nil, err } fanout, err := decodeIndexFanout(r, version.Width()) if err != nil { return nil, err } return &Index{ version: version, fanout: fanout, r: r, }, nil } // decodeIndexHeader determines which version the index given by "r" is. func decodeIndexHeader(r io.ReaderAt, hash hash.Hash) (IndexVersion, error) { hdr := make([]byte, 4) if _, err := r.ReadAt(hdr, 0); err != nil { return nil, err } if bytes.Equal(hdr, indexHeader) { vb := make([]byte, 4) if _, err := r.ReadAt(vb, 4); err != nil { return nil, err } version := binary.BigEndian.Uint32(vb) switch version { case 1: return &V1{hash: hash}, nil case 2: return &V2{hash: hash}, nil } return nil, &UnsupportedVersionErr{version} } return &V1{hash: hash}, nil } // decodeIndexFanout decodes the fanout table given by "r" and beginning at the // given offset. func decodeIndexFanout(r io.ReaderAt, offset int64) ([]uint32, error) { b := make([]byte, 256*4) if _, err := r.ReadAt(b, offset); err != nil { if err == io.EOF { return nil, ErrShortFanout } return nil, err } fanout := make([]uint32, 256) for i := range fanout { fanout[i] = binary.BigEndian.Uint32(b[(i * 4):]) } return fanout, nil } ================================================ FILE: modules/git/gitobj/pack/index_decode_test.go ================================================ package pack import ( "bytes" "crypto/sha1" "encoding/binary" "errors" "io" "testing" ) func TestDecodeIndexV2(t *testing.T) { buf := make([]byte, 0, indexV2Width+indexFanoutWidth) buf = append(buf, 0xff, 0x74, 0x4f, 0x63) buf = append(buf, 0x0, 0x0, 0x0, 0x2) for range indexFanoutEntries { x := make([]byte, 4) binary.BigEndian.PutUint32(x, uint32(3)) buf = append(buf, x...) } idx, err := DecodeIndex(bytes.NewReader(buf), sha1.New()) if err != nil { t.Errorf("Expected nil, got %v", err) } if idx.Count() != 3 { t.Errorf("Expected %v, got %v", 3, idx.Count()) } } func TestDecodeIndexV2InvalidFanout(t *testing.T) { buf := make([]byte, 0, indexV2Width+indexFanoutWidth-indexFanoutEntryWidth) buf = append(buf, 0xff, 0x74, 0x4f, 0x63) buf = append(buf, 0x0, 0x0, 0x0, 0x2) buf = append(buf, make([]byte, indexFanoutWidth-1)...) idx, err := DecodeIndex(bytes.NewReader(buf), sha1.New()) if !errors.Is(err, ErrShortFanout) { t.Errorf("Expected %v, got %v", ErrShortFanout, err) } if idx != nil { t.Errorf("Expected nil, got %v", idx) } } func TestDecodeIndexV1(t *testing.T) { idx, err := DecodeIndex(bytes.NewReader(make([]byte, indexFanoutWidth)), sha1.New()) if err != nil { t.Errorf("Expected nil, got %v", err) } if idx.Count() != 0 { t.Errorf("Expected %v, got %v", 0, idx.Count()) } } func TestDecodeIndexV1InvalidFanout(t *testing.T) { idx, err := DecodeIndex(bytes.NewReader(make([]byte, indexFanoutWidth-1)), sha1.New()) if !errors.Is(err, ErrShortFanout) { t.Errorf("Expected %v, got %v", ErrShortFanout, err) } if idx != nil { t.Errorf("Expected nil, got %v", idx) } } func TestDecodeIndexUnsupportedVersion(t *testing.T) { buf := make([]byte, 0, 4+4) buf = append(buf, 0xff, 0x74, 0x4f, 0x63) buf = append(buf, 0x0, 0x0, 0x0, 0x3) idx, err := DecodeIndex(bytes.NewReader(buf), sha1.New()) if err == nil { t.Fatalf("Expected error, got nil") } if err.Error() != "git/object/pack:: unsupported version: 3" { t.Errorf("Expected error message %v, got %v", "git/object/pack:: unsupported version: 3", err.Error()) } if idx != nil { t.Errorf("Expected nil, got %v", idx) } } func TestDecodeIndexEmptyContents(t *testing.T) { idx, err := DecodeIndex(bytes.NewReader(make([]byte, 0)), sha1.New()) if !errors.Is(err, io.EOF) { t.Errorf("Expected %v, got %v", io.EOF, err) } if idx != nil { t.Errorf("Expected nil, got %v", idx) } } ================================================ FILE: modules/git/gitobj/pack/index_entry.go ================================================ package pack // IndexEntry specifies data encoded into an entry in the pack index. type IndexEntry struct { // PackOffset is the number of bytes before the associated object in a // packfile. PackOffset uint64 } ================================================ FILE: modules/git/gitobj/pack/index_test.go ================================================ package pack import ( "bytes" "crypto/sha1" "encoding/binary" "errors" "testing" ) var ( idx *Index ) func TestIndexEntrySearch(t *testing.T) { e, err := idx.Entry([]byte{ 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, }) if err != nil { t.Errorf("Expected nil, got %v", err) } if e.PackOffset != 6 { t.Errorf("Expected %v, got %v", 6, e.PackOffset) } } func TestIndexEntrySearchClampLeft(t *testing.T) { e, err := idx.Entry([]byte{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, }) if err != nil { t.Errorf("Expected nil, got %v", err) } if e.PackOffset != 0 { t.Errorf("Expected %v, got %v", 0, e.PackOffset) } } func TestIndexEntrySearchClampRight(t *testing.T) { e, err := idx.Entry([]byte{ 0xff, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, }) if err != nil { t.Errorf("Expected nil, got %v", err) } if e.PackOffset != 0x4ff { t.Errorf("Expected %v, got %v", 0x4ff, e.PackOffset) } } func TestIndexSearchOutOfBounds(t *testing.T) { e, err := idx.Entry([]byte{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }) if !IsNotFound(err) { t.Errorf("Expected true") } t.Log("expected err to be 'not found'") if e != nil { t.Errorf("Expected nil, got %v", e) } } func TestIndexEntryNotFound(t *testing.T) { e, err := idx.Entry([]byte{ 0x1, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, }) if !IsNotFound(err) { t.Errorf("Expected true") } t.Log("expected err to be 'not found'") if e != nil { t.Errorf("Expected nil, got %v", e) } } func TestIndexCount(t *testing.T) { fanout := make([]uint32, 256) for i := range fanout { fanout[i] = uint32(i) } idx := &Index{fanout: fanout} if idx.Count() != 255 { t.Errorf("Expected %v, got %v", 255, idx.Count()) } } func TestIndexIsNotFound(t *testing.T) { if !IsNotFound(errNotFound) { t.Errorf("Expected true") } t.Log("expected 'errNotFound' to satisfy 'IsNotFound()'") } func TestIndexIsNotFoundForOtherErrors(t *testing.T) { if IsNotFound(errors.New("git/object/pack: misc")) { t.Errorf("Expected false") } t.Log("expected 'err' not to satisfy 'IsNotFound()'") } // init generates some fixture data and then constructs an *Index instance using // it. func init() { // eps is the number of SHA1 names generated under each 0x slot. const eps = 5 hdr := []byte{ 0xff, 0x74, 0x4f, 0x63, // Index file v2+ magic header 0x00, 0x00, 0x00, 0x02, // 4-byte version indicator } // Create a fanout table using uint32s (later marshalled using // binary.BigEndian). // // Since we have an even distribution of SHA1s in the generated index, // each entry will increase by the number of entries per slot (see: eps // above). fanout := make([]uint32, indexFanoutEntries) for i := range fanout { // Begin the index at (i+1), since the fanout table mandates // objects less than the value at index "i". fanout[i] = uint32((i + 1) * eps) } offs := make([]uint32, 0, 256*eps) crcs := make([]uint32, 0, 256*eps) names := make([][]byte, 0, 256*eps) for i := range 256 { // For each name, generate a unique SHA using the prefix "i", // and then suffix "j". // // In other words, when i=1, we will generate: // []byte{0x1 0x0 0x0 0x0 ...} // []byte{0x1 0x1 0x1 0x1 ...} // []byte{0x1 0x2 0x2 0x2 ...} // // and etc. for j := range eps { var sha [20]byte sha[0] = byte(i) for r := 1; r < len(sha); r++ { sha[r] = byte(j) } cpy := make([]byte, len(sha)) copy(cpy, sha[:]) names = append(names, cpy) offs = append(offs, uint32((i*eps)+j)) crcs = append(crcs, 0) } } // Create a buffer to hold the index contents: buf := bytes.NewBuffer(hdr) // Write each value in the fanout table using a 32bit network byte-order // integer. for _, f := range fanout { _ = binary.Write(buf, binary.BigEndian, f) } // Write each SHA1 name to the table next. for _, name := range names { buf.Write(name) } // Then write each of the CRC values in network byte-order as a 32bit // unsigned integer. for _, crc := range crcs { _ = binary.Write(buf, binary.BigEndian, crc) } // Do the same with the offsets. for _, off := range offs { _ = binary.Write(buf, binary.BigEndian, off) } idx = &Index{ fanout: fanout, // version is unimportant here, use V2 since it's more common in // the wild. version: &V2{hash: sha1.New()}, // *bytes.Buffer does not implement io.ReaderAt, but // *bytes.Reader does. // // Call (*bytes.Buffer).Bytes() to get the data, and then // construct a new *bytes.Reader with it to implement // io.ReaderAt. r: bytes.NewReader(buf.Bytes()), } } ================================================ FILE: modules/git/gitobj/pack/index_v1.go ================================================ package pack import ( "encoding/binary" "hash" ) // V1 implements IndexVersion for v1 packfiles. type V1 struct { hash hash.Hash } // Name implements IndexVersion.Name by returning the 20 byte SHA-1 object name // for the given entry at offset "at" in the v1 index file "idx". func (v *V1) Name(idx *Index, at int64) ([]byte, error) { var sha [MaxHashSize]byte hashlen := v.hash.Size() if _, err := idx.readAt(sha[:hashlen], v1ShaOffset(at, int64(hashlen))); err != nil { return nil, err } return sha[:hashlen], nil } // Entry implements IndexVersion.Entry for v1 packfiles by parsing and returning // the IndexEntry specified at the offset "at" in the given index file. func (v *V1) Entry(idx *Index, at int64) (*IndexEntry, error) { var offs [4]byte if _, err := idx.readAt(offs[:], v1EntryOffset(at, int64(v.hash.Size()))); err != nil { return nil, err } return &IndexEntry{ PackOffset: uint64(binary.BigEndian.Uint32(offs[:])), }, nil } // Width implements IndexVersion.Width() by returning the number of bytes that // v1 packfile index header occupy. func (v *V1) Width() int64 { return indexV1Width } // v1ShaOffset returns the location of the SHA1 of an object given at "at". func v1ShaOffset(at int64, hashlen int64) int64 { // Skip forward until the desired entry. return v1EntryOffset(at, hashlen) + // Skip past the 4-byte object offset in the desired entry to // the SHA1. indexObjectSmallOffsetWidth } // v1EntryOffset returns the location of the packfile offset for the object // given at "at". func v1EntryOffset(at int64, hashlen int64) int64 { // Skip the L1 fanout table return indexOffsetV1Start + // Skip the object entries before the one located at "at" ((hashlen + indexObjectSmallOffsetWidth) * at) } ================================================ FILE: modules/git/gitobj/pack/index_v1_test.go ================================================ package pack import ( "bytes" "crypto/sha1" "crypto/sha256" "encoding/binary" "hash" "testing" ) var ( V1IndexFanout = make([]uint32, indexFanoutEntries) ) func TestIndexV1SearchExact(t *testing.T) { for _, algo := range []hash.Hash{sha1.New(), sha256.New()} { index := newV1Index(algo) v := &V1{hash: algo} e, err := v.Entry(index, 1) if err != nil { t.Errorf("Expected nil, got %v", err) } if e.PackOffset != 2 { t.Errorf("Expected %v, got %v", 2, e.PackOffset) } } } func TestIndexVersionWidthV1(t *testing.T) { for _, algo := range []hash.Hash{sha1.New(), sha256.New()} { v := &V1{hash: algo} if v.Width() != 0 { t.Errorf("Expected %v, got %v", 0, v.Width()) } } } func newV1Index(hash hash.Hash) *Index { V1IndexFanout[1] = 1 V1IndexFanout[2] = 2 V1IndexFanout[3] = 3 for i := 3; i < len(V1IndexFanout); i++ { V1IndexFanout[i] = 3 } fanout := make([]byte, indexFanoutWidth) for i, n := range V1IndexFanout { binary.BigEndian.PutUint32(fanout[i*indexFanoutEntryWidth:], n) } hashlen := hash.Size() entrylen := hashlen + indexObjectCRCWidth entries := make([]byte, entrylen*3) for i := range 3 { // For each entry, set the first three bytes to 0 and the // remainder to the same value. That creates an initial 4-byte // CRC field with the value of i+1, followed by a series of data // bytes which all have that same value. for j := entrylen*i + 3; j < entrylen*(i+1); j++ { entries[j] = byte(i + 1) } } buf := make([]byte, 0, indexOffsetV1Start) buf = append(buf, fanout...) buf = append(buf, entries...) return &Index{ fanout: V1IndexFanout, version: &V1{hash: hash}, r: bytes.NewReader(buf), } } ================================================ FILE: modules/git/gitobj/pack/index_v2.go ================================================ package pack import ( "encoding/binary" "hash" ) // V2 implements IndexVersion for v2 packfiles. type V2 struct { hash hash.Hash } // Name implements IndexVersion.Name by returning the 20 byte SHA-1 object name // for the given entry at offset "at" in the v2 index file "idx". func (v *V2) Name(idx *Index, at int64) ([]byte, error) { var sha [MaxHashSize]byte hashlen := v.hash.Size() if _, err := idx.readAt(sha[:hashlen], v2ShaOffset(at, int64(hashlen))); err != nil { return nil, err } return sha[:hashlen], nil } // Entry implements IndexVersion.Entry for v2 packfiles by parsing and returning // the IndexEntry specified at the offset "at" in the given index file. func (v *V2) Entry(idx *Index, at int64) (*IndexEntry, error) { var offs [4]byte hashlen := v.hash.Size() if _, err := idx.readAt(offs[:], v2SmallOffsetOffset(at, int64(idx.Count()), int64(hashlen))); err != nil { return nil, err } loc := uint64(binary.BigEndian.Uint32(offs[:])) if loc&0x80000000 > 0 { // If the most significant bit (MSB) of the offset is set, then // the offset encodes the indexed location for an 8-byte offset. // // Mask away (offs&0x7fffffff) the MSB to use as an index to // find the offset of the 8-byte pack offset. lo := v2LargeOffsetOffset(int64(loc&0x7fffffff), int64(idx.Count()), int64(hashlen)) var offs [8]byte if _, err := idx.readAt(offs[:], lo); err != nil { return nil, err } loc = binary.BigEndian.Uint64(offs[:]) } return &IndexEntry{PackOffset: loc}, nil } // Width implements IndexVersion.Width() by returning the number of bytes that // v2 packfile index header occupy. func (v *V2) Width() int64 { return indexV2Width } // v2ShaOffset returns the offset of a SHA1 given at "at" in the V2 index file. func v2ShaOffset(at int64, hashlen int64) int64 { // Skip the packfile index header and the L1 fanout table. return indexOffsetV2Start + // Skip until the desired name in the sorted names table. (hashlen * at) } // v2SmallOffsetOffset returns the offset of an object's small (4-byte) offset // given by "at". func v2SmallOffsetOffset(at, total, hashlen int64) int64 { // Skip the packfile index header and the L1 fanout table. return indexOffsetV2Start + // Skip the name table. (hashlen * total) + // Skip the CRC table. (indexObjectCRCWidth * total) + // Skip until the desired index in the small offsets table. (indexObjectSmallOffsetWidth * at) } // v2LargeOffsetOffset returns the offset of an object's large (4-byte) offset, // given by the index "at". func v2LargeOffsetOffset(at, total, hashlen int64) int64 { // Skip the packfile index header and the L1 fanout table. return indexOffsetV2Start + // Skip the name table. (hashlen * total) + // Skip the CRC table. (indexObjectCRCWidth * total) + // Skip the small offsets table. (indexObjectSmallOffsetWidth * total) + // Seek to the large offset within the large offset(s) table. (indexObjectLargeOffsetWidth * at) } ================================================ FILE: modules/git/gitobj/pack/index_v2_test.go ================================================ package pack import ( "bytes" "crypto/sha1" "crypto/sha256" "encoding/binary" "hash" "testing" ) var ( V2IndexHeader = []byte{ 0xff, 0x74, 0x4f, 0x63, 0x00, 0x00, 0x00, 0x02, } V2IndexFanout = make([]uint32, indexFanoutEntries) V2IndexCRCs = []byte{ 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x1, 0x1, 0x2, 0x2, 0x2, 0x2, } V2IndexOffsets = []byte{ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x80, 0x00, 0x00, 0x01, // use the second large offset 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // filler data 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // large offset } ) func TestIndexV2EntryExact(t *testing.T) { for _, algo := range []hash.Hash{sha1.New(), sha256.New()} { index := newV2Index(algo) v := &V2{hash: algo} e, err := v.Entry(index, 1) if err != nil { t.Errorf("Expected nil, got %v", err) } if e.PackOffset != 2 { t.Errorf("Expected %v, got %v", 2, e.PackOffset) } } } func TestIndexV2EntryExtendedOffset(t *testing.T) { for _, algo := range []hash.Hash{sha1.New(), sha256.New()} { index := newV2Index(algo) v := &V2{hash: algo} e, err := v.Entry(index, 2) if err != nil { t.Errorf("Expected nil, got %v", err) } if e.PackOffset != 3 { t.Errorf("Expected %v, got %v", 3, e.PackOffset) } } } func TestIndexVersionWidthV2(t *testing.T) { for _, algo := range []hash.Hash{sha1.New(), sha256.New()} { v := &V2{hash: algo} if v.Width() != 8 { t.Errorf("Expected %v, got %v", 8, v.Width()) } } } func newV2Index(hash hash.Hash) *Index { V2IndexFanout[1] = 1 V2IndexFanout[2] = 2 V2IndexFanout[3] = 3 for i := 3; i < len(V2IndexFanout); i++ { V2IndexFanout[i] = 3 } fanout := make([]byte, indexFanoutWidth) for i, n := range V2IndexFanout { binary.BigEndian.PutUint32(fanout[i*indexFanoutEntryWidth:], n) } hashlen := hash.Size() names := make([]byte, hashlen*3) for i := range names { names[i] = byte((i / hashlen) + 1) } buf := make([]byte, 0, indexOffsetV2Start+3) buf = append(buf, V2IndexHeader...) buf = append(buf, fanout...) buf = append(buf, names...) buf = append(buf, V2IndexCRCs...) buf = append(buf, V2IndexOffsets...) return &Index{ fanout: V2IndexFanout, version: &V2{hash: hash}, r: bytes.NewReader(buf), } } ================================================ FILE: modules/git/gitobj/pack/index_version.go ================================================ package pack type IndexVersion interface { // Name returns the name of the object located at the given offset "at", // in the Index file "idx". // // It returns an error if the object at that location could not be // parsed. Name(idx *Index, at int64) ([]byte, error) // Entry parses and returns the full *IndexEntry located at the offset // "at" in the Index file "idx". // // If there was an error parsing the IndexEntry at that location, it // will be returned. Entry(idx *Index, at int64) (*IndexEntry, error) // Width returns the number of bytes occupied by the header of a // particular index version. Width() int64 } ================================================ FILE: modules/git/gitobj/pack/io.go ================================================ package pack import "io" // OffsetReaderAt transforms an io.ReaderAt into an io.Reader by beginning and // advancing all reads at the given offset. type OffsetReaderAt struct { // r is the data source for this instance of *OffsetReaderAt. r io.ReaderAt // o if the number of bytes read from the underlying data source, "r". // It is incremented upon reads. o int64 } // Read implements io.Reader.Read by reading into the given []byte, "p" from the // last known offset provided to the OffsetReaderAt. // // It returns any error encountered from the underlying data stream, and // advances the reader forward by "n", the number of bytes read from the // underlying data stream. func (r *OffsetReaderAt) Read(p []byte) (n int, err error) { n, err = r.r.ReadAt(p, r.o) r.o += int64(n) return n, err } ================================================ FILE: modules/git/gitobj/pack/io_test.go ================================================ package pack import ( "bytes" "errors" "testing" ) func TestOffsetReaderAtReadsAtOffset(t *testing.T) { bo := &OffsetReaderAt{ r: bytes.NewReader([]byte{0x0, 0x1, 0x2, 0x3}), o: 1, } var x1 [1]byte n1, e1 := bo.Read(x1[:]) if e1 != nil { t.Errorf("Expected nil, got %v", e1) } if n1 != 1 { t.Errorf("Expected %v, got %v", 1, n1) } if x1[0] != 0x1 { t.Errorf("Expected %v, got %v", 0x1, x1[0]) } var x2 [1]byte n2, e2 := bo.Read(x2[:]) if e2 != nil { t.Errorf("Expected nil, got %v", e2) } if n2 != 1 { t.Errorf("Expected %v, got %v", 1, n2) } if x2[0] != 0x2 { t.Errorf("Expected %v, got %v", 0x2, x2[0]) } } func TestOffsetReaderPropogatesErrors(t *testing.T) { expected := errors.New("git/object/pack: testing") bo := &OffsetReaderAt{ r: &ErrReaderAt{Err: expected}, o: 1, } n, err := bo.Read(make([]byte, 1)) if !errors.Is(err, expected) { t.Errorf("Expected %v, got %v", expected, err) } if n != 0 { t.Errorf("Expected %v, got %v", 0, n) } } type ErrReaderAt struct { Err error } func (e *ErrReaderAt) ReadAt(p []byte, at int64) (n int, err error) { return 0, e.Err } ================================================ FILE: modules/git/gitobj/pack/object.go ================================================ package pack // Object is an encapsulation of an object found in a packfile, or a packed // object. type Object struct { // data is the front-most element of the delta-base chain, and when // resolved, yields the uncompressed data of this object. data Chain // typ is the underlying object's type. It is not the type of the // front-most chain element, rather, the type of the actual object. typ PackedObjectType } // Unpack resolves the delta-base chain and returns an uncompressed, unpacked, // and full representation of the data encoded by this object. // // If there was any error in unpacking this object, it is returned immediately, // and the object's data can be assumed to be corrupt. func (o *Object) Unpack() ([]byte, error) { return o.data.Unpack() } // Type returns the underlying object's type. Rather than the type of the // front-most delta-base component, it is the type of the object itself. func (o *Object) Type() PackedObjectType { return o.typ } ================================================ FILE: modules/git/gitobj/pack/object_test.go ================================================ package pack import ( "bytes" "errors" "testing" ) func TestObjectTypeReturnsObjectType(t *testing.T) { o := &Object{ typ: TypeCommit, } if TypeCommit != o.Type() { t.Errorf("Expected %v, got %v", TypeCommit, o.Type()) } } func TestObjectUnpackUnpacksData(t *testing.T) { expected := []byte{0x1, 0x2, 0x3, 0x4} o := &Object{ data: &ChainSimple{ X: expected, }, } data, err := o.Unpack() if !bytes.Equal(expected, data) { t.Errorf("Expected %v, got %v", expected, data) } if err != nil { t.Errorf("Expected nil, got %v", err) } } func TestObjectUnpackPropogatesErrors(t *testing.T) { expected := errors.New("git/object/pack: testing") o := &Object{ data: &ChainSimple{ Err: expected, }, } data, err := o.Unpack() if data != nil { t.Errorf("Expected nil, got %v", data) } if !errors.Is(err, expected) { t.Errorf("Expected %v, got %v", expected, err) } } ================================================ FILE: modules/git/gitobj/pack/packfile.go ================================================ package pack import ( "compress/zlib" "fmt" "hash" "io" ) // Packfile encapsulates the behavior of accessing an unpacked representation of // all of the objects encoded in a single packfile. type Packfile struct { // Version is the version of the packfile. Version uint32 // Objects is the total number of objects in the packfile. Objects uint32 // idx is the corresponding "pack-*.idx" file giving the positions of // objects in this packfile. idx *Index // hash is the hash algorithm used in this pack. hash hash.Hash // r is an io.ReaderAt that allows read access to the packfile itself. r io.ReaderAt } // Close closes the packfile if the underlying data stream is closeable. If so, // it returns any error involved in closing. func (p *Packfile) Close() error { var iErr error if p.idx != nil { iErr = p.idx.Close() } if c, ok := p.r.(io.Closer); ok { return c.Close() } return iErr } // Object returns a reference to an object packed in the receiving *Packfile. It // does not attempt to unpack the packfile, rather, that is accomplished by // calling Unpack() on the returned *Object. // // If there was an error loading or buffering the base, it will be returned // without an object. // // If the object given by the SHA-1 name, "name", could not be found, // (nil, errNotFound) will be returned. // // If the object was able to be loaded successfully, it will be returned without // any error. func (p *Packfile) Object(name []byte) (*Object, error) { // First, try and determine the offset of the last entry in the // delta-base chain by loading it from the corresponding pack index. entry, err := p.idx.Entry(name) if err != nil { if !IsNotFound(err) { // If the error was not an errNotFound, re-wrap it with // additional context. err = fmt.Errorf("git/object/pack: could not load index: %w", err) } return nil, err } // If all goes well, then unpack the object at that given offset. r, err := p.find(int64(entry.PackOffset)) if err != nil { return nil, err } return &Object{ data: r, typ: r.Type(), }, nil } // find finds and returns a Chain element corresponding to the offset of its // last element as given by the "offset" argument. // // If find returns a ChainBase, it loads that data into memory, but does not // zlib-flate it. Otherwise, if find returns a ChainDelta, it loads all of the // leading elements in the chain recursively, but does not apply one delta to // another. func (p *Packfile) find(offset int64) (Chain, error) { // Read the first byte in the chain element. buf := make([]byte, 1) if _, err := p.r.ReadAt(buf, offset); err != nil { return nil, err } // Store the original offset; this will be compared to when loading // chain elements of type OBJ_OFS_DELTA. objectOffset := offset // Of the first byte, (0123 4567): // - Bit 0 is the M.S.B., and indicates whether there is more data // encoded in the length. // - Bits 1-3 ((buf[0] >> 4) & 0x7) are the object type. // - Bits 4-7 (buf[0] & 0xf) are the first 4 bits of the variable // length size of the encoded delta or base. typ := PackedObjectType((buf[0] >> 4) & 0x7) size := uint64(buf[0] & 0xf) shift := uint(4) offset += 1 for buf[0]&0x80 != 0 { // If there is more data to be read, read it. if _, err := p.r.ReadAt(buf, offset); err != nil { return nil, err } // And update the size, bitshift, and offset accordingly. size |= (uint64(buf[0]&0x7f) << shift) shift += 7 offset += 1 } switch typ { case TypeObjectOffsetDelta, TypeObjectReferenceDelta: // If the type of delta-base element is a delta, (either // OBJ_OFS_DELTA, or OBJ_REFS_DELTA), we must load the base, // which itself could be either of the two above, or a // OBJ_COMMIT, OBJ_BLOB, etc. // // Recursively load the base, and keep track of the updated // offset. base, offset, err := p.findBase(typ, offset, objectOffset) if err != nil { return nil, err } // Now load the delta to apply to the base, given at the offset // "offset" and for length "size". // // NB: The delta instructions are zlib compressed, so ensure // that we uncompress the instructions first. zr, err := zlib.NewReader(&OffsetReaderAt{ o: offset, r: p.r, }) if err != nil { return nil, err } delta, err := io.ReadAll(zr) if err != nil { return nil, err } // Then compose the two and return it as a *ChainDelta. return &ChainDelta{ base: base, delta: delta, }, nil case TypeCommit, TypeTree, TypeBlob, TypeTag: // Otherwise, the object's contents are given to be the // following zlib-compressed data. // // The length of the compressed data itself is not known, // rather, "size" determines the length of the data after // inflation. return &ChainBase{ offset: offset, size: int64(size), typ: typ, r: p.r, }, nil } // Otherwise, we received an invalid object type. return nil, errUnrecognizedObjectType } // findBase finds the base (an object, or another delta) for a given // OBJ_OFS_DELTA or OBJ_REFS_DELTA at the given offset. // // It returns the preceding Chain, as well as an updated read offset into the // underlying packfile data. // // If any of the above could not be completed successfully, findBase returns an // error. func (p *Packfile) findBase(typ PackedObjectType, offset, objOffset int64) (Chain, int64, error) { var baseOffset int64 hashlen := p.hash.Size() // We assume that we have to read at least an object ID's worth (the // hash length in the case of a OBJ_REF_DELTA, or greater than the // length of the base offset encoded in an OBJ_OFS_DELTA). var sha [MaxHashSize]byte if _, err := p.r.ReadAt(sha[:hashlen], offset); err != nil { return nil, baseOffset, err } switch typ { case TypeObjectOffsetDelta: // If the object is of type OBJ_OFS_DELTA, read a // variable-length integer, and find the object at that // location. i := 0 c := int64(sha[i]) baseOffset = c & 0x7f for c&0x80 != 0 { i += 1 c = int64(sha[i]) baseOffset += 1 baseOffset <<= 7 baseOffset |= c & 0x7f } baseOffset = objOffset - baseOffset offset += int64(i) + 1 case TypeObjectReferenceDelta: // If the delta is an OBJ_REFS_DELTA, find the location of its // base by reading the SHA-1 name and looking it up in the // corresponding pack index file. e, err := p.idx.Entry(sha[:hashlen]) if err != nil { return nil, baseOffset, err } baseOffset = int64(e.PackOffset) offset += int64(hashlen) default: // If we did not receive an OBJ_OFS_DELTA, or OBJ_REF_DELTA, the // type given is not a delta-fied type. Return an error. return nil, baseOffset, fmt.Errorf( "git/object/pack:: type %s is not deltafied", typ) } // Once we have determined the base offset of the object's chain base, // read the delta-base chain beginning at that offset. r, err := p.find(baseOffset) return r, offset, err } ================================================ FILE: modules/git/gitobj/pack/packfile_decode.go ================================================ package pack import ( "bytes" "encoding/binary" "errors" "hash" "io" ) var ( // packHeader is the expected header that begins all valid packfiles. packHeader = []byte{'P', 'A', 'C', 'K'} // errBadPackHeader is a sentinel error value returned when the given // pack header does not match the expected one. errBadPackHeader = errors.New("git/object/pack:: bad pack header") ) // DecodePackfile opens the packfile given by the io.ReaderAt "r" for reading. // It does not apply any delta-base chains, nor does it do reading otherwise // beyond the header. // // If the header is malformed, or otherwise cannot be read, an error will be // returned without a corresponding packfile. func DecodePackfile(r io.ReaderAt, hash hash.Hash) (*Packfile, error) { header := make([]byte, 12) if _, err := r.ReadAt(header, 0); err != nil { return nil, err } if !bytes.HasPrefix(header, packHeader) { return nil, errBadPackHeader } version := binary.BigEndian.Uint32(header[4:]) objects := binary.BigEndian.Uint32(header[8:]) return &Packfile{ Version: version, Objects: objects, r: r, hash: hash, }, nil } ================================================ FILE: modules/git/gitobj/pack/packfile_decode_test.go ================================================ package pack import ( "bytes" "crypto/sha1" "crypto/sha256" "errors" "testing" ) func TestDecodePackfileDecodesIntegerVersion(t *testing.T) { p, err := DecodePackfile(bytes.NewReader([]byte{ 'P', 'A', 'C', 'K', // Pack header. 0x0, 0x0, 0x0, 0x2, // Pack version. 0x0, 0x0, 0x0, 0x0, // Number of packed objects. }), sha1.New()) if err != nil { t.Errorf("Expected nil, got %v", err) } if p.Version != 2 { t.Errorf("Expected %v, got %v", 2, p.Version) } } func TestDecodePackfileDecodesIntegerCount(t *testing.T) { p, err := DecodePackfile(bytes.NewReader([]byte{ 'P', 'A', 'C', 'K', // Pack header. 0x0, 0x0, 0x0, 0x2, // Pack version. 0x0, 0x0, 0x1, 0x2, // Number of packed objects. }), sha256.New()) if err != nil { t.Errorf("Expected nil, got %v", err) } if p.Objects != 258 { t.Errorf("Expected %v, got %v", 258, p.Objects) } } func TestDecodePackfileReportsBadHeaders(t *testing.T) { p, err := DecodePackfile(bytes.NewReader([]byte{ 'W', 'R', 'O', 'N', 'G', // Malformed pack header. 0x0, 0x0, 0x0, 0x0, // Pack version. 0x0, 0x0, 0x0, 0x0, // Number of packed objects. }), sha1.New()) if !errors.Is(err, errBadPackHeader) { t.Errorf("Expected %v, got %v", errBadPackHeader, err) } if p != nil { t.Errorf("Expected nil, got %v", p) } } ================================================ FILE: modules/git/gitobj/pack/packfile_test.go ================================================ package pack import ( "bytes" "crypto/sha1" "encoding/binary" "encoding/hex" "errors" "sort" "strings" "sync/atomic" "testing" ) func TestPackObjectReturnsObjectWithSingleBaseAtLowOffset(t *testing.T) { const original = "Hello, world!\n" compressed, _ := compress(original) p := &Packfile{ idx: IndexWith(map[string]uint32{ "cccccccccccccccccccccccccccccccccccccccc": 32, }), r: bytes.NewReader(append([]byte{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // (0001 1000) (msb=0, type=commit, size=14) 0x1e}, compressed...), ), hash: sha1.New(), } o, err := p.Object(DecodeHex(t, "cccccccccccccccccccccccccccccccccccccccc")) if err != nil { t.Errorf("Expected nil, got %v", err) } if TypeCommit != o.Type() { t.Errorf("Expected %v, got %v", TypeCommit, o.Type()) } unpacked, err := o.Unpack() if !bytes.Equal([]byte(original), unpacked) { t.Errorf("Expected %v, got %v", []byte(original), unpacked) } if err != nil { t.Errorf("Expected nil, got %v", err) } } func TestPackObjectReturnsObjectWithSingleBaseAtHighOffset(t *testing.T) { original := strings.Repeat("four", 64) compressed, _ := compress(original) p := &Packfile{ idx: IndexWith(map[string]uint32{ "cccccccccccccccccccccccccccccccccccccccc": 32, }), r: bytes.NewReader(append([]byte{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // (1001 0000) (msb=1, type=commit, size=0) 0x90, // (1000 0000) (msb=0, size=1 -> size=256) 0x10}, compressed..., )), hash: sha1.New(), } o, err := p.Object(DecodeHex(t, "cccccccccccccccccccccccccccccccccccccccc")) if err != nil { t.Errorf("Expected nil, got %v", err) } if TypeCommit != o.Type() { t.Errorf("Expected %v, got %v", TypeCommit, o.Type()) } unpacked, err := o.Unpack() if !bytes.Equal([]byte(original), unpacked) { t.Errorf("Expected %v, got %v", []byte(original), unpacked) } if err != nil { t.Errorf("Expected nil, got %v", err) } } func TestPackObjectReturnsObjectWithDeltaBaseOffset(t *testing.T) { const original = "Hello" compressed, _ := compress(original) delta, _ := compress(string([]byte{ 0x05, // Source size: 5. 0x0e, // Destination size: 14. 0x91, // (1000 0001) (instruction=copy, bitmask=0001) 0x00, // (0000 0000) (offset=0) 0x05, // (0000 0101) (size=5) 0x09, // (0000 0111) (instruction=add, size=7) // Contents: ... ',', ' ', 'w', 'o', 'r', 'l', 'd', '!', '\n', })) p := &Packfile{ idx: IndexWith(map[string]uint32{ "cccccccccccccccccccccccccccccccccccccccc": uint32(32 + 1 + len(compressed)), }), r: bytes.NewReader(append(append([]byte{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x35, // (0011 0101) (msb=0, type=blob, size=5) }, compressed...), append([]byte{ 0x6e, // (0110 1010) (msb=0, type=obj_ofs_delta, size=10) 0x12, // (0001 0001) (ofs_delta=-17, len(compressed)) }, delta...)...)), hash: sha1.New(), } o, err := p.Object(DecodeHex(t, "cccccccccccccccccccccccccccccccccccccccc")) if err != nil { t.Errorf("Expected nil, got %v", err) } if TypeBlob != o.Type() { t.Errorf("Expected %v, got %v", TypeBlob, o.Type()) } unpacked, err := o.Unpack() if !bytes.Equal([]byte(original+", world!\n"), unpacked) { t.Errorf("Expected %v, got %v", []byte(original+", world!\n"), unpacked) } if err != nil { t.Errorf("Expected nil, got %v", err) } } func TestPackfileObjectReturnsObjectWithDeltaBaseReference(t *testing.T) { const original = "Hello!\n" compressed, _ := compress(original) delta, _ := compress(string([]byte{ 0x07, // Source size: 7. 0x0e, // Destination size: 14. 0x91, // (1001 0001) (copy, smask=0001, omask=0001) 0x00, // (0000 0000) (offset=0) 0x05, // (0000 0101) (size=5) 0x7, // (0000 0111) (add, length=6) ',', ' ', 'w', 'o', 'r', 'l', 'd', // (data ...) 0x91, // (1001 0001) (copy, smask=0001, omask=0001) 0x05, // (0000 0101) (offset=5) 0x02, // (0000 0010) (size=2) })) p := &Packfile{ idx: IndexWith(map[string]uint32{ "cccccccccccccccccccccccccccccccccccccccc": 32, "dddddddddddddddddddddddddddddddddddddddd": 52, }), r: bytes.NewReader(append(append([]byte{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x37, // (0011 0101) (msb=0, type=blob, size=7) }, compressed...), append([]byte{ 0x7f, // (0111 1111) (msb=0, type=obj_ref_delta, size=15) // SHA-1 "cccccccccccccccccccccccccccccccccccccccc", // original blob contents is "Hello!\n" 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, }, delta...)...)), hash: sha1.New(), } o, err := p.Object(DecodeHex(t, "dddddddddddddddddddddddddddddddddddddddd")) if err != nil { t.Errorf("Expected nil, got %v", err) } if TypeBlob != o.Type() { t.Errorf("Expected %v, got %v", TypeBlob, o.Type()) } unpacked, err := o.Unpack() if !bytes.Equal([]byte("Hello, world!\n"), unpacked) { t.Errorf("Expected %v, got %v", []byte("Hello, world!\n"), unpacked) } if err != nil { t.Errorf("Expected nil, got %v", err) } } func TestPackfileClosesReadClosers(t *testing.T) { r := new(ReaderAtCloser) p := &Packfile{ r: r, } if p.Close() != nil { t.Errorf("Expected nil, got %v", p.Close()) } if r.N != 1 { t.Errorf("Expected %v, got %v", 1, r.N) } } func TestPackfileClosePropogatesCloseErrors(t *testing.T) { e := errors.New("git/object/pack: testing") p := &Packfile{ r: &ReaderAtCloser{E: e}, } if !errors.Is(p.Close(), e) { t.Errorf("Expected %v, got %v", e, p.Close()) } } type ReaderAtCloser struct { E error N uint64 } func (r *ReaderAtCloser) ReadAt(p []byte, at int64) (int, error) { return 0, nil } func (r *ReaderAtCloser) Close() error { atomic.AddUint64(&r.N, 1) return r.E } func IndexWith(offsets map[string]uint32) *Index { header := []byte{ 0xff, 0x74, 0x4f, 0x63, 0x00, 0x00, 0x00, 0x02, } ns := make([][]byte, 0, len(offsets)) for name := range offsets { x, _ := hex.DecodeString(name) ns = append(ns, x) } sort.Slice(ns, func(i, j int) bool { return bytes.Compare(ns[i], ns[j]) < 0 }) fanout := make([]uint32, 256) for i := range fanout { var n uint32 for _, name := range ns { if name[0] <= byte(i) { n++ } } fanout[i] = n } crcs := make([]byte, 4*len(offsets)) for i := range ns { binary.BigEndian.PutUint32(crcs[i*4:], 0) } offs := make([]byte, 4*len(offsets)) for i, name := range ns { binary.BigEndian.PutUint32(offs[i*4:], offsets[hex.EncodeToString(name)]) } buf := make([]byte, 0) buf = append(buf, header...) for _, f := range fanout { x := make([]byte, 4) binary.BigEndian.PutUint32(x, f) buf = append(buf, x...) } for _, n := range ns { buf = append(buf, n...) } buf = append(buf, crcs...) buf = append(buf, offs...) return &Index{ fanout: fanout, r: bytes.NewReader(buf), version: &V2{hash: sha1.New()}, } } func DecodeHex(t *testing.T, str string) []byte { b, err := hex.DecodeString(str) if err != nil { t.Fatalf("git/object/pack:: unexpected hex.DecodeString error: %s", err) } return b } ================================================ FILE: modules/git/gitobj/pack/set.go ================================================ package pack import ( "hash" "os" "path/filepath" "regexp" "sort" "strings" "github.com/antgroup/hugescm/modules/git/gitobj/errors" ) // Set allows access of objects stored across a set of packfiles. type Set struct { // m maps the leading byte of a SHA-1 object name to a set of packfiles // that might contain that object, in order of which packfile is most // likely to contain that object. m map[byte][]*Packfile // closeFn is a function that is run by Close(), designated to free // resources held by the *Set, like open packfiles. closeFn func() error } var ( // nameRe is a regular expression that matches the basename of a // filepath that is a packfile. // // It includes one matchgroup, which is the SHA-1 name of the pack. nameRe = regexp.MustCompile(`^(.*)\.pack$`) ) // NewSet creates a new *Set of all packfiles found in a given object database's // root (i.e., "/path/to/repo/.git/objects"). // // It finds all packfiles in the "pack" subdirectory, and instantiates a *Set // containing them. If there was an error parsing the packfiles in that // directory, or the directory was otherwise unable to be observed, NewSet // returns that error. func NewSet(db string, algo hash.Hash) (*Set, error) { pd := filepath.Join(db, "pack") paths, err := filepath.Glob(filepath.Join(escapeGlobPattern(pd), "*.pack")) if err != nil { return nil, err } packs := make([]*Packfile, 0, len(paths)) for _, path := range paths { subMatch := nameRe.FindStringSubmatch(filepath.Base(path)) if len(subMatch) != 2 { continue } name := subMatch[1] ifd, err := os.Open(filepath.Join(pd, name+".idx")) if err != nil { // We have a pack (since it matched the regex), but the // index is missing or unusable. Skip this pack and // continue on with the next one, as Git does. if ifd != nil { // In the unlikely event that we did open a // file, close it, but discard any error in // doing so. _ = ifd.Close() } continue } pfd, err := os.Open(filepath.Join(pd, name+".pack")) if err != nil { _ = ifd.Close() return nil, err } pack, err := DecodePackfile(pfd, algo) if err != nil { _ = ifd.Close() return nil, err } idx, err := DecodeIndex(ifd, algo) if err != nil { _ = pack.Close() return nil, err } pack.idx = idx packs = append(packs, pack) } return NewSetPacks(packs...), nil } // globEscapes uses these escapes because filepath.Glob does not understand // backslash escapes on Windows. var globEscapes = map[string]string{ "*": "[*]", "?": "[?]", "[": "[[]", } func escapeGlobPattern(s string) string { for char, escape := range globEscapes { s = strings.ReplaceAll(s, char, escape) } return s } // NewSetPacks creates a new *Set from the given packfiles. func NewSetPacks(packs ...*Packfile) *Set { m := make(map[byte][]*Packfile) for i := range 256 { n := byte(i) for j := range packs { pack := packs[j] var count uint32 if n == 0 { count = pack.idx.fanout[n] } else { count = pack.idx.fanout[n] - pack.idx.fanout[n-1] } if count > 0 { m[n] = append(m[n], pack) } } sort.Slice(m[n], func(i, j int) bool { ni := m[n][i].idx.fanout[n] nj := m[n][j].idx.fanout[n] return ni > nj }) } return &Set{ m: m, closeFn: func() error { for _, pack := range packs { if err := pack.Close(); err != nil { return err } } return nil }, } } // Close closes all open packfiles, returning an error if one was encountered. func (s *Set) Close() error { if s.closeFn == nil { return nil } return s.closeFn() } // Object opens (but does not unpack, or, apply the delta-base chain) a given // object in the first packfile that matches it. // // Object searches packfiles contained in the set in order of how many objects // they have that begin with the first by of the given SHA-1 "name", in // descending order. // // If the object was unable to be found in any of the packfiles, (nil, // ErrNotFound) will be returned. // // If there was otherwise an error opening the object for reading from any of // the packfiles, it will be returned, and no other packfiles will be searched. // // Otherwise, the object will be returned without error. func (s *Set) Object(name []byte) (*Object, error) { return s.each(name, func(p *Packfile) (*Object, error) { return p.Object(name) }) } // iterFn is a function that takes a given packfile and opens an object from it. type iterFn func(p *Packfile) (o *Object, err error) // each executes the given iterFn "fn" on each Packfile that has any objects // beginning with a prefix of the SHA-1 "name", in order of which packfiles have // the most objects beginning with that prefix. // // If any invocation of "fn" returns a non-nil error, it will either be a) // returned immediately, if the error is not ErrIsNotFound, or b) continued // immediately, if the error is ErrNotFound. // // If no packfiles match the given file, return errors.NoSuchObject, along with // no object. func (s *Set) each(name []byte, fn iterFn) (*Object, error) { var key byte if len(name) > 0 { key = name[0] } for _, pack := range s.m[key] { o, err := fn(pack) if err != nil { if IsNotFound(err) { continue } return nil, err } return o, nil } return nil, errors.NoSuchObject(name) } ================================================ FILE: modules/git/gitobj/pack/set_test.go ================================================ package pack import ( "bytes" "testing" ) func TestSetOpenOpensAPackedObject(t *testing.T) { const sha = "decafdecafdecafdecafdecafdecafdecafdecaf" const data = "Hello, world!\n" compressed, _ := compress(data) set := NewSetPacks(&Packfile{ idx: IndexWith(map[string]uint32{ sha: 0, }), r: bytes.NewReader(append([]byte{0x3e}, compressed...)), }) o, err := set.Object(DecodeHex(t, sha)) if err != nil { t.Errorf("Expected nil, got %v", err) } if TypeBlob != o.Type() { t.Errorf("Expected %v, got %v", TypeBlob, o.Type()) } unpacked, err := o.Unpack() if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte(data), unpacked) { t.Errorf("Expected %v, got %v", []byte(data), unpacked) } } func TestSetOpenOpensPackedObjectsInPackOrder(t *testing.T) { p1 := &Packfile{ Objects: 1, idx: IndexWith(map[string]uint32{ "aa00000000000000000000000000000000000000": 1, }), r: bytes.NewReader(nil), } p2 := &Packfile{ Objects: 2, idx: IndexWith(map[string]uint32{ "aa11111111111111111111111111111111111111": 1, "aa22222222222222222222222222222222222222": 2, }), r: bytes.NewReader(nil), } p3 := &Packfile{ Objects: 3, idx: IndexWith(map[string]uint32{ "aa33333333333333333333333333333333333333": 3, "aa44444444444444444444444444444444444444": 4, "aa55555555555555555555555555555555555555": 5, }), r: bytes.NewReader(nil), } set := NewSetPacks(p1, p2, p3) var visited []*Packfile _, _ = set.each( DecodeHex(t, "aa55555555555555555555555555555555555555"), func(p *Packfile) (*Object, error) { visited = append(visited, p) return nil, errNotFound }, ) if len(visited) != 3 { t.Fatalf("Expected len %v, got %v", 3, len(visited)) } if visited[0].Objects != 3 { t.Errorf("Expected %v, got %v", visited[0].Objects, 3) } if visited[1].Objects != 2 { t.Errorf("Expected %v, got %v", visited[1].Objects, 2) } if visited[2].Objects != 1 { t.Errorf("Expected %v, got %v", visited[2].Objects, 1) } } ================================================ FILE: modules/git/gitobj/pack/storage.go ================================================ package pack import ( "hash" "io" ) // Storage implements the storage.Storage interface. type Storage struct { packs *Set } // NewStorage returns a new storage object based on a pack set. func NewStorage(root string, algo hash.Hash) (*Storage, error) { packs, err := NewSet(root, algo) if err != nil { return nil, err } return &Storage{packs: packs}, nil } // Open implements the storage.Storage.Open interface. func (f *Storage) Open(oid []byte) (r io.ReadCloser, err error) { obj, err := f.packs.Object(oid) if err != nil { return nil, err } return &delayedObjectReader{obj: obj}, nil } // Open implements the storage.Storage.Open interface. func (f *Storage) Close() error { return f.packs.Close() } // IsCompressed returns false, because data returned is already decompressed. func (f *Storage) IsCompressed() bool { return false } ================================================ FILE: modules/git/gitobj/pack/type.go ================================================ package pack import ( "errors" ) // PackedObjectType is a constant type that is defined for all valid object // types that a packed object can represent. type PackedObjectType uint8 const ( // TypeNone is the zero-value for PackedObjectType, and represents the // absence of a type. TypeNone PackedObjectType = iota // TypeCommit is the PackedObjectType for commit objects. TypeCommit // TypeTree is the PackedObjectType for tree objects. TypeTree // TypeBlob is the PackedObjectType for blob objects. TypeBlob // TypeTag is the PackedObjectType for tag objects. TypeTag // TypeObjectOffsetDelta is the type for OBJ_OFS_DELTA-typed objects. TypeObjectOffsetDelta PackedObjectType = 6 // TypeObjectReferenceDelta is the type for OBJ_REF_DELTA-typed objects. TypeObjectReferenceDelta PackedObjectType = 7 ) // String implements fmt.Stringer and returns an encoding of the type valid for // use in the loose object format protocol (see: package 'object' for more). // // If the receiving instance is not defined, String() will panic(). func (t PackedObjectType) String() string { switch t { case TypeNone: return "" case TypeCommit: return "commit" case TypeTree: return "tree" case TypeBlob: return "blob" case TypeTag: return "tag" case TypeObjectOffsetDelta: return "obj_ofs_delta" case TypeObjectReferenceDelta: return "obj_ref_delta" } //panic(fmt.Sprintf("git/object/pack:: unknown object type: %d", t)) return "" } var ( errUnrecognizedObjectType = errors.New("git/object/pack:: unrecognized object type") ) ================================================ FILE: modules/git/gitobj/pack/type_test.go ================================================ package pack import ( "fmt" "testing" ) type PackedObjectStringTestCase struct { T PackedObjectType Expected string Panic bool } func (c *PackedObjectStringTestCase) Assert(t *testing.T) { if c.Panic { defer func() { err := recover() if err == nil { t.Fatalf("git/object/pack:: expected panic()") } if c.Expected != fmt.Sprintf("%s", err) { t.Errorf("Expected %v, got %v", c.Expected, fmt.Sprintf("%s", err)) } }() } if c.Expected != c.T.String() { t.Errorf("Expected %v, got %v", c.Expected, c.T.String()) } } func TestPackedObjectTypeString(t *testing.T) { for desc, c := range map[string]*PackedObjectStringTestCase{ "TypeNone": {T: TypeNone, Expected: ""}, "TypeCommit": {T: TypeCommit, Expected: "commit"}, "TypeTree": {T: TypeTree, Expected: "tree"}, "TypeBlob": {T: TypeBlob, Expected: "blob"}, "TypeTag": {T: TypeTag, Expected: "tag"}, "TypeObjectOffsetDelta": {T: TypeObjectOffsetDelta, Expected: "obj_ofs_delta"}, "TypeObjectReferenceDelta": {T: TypeObjectReferenceDelta, Expected: "obj_ref_delta"}, } { t.Run(desc, c.Assert) } } ================================================ FILE: modules/git/gitobj/storage/backend.go ================================================ package storage // Backend is an encapsulation of a set of read-only and read-write interfaces // for reading and writing objects. type Backend interface { // Storage returns a read source and optionally a write source. // Generally, the write location, if present, should also be a read // location. Storage() (Storage, WritableStorage) } ================================================ FILE: modules/git/gitobj/storage/decompressing_readcloser.go ================================================ package storage import ( "compress/zlib" "io" ) // decompressingReadCloser wraps zlib.NewReader to ensure that both the zlib // reader and its underlying type are closed. type decompressingReadCloser struct { r io.ReadCloser zr io.ReadCloser } // newDecompressingReadCloser creates a new wrapped zlib reader func newDecompressingReadCloser(r io.ReadCloser) (io.ReadCloser, error) { zr, err := zlib.NewReader(r) if err != nil { return nil, err } return &decompressingReadCloser{r: r, zr: zr}, nil } // Read implements io.ReadCloser. func (d *decompressingReadCloser) Read(b []byte) (int, error) { return d.zr.Read(b) } // Close implements io.ReadCloser. func (d *decompressingReadCloser) Close() error { if err := d.zr.Close(); err != nil { return err } return d.r.Close() } ================================================ FILE: modules/git/gitobj/storage/multi_storage.go ================================================ package storage import ( "errors" "io" ge "github.com/antgroup/hugescm/modules/git/gitobj/errors" ) // Storage implements an interface for reading, but not writing, objects in an // object database. type multiStorage struct { storages []Storage } func MultiStorage(args ...Storage) Storage { return &multiStorage{storages: args} } // Open returns a handle on an existing object keyed by the given object // ID. It returns an error if that file does not already exist. func (m *multiStorage) Open(oid []byte) (f io.ReadCloser, err error) { for _, s := range m.storages { f, err := s.Open(oid) if err != nil { if ge.IsNoSuchObject(err) { continue } return nil, err } if s.IsCompressed() { return newDecompressingReadCloser(f) } return f, nil } return nil, ge.NoSuchObject(oid) } // Close closes the filesystem, after which no more operations are // allowed. func (m *multiStorage) Close() error { var errs []error for _, s := range m.storages { if err := s.Close(); err != nil { errs = append(errs, err) } } return errors.Join(errs...) } // Compressed indicates whether data read from this storage source will // be zlib-compressed. func (m *multiStorage) IsCompressed() bool { // To ensure we can read from any Storage type, we automatically // decompress items if they need it. return false } ================================================ FILE: modules/git/gitobj/storage/storage.go ================================================ package storage import "io" // Storage implements an interface for reading, but not writing, objects in an // object database. type Storage interface { // Open returns a handle on an existing object keyed by the given object // ID. It returns an error if that file does not already exist. Open(oid []byte) (f io.ReadCloser, err error) // Close closes the filesystem, after which no more operations are // allowed. Close() error // Compressed indicates whether data read from this storage source will // be zlib-compressed. IsCompressed() bool } // WritableStorage implements an interface for reading and writing objects in // an object database. type WritableStorage interface { Storage // Store copies the data given in "r" to the unique object path given by // "oid". It returns an error if that file already exists (acting as if // the `os.O_EXCL` mode is given in a bitmask to os.Open). Store(oid []byte, r io.Reader) (n int64, err error) } ================================================ FILE: modules/git/gitobj/storer.go ================================================ package gitobj import "io" // storer implements a storage engine for reading, writing, and creating // io.ReadWriters that can store information about loose objects type Storer interface { // Open returns a handle on an existing object keyed by the given SHA. // It returns an error if that file does not already exist. Open(sha []byte) (f io.ReadCloser, err error) // Store copies the data given in "r" to the unique object path given by // "sha". It returns an error if that file already exists (acting as if // the `os.O_EXCL` mode is given in a bitmask to os.Open). Store(sha []byte, r io.Reader) (n int64, err error) } ================================================ FILE: modules/git/gitobj/tag.go ================================================ package gitobj import ( "bufio" "bytes" "encoding/hex" "fmt" "hash" "io" "strings" ) type Tag struct { Object []byte ObjectType ObjectType Name string Tagger string Message string } // https://git-scm.com/docs/signature-format // https://github.blog/changelog/2022-08-23-ssh-commit-verification-now-supported/ func (t *Tag) Extract() (message string, signature string) { if i := strings.Index(t.Message, "-----BEGIN"); i > 0 { return t.Message[:i], t.Message[i:] } return t.Message, "" } func (t *Tag) StrictMessage() string { m, _ := t.Extract() return m } // Decode implements Object.Decode and decodes the uncompressed tag being // read. It returns the number of uncompressed bytes being consumed off of the // stream, which should be strictly equal to the size given. // // If any error was encountered along the way it will be returned, and the // receiving *Tag is considered invalid. func (t *Tag) Decode(hash hash.Hash, r io.Reader, size int64) (int, error) { br := bufio.NewReader(io.LimitReader(r, size)) var ( finishedHeaders bool ) var message strings.Builder for { line, readErr := br.ReadString('\n') if readErr != nil && readErr != io.EOF { return 0, readErr } if finishedHeaders { message.WriteString(line) } else { text := strings.TrimSuffix(line, "\n") if len(text) == 0 { finishedHeaders = true continue } field, value, ok := strings.Cut(text, " ") if !ok { return 0, fmt.Errorf("git/object: invalid tag header: %s", text) } switch field { case "object": sha, err := hex.DecodeString(value) if err != nil { return 0, fmt.Errorf("git/object: unable to decode SHA-1: %w", err) } t.Object = sha case "type": t.ObjectType = ObjectTypeFromString(value) case "tag": t.Name = value case "tagger": t.Tagger = value default: return 0, fmt.Errorf("git/object: unknown tag header: %s", field) } } if readErr == io.EOF { break } } t.Message = message.String() return int(size), nil } // Encode encodes the Tag's contents to the given io.Writer, "w". If there was // any error copying the Tag's contents, that error will be returned. // // Otherwise, the number of bytes written will be returned. func (t *Tag) Encode(w io.Writer) (int, error) { headers := []string{ fmt.Sprintf("object %s", hex.EncodeToString(t.Object)), fmt.Sprintf("type %s", t.ObjectType), fmt.Sprintf("tag %s", t.Name), fmt.Sprintf("tagger %s", t.Tagger), } return fmt.Fprintf(w, "%s\n\n%s", strings.Join(headers, "\n"), t.Message) } // Equal returns whether the receiving and given Tags are equal, or in other // words, whether they are represented by the same SHA-1 when saved to the // object database. func (t *Tag) Equal(other *Tag) bool { if (t == nil) != (other == nil) { return false } if t != nil { return bytes.Equal(t.Object, other.Object) && t.ObjectType == other.ObjectType && t.Name == other.Name && t.Tagger == other.Tagger && t.Message == other.Message } return true } // Type implements Object.ObjectType by returning the correct object type for // Tags, TagObjectType. func (t *Tag) Type() ObjectType { return TagObjectType } ================================================ FILE: modules/git/gitobj/tag_test.go ================================================ package gitobj import ( "bytes" "crypto/sha1" "fmt" "testing" ) func TestTagTypeReturnsCorrectObjectType(t *testing.T) { if TagObjectType != new(Tag).Type() { t.Errorf("Expected %v, got %v", TagObjectType, new(Tag).Type()) } } func TestTagEncode(t *testing.T) { tag := &Tag{ Object: []byte("aaaaaaaaaaaaaaaaaaaa"), ObjectType: CommitObjectType, Name: "v2.4.0", Tagger: "A U Thor ", Message: "The quick brown fox jumps over the lazy dog.", } buf := new(bytes.Buffer) n, err := tag.Encode(buf) if err != nil { t.Errorf("Expected nil, got %v", err) } if int64(buf.Len()) != int64(n) { t.Errorf("Expected %v, got %v", buf.Len(), n) } assertLine(t, buf, "object 6161616161616161616161616161616161616161") assertLine(t, buf, "type commit") assertLine(t, buf, "tag v2.4.0") assertLine(t, buf, "tagger A U Thor ") assertLine(t, buf, "") assertLine(t, buf, "The quick brown fox jumps over the lazy dog.") if buf.Len() != 0 { t.Errorf("Expected 0, got %v", buf.Len()) } } func TestTagDecode(t *testing.T) { from := new(bytes.Buffer) fmt.Fprintf(from, "object 6161616161616161616161616161616161616161\n") fmt.Fprintf(from, "type commit\n") fmt.Fprintf(from, "tag v2.4.0\n") fmt.Fprintf(from, "tagger A U Thor \n") fmt.Fprintf(from, "\n") fmt.Fprintf(from, "The quick brown fox jumps over the lazy dog.\n") flen := from.Len() tag := new(Tag) n, err := tag.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Errorf("Expected nil, got %v", err) } if int64(n) != int64(flen) { t.Errorf("Expected %v, got %v", flen, n) } if !bytes.Equal([]byte("aaaaaaaaaaaaaaaaaaaa"), tag.Object) { t.Errorf("Expected %v, got %v", []byte("aaaaaaaaaaaaaaaaaaaa"), tag.Object) } if CommitObjectType != tag.ObjectType { t.Errorf("Expected %v, got %v", CommitObjectType, tag.ObjectType) } if tag.Name != "v2.4.0" { t.Errorf("Expected %v, got %v", "v2.4.0", tag.Name) } if tag.Tagger != "A U Thor " { t.Errorf("Expected %v, got %v", "A U Thor ", tag.Tagger) } if tag.Message != "The quick brown fox jumps over the lazy dog.\n" { t.Errorf("Expected %v, got %v", "The quick brown fox jumps over the lazy dog.\n", tag.Message) } } ================================================ FILE: modules/git/gitobj/tree.go ================================================ package gitobj import ( "bufio" "bytes" "fmt" "hash" "io" "sort" "strconv" "strings" "github.com/antgroup/hugescm/modules/git/gitobj/pack" ) // We define these here instead of using the system ones because not all // operating systems use the traditional values. For example, zOS uses // different values. const ( sIFMT = int32(0170000) sIFREG = int32(0100000) sIFDIR = int32(0040000) sIFLNK = int32(0120000) sIFGITLINK = int32(0160000) ) // Tree encapsulates a Git tree object. type Tree struct { // Entries is the list of entries held by this tree. Entries []*TreeEntry } // Type implements Object.ObjectType by returning the correct object type for // Trees, TreeObjectType. func (t *Tree) Type() ObjectType { return TreeObjectType } // Decode implements Object.Decode and decodes the uncompressed tree being // read. It returns the number of uncompressed bytes being consumed off of the // stream, which should be strictly equal to the size given. // // If any error was encountered along the way, that will be returned, along with // the number of bytes read up to that point. func (t *Tree) Decode(hash hash.Hash, from io.Reader, size int64) (n int, err error) { hashlen := hash.Size() buf := bufio.NewReader(from) var entries []*TreeEntry for { modes, err := buf.ReadString(' ') if err != nil { if err == io.EOF { break } return n, err } n += len(modes) modes = strings.TrimSuffix(modes, " ") mode, _ := strconv.ParseInt(modes, 8, 32) fname, err := buf.ReadString('\x00') if err != nil { return n, err } n += len(fname) fname = strings.TrimSuffix(fname, "\x00") var sha [pack.MaxHashSize]byte if _, err = io.ReadFull(buf, sha[:hashlen]); err != nil { return n, err } n += hashlen entries = append(entries, &TreeEntry{ Name: fname, Oid: sha[:hashlen], Filemode: int32(mode), }) } t.Entries = entries return n, nil } // Encode encodes the tree's contents to the given io.Writer, "w". If there was // any error copying the tree's contents, that error will be returned. // // Otherwise, the number of bytes written will be returned. func (t *Tree) Encode(to io.Writer) (n int, err error) { const entryTmpl = "%s %s\x00%s" for _, entry := range t.Entries { fmode := strconv.FormatInt(int64(entry.Filemode), 8) ne, err := fmt.Fprintf(to, entryTmpl, fmode, entry.Name, entry.Oid) if err != nil { return n, err } n += ne } return } // Merge performs a merge operation against the given set of `*TreeEntry`'s by // either replacing existing tree entries of the same name, or appending new // entries in sub-tree order. // // It returns a copy of the tree, and performs the merge in O(n*log(n)) time. func (t *Tree) Merge(others ...*TreeEntry) *Tree { unseen := make(map[string]*TreeEntry) // Build a cache of name to *TreeEntry. for _, other := range others { unseen[other.Name] = other } // Map the existing entries ("t.Entries") into a new set by either // copying an existing entry, or replacing it with a new one. entries := make([]*TreeEntry, 0, len(t.Entries)) for _, entry := range t.Entries { if other, ok := unseen[entry.Name]; ok { entries = append(entries, other) delete(unseen, entry.Name) } else { oid := make([]byte, len(entry.Oid)) copy(oid, entry.Oid) entries = append(entries, &TreeEntry{ Filemode: entry.Filemode, Name: entry.Name, Oid: oid, }) } } // For all the items we haven't replaced into the new set, append them // to the entries. for _, remaining := range unseen { entries = append(entries, remaining) } // Call sort afterwords, as a tradeoff between speed and spacial // complexity. As a future point of optimization, adding new elements // (see: above) could be done as a linear pass of the "entries" set. // // In order to do that, we must have a constant-time lookup of both // entries in the existing and new sets. This requires building a // map[string]*TreeEntry for the given "others" as well as "t.Entries". // // Trees can be potentially large, so trade this spacial complexity for // an O(n*log(n)) sort. sort.Sort(SubtreeOrder(entries)) return &Tree{Entries: entries} } // Equal returns whether the receiving and given trees are equal, or in other // words, whether they are represented by the same SHA-1 when saved to the // object database. func (t *Tree) Equal(other *Tree) bool { if (t == nil) != (other == nil) { return false } if t != nil { if len(t.Entries) != len(other.Entries) { return false } for i := 0; i < len(t.Entries); i++ { e1 := t.Entries[i] e2 := other.Entries[i] if !e1.Equal(e2) { return false } } } return true } // TreeEntry encapsulates information about a single tree entry in a tree // listing. type TreeEntry struct { // Name is the entry name relative to the tree in which this entry is // contained. Name string // Oid is the object ID for this tree entry. Oid []byte // Filemode is the filemode of this tree entry on disk. Filemode int32 } // Equal returns whether the receiving and given TreeEntry instances are // identical in name, filemode, and OID. func (e *TreeEntry) Equal(other *TreeEntry) bool { if (e == nil) != (other == nil) { return false } if e != nil { return e.Name == other.Name && bytes.Equal(e.Oid, other.Oid) && e.Filemode == other.Filemode } return true } // Type is the type of entry (either blob: BlobObjectType, or a sub-tree: // TreeObjectType). func (e *TreeEntry) Type() ObjectType { switch e.Filemode & sIFMT { case sIFREG: return BlobObjectType case sIFDIR: return TreeObjectType case sIFLNK: return BlobObjectType case sIFGITLINK: return CommitObjectType default: return UnknownObjectType // panic(fmt.Sprintf("git/object: unknown object type: %o", // e.Filemode)) } } // IsLink returns true if the given TreeEntry is a blob which represents a // symbolic link (i.e., with a filemode of 0120000. func (e *TreeEntry) IsLink() bool { return e.Filemode&sIFMT == sIFLNK } // SubtreeOrder is an implementation of sort.Interface that sorts a set of // `*TreeEntry`'s according to "subtree" order. This ordering is required to // write trees in a correct, readable format to the Git object database. // // The format is as follows: entries are sorted lexicographically in byte-order, // with subtrees (entries of Type() == object.TreeObjectType) being sorted as // if their `Name` fields ended in a "/". // // See: https://github.com/git/git/blob/v2.13.0/fsck.c#L492-L525 for more // details. type SubtreeOrder []*TreeEntry // Len implements sort.Interface.Len() and return the length of the underlying // slice. func (s SubtreeOrder) Len() int { return len(s) } // Swap implements sort.Interface.Swap() and swaps the two elements at i and j. func (s SubtreeOrder) Swap(i, j int) { s[i], s[j] = s[j], s[i] } // Less implements sort.Interface.Less() and returns whether the element at "i" // is compared as "less" than the element at "j". In other words, it returns if // the element at "i" should be sorted ahead of that at "j". // // It performs this comparison in lexicographic byte-order according to the // rules above (see SubtreeOrder). func (s SubtreeOrder) Less(i, j int) bool { return s.Name(i) < s.Name(j) } // Name returns the name for a given entry indexed at "i", which is a C-style // string ('\0' terminated unless it's a subtree), optionally terminated with // '/' if it's a subtree. // // This is done because '/' sorts ahead of '\0', and is compatible with the // tree order in upstream Git. func (s SubtreeOrder) Name(i int) string { if i < 0 || i >= len(s) { return "" } entry := s[i] if entry == nil { return "" } if entry.Type() == TreeObjectType { return entry.Name + "/" } return entry.Name + "\x00" } ================================================ FILE: modules/git/gitobj/tree_test.go ================================================ package gitobj import ( "bufio" "bytes" "crypto/sha1" "fmt" "sort" "strconv" "testing" ) func TestTreeReturnsCorrectObjectType(t *testing.T) { if TreeObjectType != new(Tree).Type() { t.Errorf("Expected %v, got %v", TreeObjectType, new(Tree).Type()) } } func TestTreeEncoding(t *testing.T) { tree := &Tree{ Entries: []*TreeEntry{ { Name: "a.dat", Oid: []byte("aaaaaaaaaaaaaaaaaaaa"), Filemode: 0100644, }, { Name: "subdir", Oid: []byte("bbbbbbbbbbbbbbbbbbbb"), Filemode: 040000, }, { Name: "submodule", Oid: []byte("cccccccccccccccccccc"), Filemode: 0160000, }, }, } buf := new(bytes.Buffer) n, err := tree.Encode(buf) if err != nil { t.Errorf("Expected nil, got %v", err) } if n == 0 { t.Errorf("Expected not equal") } assertTreeEntry(t, buf, "a.dat", []byte("aaaaaaaaaaaaaaaaaaaa"), 0100644) assertTreeEntry(t, buf, "subdir", []byte("bbbbbbbbbbbbbbbbbbbb"), 040000) assertTreeEntry(t, buf, "submodule", []byte("cccccccccccccccccccc"), 0160000) if buf.Len() != 0 { t.Errorf("Expected %v, got %v", 0, buf.Len()) } } func TestTreeDecoding(t *testing.T) { from := new(bytes.Buffer) fmt.Fprintf(from, "%s %s\x00%s", strconv.FormatInt(int64(0100644), 8), "a.dat", []byte("aaaaaaaaaaaaaaaaaaaa")) fmt.Fprintf(from, "%s %s\x00%s", strconv.FormatInt(int64(040000), 8), "subdir", []byte("bbbbbbbbbbbbbbbbbbbb")) fmt.Fprintf(from, "%s %s\x00%s", strconv.FormatInt(int64(0120000), 8), "symlink", []byte("cccccccccccccccccccc")) fmt.Fprintf(from, "%s %s\x00%s", strconv.FormatInt(int64(0160000), 8), "submodule", []byte("dddddddddddddddddddd")) flen := from.Len() tree := new(Tree) n, err := tree.Decode(sha1.New(), from, int64(flen)) if err != nil { t.Errorf("Expected nil, got %v", err) } if flen != n { t.Errorf("Expected %v, got %v", flen, n) } if len(tree.Entries) != 4 { t.Fatalf("Expected %v, got %v", 4, len(tree.Entries)) } // Check a.dat if tree.Entries[0].Name != "a.dat" { t.Errorf("Expected 'a.dat', got %v", tree.Entries[0].Name) } if !bytes.Equal([]byte("aaaaaaaaaaaaaaaaaaaa"), tree.Entries[0].Oid) { t.Errorf("Expected aaaaaaaaaaaaaaaaaaaa, got %v", tree.Entries[0].Oid) } if tree.Entries[0].Filemode != 0100644 { t.Errorf("Expected 0100644, got %v", tree.Entries[0].Filemode) } // Check subdir if tree.Entries[1].Name != "subdir" { t.Errorf("Expected 'subdir', got %v", tree.Entries[1].Name) } if !bytes.Equal([]byte("bbbbbbbbbbbbbbbbbbbb"), tree.Entries[1].Oid) { t.Errorf("Expected bbbbbbbbbbbbbbbbbbbb, got %v", tree.Entries[1].Oid) } if tree.Entries[1].Filemode != 040000 { t.Errorf("Expected 040000, got %v", tree.Entries[1].Filemode) } // Check symlink if tree.Entries[2].Name != "symlink" { t.Errorf("Expected 'symlink', got %v", tree.Entries[2].Name) } if !bytes.Equal([]byte("cccccccccccccccccccc"), tree.Entries[2].Oid) { t.Errorf("Expected cccccccccccccccccccc, got %v", tree.Entries[2].Oid) } if tree.Entries[2].Filemode != 0120000 { t.Errorf("Expected 0120000, got %v", tree.Entries[2].Filemode) } // Check submodule if tree.Entries[3].Name != "submodule" { t.Errorf("Expected 'submodule', got %v", tree.Entries[3].Name) } if !bytes.Equal([]byte("dddddddddddddddddddd"), tree.Entries[3].Oid) { t.Errorf("Expected dddddddddddddddddddd, got %v", tree.Entries[3].Oid) } if tree.Entries[3].Filemode != 0160000 { t.Errorf("Expected 0160000, got %v", tree.Entries[3].Filemode) } } func TestTreeDecodingShaBoundary(t *testing.T) { var from bytes.Buffer fmt.Fprintf(&from, "%s %s\x00%s", strconv.FormatInt(int64(0100644), 8), "a.dat", []byte("aaaaaaaaaaaaaaaaaaaa")) flen := from.Len() tree := new(Tree) n, err := tree.Decode(sha1.New(), bufio.NewReaderSize(&from, flen-2), int64(flen)) if err != nil { t.Errorf("Expected nil, got %v", err) } if flen != n { t.Errorf("Expected %v, got %v", flen, n) } if len(tree.Entries) != 1 { t.Fatalf("Expected len %v, got %v", 1, len(tree.Entries)) } entry := tree.Entries[0] if entry.Name != "a.dat" { t.Errorf("Expected Name %v, got %v", "a.dat", entry.Name) } if !bytes.Equal(entry.Oid, []byte("aaaaaaaaaaaaaaaaaaaa")) { t.Errorf("Expected Oid %v, got %v", []byte("aaaaaaaaaaaaaaaaaaaa"), entry.Oid) } if entry.Filemode != 0100644 { t.Errorf("Expected Filemode %v, got %v", 0100644, entry.Filemode) } } func TestTreeMergeReplaceElements(t *testing.T) { e1 := &TreeEntry{Name: "a", Filemode: 0100644, Oid: []byte{0x1}} e2 := &TreeEntry{Name: "b", Filemode: 0100644, Oid: []byte{0x2}} e3 := &TreeEntry{Name: "c", Filemode: 0100755, Oid: []byte{0x3}} e4 := &TreeEntry{Name: "b", Filemode: 0100644, Oid: []byte{0x4}} e5 := &TreeEntry{Name: "c", Filemode: 0100644, Oid: []byte{0x5}} t1 := &Tree{Entries: []*TreeEntry{e1, e2, e3}} t2 := t1.Merge(e4, e5) if len(t1.Entries) != 3 { t.Fatalf("Expected len %v, got %v", 3, len(t1.Entries)) } if !bytes.Equal(t1.Entries[0].Oid, []byte{0x1}) { t.Errorf("Expected true") } if !bytes.Equal(t1.Entries[1].Oid, []byte{0x2}) { t.Errorf("Expected true") } if !bytes.Equal(t1.Entries[2].Oid, []byte{0x3}) { t.Errorf("Expected true") } if len(t2.Entries) != 3 { t.Fatalf("Expected len %v, got %v", 3, len(t2.Entries)) } if !bytes.Equal(t2.Entries[0].Oid, []byte{0x1}) { t.Errorf("Expected true") } if !bytes.Equal(t2.Entries[1].Oid, []byte{0x4}) { t.Errorf("Expected true") } if !bytes.Equal(t2.Entries[2].Oid, []byte{0x5}) { t.Errorf("Expected true") } } func TestMergeInsertElementsInSubtreeOrder(t *testing.T) { e1 := &TreeEntry{Name: "a-b", Filemode: 0100644, Oid: []byte{0x1}} e2 := &TreeEntry{Name: "a", Filemode: 040000, Oid: []byte{0x2}} e3 := &TreeEntry{Name: "a=", Filemode: 0100644, Oid: []byte{0x3}} e4 := &TreeEntry{Name: "a-", Filemode: 0100644, Oid: []byte{0x4}} t1 := &Tree{Entries: []*TreeEntry{e1, e2, e3}} t2 := t1.Merge(e4) if len(t1.Entries) != 3 { t.Fatalf("Expected len %v, got %v", 3, len(t1.Entries)) } if !bytes.Equal(t1.Entries[0].Oid, []byte{0x1}) { t.Errorf("Expected true") } if !bytes.Equal(t1.Entries[1].Oid, []byte{0x2}) { t.Errorf("Expected true") } if !bytes.Equal(t1.Entries[2].Oid, []byte{0x3}) { t.Errorf("Expected true") } if len(t2.Entries) != 4 { t.Fatalf("Expected len %v, got %v", 4, len(t2.Entries)) } if !bytes.Equal(t2.Entries[0].Oid, []byte{0x4}) { t.Errorf("Expected true") } if !bytes.Equal(t2.Entries[1].Oid, []byte{0x1}) { t.Errorf("Expected true") } if !bytes.Equal(t2.Entries[2].Oid, []byte{0x2}) { t.Errorf("Expected true") } if !bytes.Equal(t2.Entries[3].Oid, []byte{0x3}) { t.Errorf("Expected true") } } type TreeEntryTypeTestCase struct { Filemode int32 Expected ObjectType IsLink bool } func (c *TreeEntryTypeTestCase) AssertType(t *testing.T) { e := &TreeEntry{Filemode: c.Filemode} got := e.Type() if c.Expected != got { t.Errorf("git/object: expected type: %s, got: %s", c.Expected, got) } } func (c *TreeEntryTypeTestCase) AssertIsLink(t *testing.T) { e := &TreeEntry{Filemode: c.Filemode} isLink := e.IsLink() if c.IsLink != isLink { t.Errorf("git/object: expected link: %v, got: %v, for type %s", c.IsLink, isLink, c.Expected) } } func TestTreeEntryTypeResolution(t *testing.T) { for desc, c := range map[string]*TreeEntryTypeTestCase{ "blob": {0100644, BlobObjectType, false}, "subtree": {040000, TreeObjectType, false}, "symlink": {0120000, BlobObjectType, true}, "commit": {0160000, CommitObjectType, false}, } { t.Run(desc, c.AssertType) t.Run(desc, c.AssertIsLink) } } func TestSubtreeOrder(t *testing.T) { // The below list (e1, e2, ..., e5) is entered in subtree order: that // is, lexicographically byte-ordered as if blobs end in a '\0', and // sub-trees end in a '/'. // // See: // http://public-inbox.org/git/7vac6jfzem.fsf@assigned-by-dhcp.cox.net e1 := &TreeEntry{Filemode: 0100644, Name: "a-"} e2 := &TreeEntry{Filemode: 0100644, Name: "a-b"} e3 := &TreeEntry{Filemode: 040000, Name: "a"} e4 := &TreeEntry{Filemode: 0100644, Name: "a="} e5 := &TreeEntry{Filemode: 0100644, Name: "a=b"} // Create a set of entries in the wrong order: entries := []*TreeEntry{e3, e4, e1, e5, e2} sort.Sort(SubtreeOrder(entries)) // Assert that they are in the correct order after sorting in sub-tree // order: if len(entries) != 5 { t.Fatalf("Expected len %v, got %v", 5, len(entries)) } if entries[0].Name != "a-" { t.Errorf("Expected %v, got %v", "a-", entries[0].Name) } if entries[1].Name != "a-b" { t.Errorf("Expected %v, got %v", "a-b", entries[1].Name) } if entries[2].Name != "a" { t.Errorf("Expected %v, got %v", "a", entries[2].Name) } if entries[3].Name != "a=" { t.Errorf("Expected %v, got %v", "a=", entries[3].Name) } if entries[4].Name != "a=b" { t.Errorf("Expected %v, got %v", "a=b", entries[4].Name) } } func TestSubtreeOrderReturnsEmptyForOutOfBounds(t *testing.T) { o := SubtreeOrder([]*TreeEntry{{Name: "a"}}) result := o.Name(len(o) + 1) if result != "" { t.Errorf("Expected %v, got %v", "", result) } } func TestSubtreeOrderReturnsEmptyForNilElements(t *testing.T) { o := SubtreeOrder([]*TreeEntry{nil}) if o.Name(0) != "" { t.Errorf("Expected %v, got %v", "", o.Name(0)) } } func TestTreeEqualReturnsTrueWithUnchangedContents(t *testing.T) { t1 := &Tree{Entries: []*TreeEntry{ {Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)}, }} t2 := &Tree{Entries: []*TreeEntry{ {Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)}, }} if !t1.Equal(t2) { t.Errorf("Expected true") } } func TestTreeEqualReturnsFalseWithChangedContents(t *testing.T) { t1 := &Tree{Entries: []*TreeEntry{ {Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)}, {Name: "b.dat", Filemode: 0100644, Oid: make([]byte, 20)}, }} t2 := &Tree{Entries: []*TreeEntry{ {Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)}, {Name: "c.dat", Filemode: 0100644, Oid: make([]byte, 20)}, }} if t1.Equal(t2) { t.Errorf("Expected false") } } func TestTreeEqualReturnsTrueWhenOneTreeIsNil(t *testing.T) { t1 := &Tree{Entries: []*TreeEntry{ {Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)}, }} t2 := (*Tree)(nil) if t1.Equal(t2) { t.Errorf("Expected false") } if t2.Equal(t1) { t.Errorf("Expected false") } } func TestTreeEqualReturnsTrueWhenBothTreesAreNil(t *testing.T) { t1 := (*Tree)(nil) t2 := (*Tree)(nil) if !t1.Equal(t2) { t.Errorf("Expected true") } } func TestTreeEntryEqualReturnsTrueWhenEntriesAreTheSame(t *testing.T) { e1 := &TreeEntry{Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)} e2 := &TreeEntry{Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)} if !e1.Equal(e2) { t.Errorf("Expected true") } } func TestTreeEntryEqualReturnsFalseWhenDifferentNames(t *testing.T) { e1 := &TreeEntry{Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)} e2 := &TreeEntry{Name: "b.dat", Filemode: 0100644, Oid: make([]byte, 20)} if e1.Equal(e2) { t.Errorf("Expected false") } } func TestTreeEntryEqualReturnsFalseWhenDifferentOids(t *testing.T) { e1 := &TreeEntry{Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)} e2 := &TreeEntry{Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)} e2.Oid[0] = 1 if e1.Equal(e2) { t.Errorf("Expected false") } } func TestTreeEntryEqualReturnsFalseWhenDifferentFilemodes(t *testing.T) { e1 := &TreeEntry{Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)} e2 := &TreeEntry{Name: "a.dat", Filemode: 0100755, Oid: make([]byte, 20)} if e1.Equal(e2) { t.Errorf("Expected false") } } func TestTreeEntryEqualReturnsFalseWhenOneEntryIsNil(t *testing.T) { e1 := &TreeEntry{Name: "a.dat", Filemode: 0100644, Oid: make([]byte, 20)} e2 := (*TreeEntry)(nil) if e1.Equal(e2) { t.Errorf("Expected false") } } func TestTreeEntryEqualReturnsTrueWhenBothEntriesAreNil(t *testing.T) { e1 := (*TreeEntry)(nil) e2 := (*TreeEntry)(nil) if !e1.Equal(e2) { t.Errorf("Expected true") } } func assertTreeEntry(t *testing.T, buf *bytes.Buffer, name string, oid []byte, mode int32) { fmode, err := buf.ReadBytes(' ') if err != nil { t.Errorf("Expected nil, got %v", err) } expectedFmode := []byte(strconv.FormatInt(int64(mode), 8) + " ") if !bytes.Equal(expectedFmode, fmode) { t.Errorf("Expected %v, got %v", expectedFmode, fmode) } fname, err := buf.ReadBytes('\x00') if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal([]byte(name+"\x00"), fname) { t.Errorf("Expected %v, got %v", []byte(name+"\x00"), fname) } var sha [20]byte _, err = buf.Read(sha[:]) if err != nil { t.Errorf("Expected nil, got %v", err) } if !bytes.Equal(oid, sha[:]) { t.Errorf("Expected %v, got %v", oid, sha[:]) } } ================================================ FILE: modules/git/hash.go ================================================ package git import ( "bufio" "bytes" "context" "encoding/hex" "fmt" "os" "strings" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/env" "github.com/zeebo/blake3" ) var ( refsHashablePrefix = [][]byte{ []byte("refs/heads/"), []byte("refs/tags/"), []byte("refs/pull/"), []byte("refs/merge-requests/"), } ) // RevParseHEAD: resolve the reference pointed to by HEAD // // not git repo: fatal: not a git repository (or any parent directory): .git // // empty repo: HEAD // fatal: 有歧义的参数 'HEAD':未知的版本或路径不存在于工作区中。 // 使用 '--' 来分隔版本和路径,例如: // 'git <命令> [<版本>...] -- [<文件>...]' // // ref not exists: HEAD // // ref exists: refs/heads/master func RevParseHEAD(ctx context.Context, environ []string, repoPath string) (string, error) { // git rev-parse --symbolic-full-name HEAD cmd := command.NewFromOptions(ctx, &command.RunOpts{RepoPath: repoPath, Environ: environ}, "git", "rev-parse", "--symbolic-full-name", "HEAD") line, err := cmd.OneLine() if err != nil { return ReferenceNameDefault, err } return line, nil } // ParseReference parse symref return hash and refname func ParseReference(ctx context.Context, repoPath string, symref string) (string, string, error) { // git rev-parse HEAD --symbolic-full-name HEAD cmd := command.NewFromOptions(ctx, &command.RunOpts{RepoPath: repoPath}, "git", "rev-parse", symref, "--symbolic-full-name", symref) output, err := cmd.Output() if err != nil { return "", ReferenceNameDefault, err } var hash, refname string lines := strings.Split(string(output), "\n") if len(lines) >= 2 { refname = lines[1] } if len(lines) >= 1 { hash = lines[0] } return hash, refname, nil } // afa70145a25e81faa685dc0b465e52b45d2444bd refs/heads/master func startsWithHashablePrefix(line []byte) bool { _, ref, ok := bytes.Cut(line, []byte(" ")) if !ok { return false } for _, p := range refsHashablePrefix { if bytes.HasPrefix(ref, p) { return true } } return false } // HashFromEnv: Calculate the hash of the repository at the specified path and environment block func HashFromEnv(ctx context.Context, environ []string, repoPath string) (string, error) { if _, err := os.Stat(repoPath); err != nil && os.IsNotExist(err) { return "", err } head, err := RevParseHEAD(ctx, environ, repoPath) if err != nil { fmt.Fprintf(os.Stderr, "unable resolve %s HEAD error: %v\n", repoPath, err) } h := blake3.New() _, _ = fmt.Fprintf(h, "ref: %s\n", head) stderr := command.NewStderr() cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: environ, RepoPath: repoPath, Stderr: stderr, }, "git", "show-ref") out, err := cmd.StdoutPipe() if err != nil { return "", fmt.Errorf("unable create stdout pipe %w", err) } defer out.Close() // nolint if err := cmd.Start(); err != nil { return "", fmt.Errorf("unable create stdout pipe %w", err) } sr := bufio.NewScanner(out) for sr.Scan() { line := bytes.TrimSpace(sr.Bytes()) if !startsWithHashablePrefix(line) { continue } _, _ = h.Write(line) _, _ = h.Write([]byte("\n")) } if err := cmd.Wait(); err != nil { if stderr.Len() > 0 { fmt.Fprintf(os.Stderr, "hash %s error: %s\n", repoPath, stderr.String()) } return "", fmt.Errorf("hash error %w", err) } return hex.EncodeToString(h.Sum(nil)), nil } // Hash: Calculate the hash of the repository at the specified path func Hash(ctx context.Context, repoPath string) (string, error) { return HashFromEnv(ctx, env.Environ(), repoPath) } type HashResult struct { HEAD string Hash string References int } // HashEx: Calculates the hash of the repository at the specified path and returns HEAD, the number of references func HashEx(ctx context.Context, repoPath string) (*HashResult, error) { if _, err := os.Stat(repoPath); err != nil && os.IsNotExist(err) { return nil, err } hr := &HashResult{} head, err := RevParseHEAD(ctx, env.Environ(), repoPath) if err != nil { fmt.Fprintf(os.Stderr, "unable resolve %s HEAD error: %v\n", repoPath, err) } hr.HEAD = head h := blake3.New() _, _ = fmt.Fprintf(h, "ref: %s\n", head) stderr := command.NewStderr() cmd := command.NewFromOptions(ctx, &command.RunOpts{RepoPath: repoPath, Stderr: stderr}, "git", "show-ref") out, err := cmd.StdoutPipe() if err != nil { return nil, fmt.Errorf("unable create stdout pipe %w", err) } defer out.Close() // nolint if err := cmd.Start(); err != nil { return nil, fmt.Errorf("unable create stdout pipe %w", err) } sr := bufio.NewScanner(out) for sr.Scan() { line := bytes.TrimSpace(sr.Bytes()) if !startsWithHashablePrefix(line) { continue } hr.References++ _, _ = h.Write(line) _, _ = h.Write([]byte("\n")) } if err := cmd.Wait(); err != nil { if stderr.Len() > 0 { fmt.Fprintf(os.Stderr, "hash %s error: %s\n", repoPath, stderr.String()) } return nil, fmt.Errorf("hash error %w", err) } hr.Hash = hex.EncodeToString(h.Sum(nil)) return hr, nil } ================================================ FILE: modules/git/hash_test.go ================================================ package git import ( "context" "fmt" "os" "testing" ) func TestHash(t *testing.T) { h, err := Hash(context.Background(), ".") if err != nil { fmt.Fprintf(os.Stderr, "hash error: %v\n", err) return } fmt.Fprintf(os.Stderr, "hash: %v\n", h) } func TestParseReference(t *testing.T) { hash, refname, err := ParseReference(context.Background(), ".", "HEAD") if err != nil { fmt.Fprintf(os.Stderr, "RevParseEx error: %v\n", err) return } fmt.Fprintf(os.Stderr, "Hash: [%s] Ref: [%s]\n", hash, refname) } ================================================ FILE: modules/git/object.go ================================================ package git import ( "io" ) // object metadata type Metadata struct { // Hash of the object. Hash string // Size is the total uncompressed size of the blob's contents. Size int64 // Type of the object Type ObjectType } type Object struct { // Hash of the object. Hash string // Size is the total uncompressed size of the blob's contents. Size int64 // Type of the object Type ObjectType // dataReader is a reader that yields the uncompressed blob contents. It // may only be read once. dataReader io.Reader } func (o *Object) Read(p []byte) (int, error) { return o.dataReader.Read(p) } // WriteTo implements the io.WriterTo interface. It defers the write to the embedded object reader // via `io.Copy()`, which in turn will use `WriteTo()` or `ReadFrom()` in case these interfaces are // implemented by the respective reader or writer. func (o *Object) WriteTo(w io.Writer) (int64, error) { // `io.Copy()` will make use of `ReadFrom()` in case the writer implements it. return io.Copy(w, o.dataReader) } func (o *Object) Discard() { if o.dataReader != nil { _, _ = io.Copy(io.Discard, o.dataReader) } } ================================================ FILE: modules/git/odb.go ================================================ package git import ( "errors" "fmt" "os" "path/filepath" "github.com/antgroup/hugescm/modules/git/gitobj" ) type ODB struct { *gitobj.Database tmpdir string } func (o *ODB) Close() error { err := o.Database.Close() _ = os.RemoveAll(o.tmpdir) return err } // NewODB open repo default odb func NewODB(repoPath string, hashAlgo HashFormat) (*ODB, error) { var options []gitobj.Option if hashAlgo != HashUNKNOWN { options = append(options, gitobj.ObjectFormat(gitobj.ObjectFormatAlgorithm(hashAlgo.String()))) } objdir := filepath.Join(repoPath, "objects") tmpdir, err := NewSundriesDir(repoPath, "objects") if err != nil { return nil, err } odb, err := gitobj.NewDatabase(objdir, tmpdir, options...) if err != nil { _ = os.RemoveAll(tmpdir) return nil, err } if odb.Hasher() == nil { _ = os.RemoveAll(tmpdir) return nil, fmt.Errorf("unsupported repository hash algorithm %s", hashAlgo) } return &ODB{Database: odb, tmpdir: tmpdir}, nil } var ( ErrObjectNotFound = errors.New("object not found") // ErrInvalidType is returned when an invalid object type is provided. ErrInvalidType = errors.New("invalid object type") ) // ObjectType internal object type // Integer values from 0 to 7 map to those exposed by git. // AnyObject is used to represent any from 0 to 7. type ObjectType int8 const ( InvalidObject ObjectType = 0 CommitObject ObjectType = 1 TreeObject ObjectType = 2 BlobObject ObjectType = 3 TagObject ObjectType = 4 // 5 reserved for future expansion OFSDeltaObject ObjectType = 6 REFDeltaObject ObjectType = 7 AnyObject ObjectType = -127 ) func (t ObjectType) String() string { switch t { case CommitObject: return "commit" case TreeObject: return "tree" case BlobObject: return "blob" case TagObject: return "tag" case OFSDeltaObject: return "ofs-delta" case REFDeltaObject: return "ref-delta" case AnyObject: return "any" default: return "unknown" } } func (t ObjectType) Bytes() []byte { return []byte(t.String()) } // Valid returns true if t is a valid ObjectType. func (t ObjectType) Valid() bool { return t >= CommitObject && t <= REFDeltaObject } // IsDelta returns true for any ObjectTyoe that represents a delta (i.e. // REFDeltaObject or OFSDeltaObject). func (t ObjectType) IsDelta() bool { return t == REFDeltaObject || t == OFSDeltaObject } // ParseObjectType parses a string representation of ObjectType. It returns an // error on parse failure. func ParseObjectType(value string) (typ ObjectType, err error) { switch value { case "commit": typ = CommitObject case "tree": typ = TreeObject case "blob": typ = BlobObject case "tag": typ = TagObject case "ofs-delta": typ = OFSDeltaObject case "ref-delta": typ = REFDeltaObject default: err = ErrInvalidType } return } ================================================ FILE: modules/git/reference.go ================================================ package git import ( "bufio" "bytes" "context" "errors" "fmt" "regexp" "strings" "github.com/antgroup/hugescm/modules/command" ) const ( refPrefix = "refs/" refHeadPrefix = refPrefix + "heads/" refTagPrefix = refPrefix + "tags/" refRemotePrefix = refPrefix + "remotes/" refNotePrefix = refPrefix + "notes/" ) const ( RefRevParseRulesCount = 6 ) // RefRevParseRules are a set of rules to parse references into short names. // These are the same rules as used by git in shorten_unambiguous_ref. // See: https://github.com/git/git/blob/9857273be005833c71e2d16ba48e193113e12276/refs.c#L610 var RefRevParseRules = []string{ "%s", "refs/%s", "refs/tags/%s", "refs/heads/%s", "refs/remotes/%s", "refs/remotes/%s/HEAD", } // ReferenceType reference type's type ReferenceType int8 const ( InvalidReference ReferenceType = 0 HashReference ReferenceType = 1 SymbolicReference ReferenceType = 2 ) func (r ReferenceType) String() string { switch r { case InvalidReference: return "invalid-reference" case HashReference: return "hash-reference" case SymbolicReference: return "symbolic-reference" } return "" } // ReferenceName reference name's type ReferenceName string // NewBranchReferenceName returns a reference name describing a branch based on // his short name. func NewBranchReferenceName(name string) ReferenceName { return ReferenceName(refHeadPrefix + name) } // NewNoteReferenceName returns a reference name describing a note based on his // short name. func NewNoteReferenceName(name string) ReferenceName { return ReferenceName(refNotePrefix + name) } // NewRemoteReferenceName returns a reference name describing a remote branch // based on his short name and the remote name. func NewRemoteReferenceName(remote, name string) ReferenceName { return ReferenceName(refRemotePrefix + fmt.Sprintf("%s/%s", remote, name)) } // NewRemoteHEADReferenceName returns a reference name describing a the HEAD // branch of a remote. func NewRemoteHEADReferenceName(remote string) ReferenceName { return ReferenceName(refRemotePrefix + fmt.Sprintf("%s/%s", remote, HEAD)) } // NewTagReferenceName returns a reference name describing a tag based on short // his name. func NewTagReferenceName(name string) ReferenceName { return ReferenceName(refTagPrefix + name) } // IsBranch check if a reference is a branch func (r ReferenceName) IsBranch() bool { return strings.HasPrefix(string(r), refHeadPrefix) } func (r ReferenceName) BranchName() string { return strings.TrimPrefix(string(r), refHeadPrefix) } // IsNote check if a reference is a note func (r ReferenceName) IsNote() bool { return strings.HasPrefix(string(r), refNotePrefix) } // IsRemote check if a reference is a remote func (r ReferenceName) IsRemote() bool { return strings.HasPrefix(string(r), refRemotePrefix) } // IsTag check if a reference is a tag func (r ReferenceName) IsTag() bool { return strings.HasPrefix(string(r), refTagPrefix) } func (r ReferenceName) TagName() string { return strings.TrimPrefix(string(r), refTagPrefix) } func (r ReferenceName) String() string { return string(r) } // Short returns the short name of a ReferenceName // // un strict, does not check whether the name is ambiguous func (r ReferenceName) Short() string { s := string(r) res := s // skip first for _, format := range RefRevParseRules[1:] { _, err := fmt.Sscanf(s, format, &res) if err == nil { continue } } return res } const ( HEAD ReferenceName = "HEAD" Master ReferenceName = "refs/heads/master" ) // Branch returns `true` and the branch name if the reference is a branch. E.g. // if ReferenceName is "refs/heads/master", it will return "master". If it is // not a branch, `false` is returned. func (r ReferenceName) Branch() (string, bool) { if branch, ok := strings.CutPrefix(r.String(), refHeadPrefix); ok && len(branch) != 0 { return branch, true } return "", false } // Reference represents a Git reference. type Reference struct { // Name is the name of the reference Name ReferenceName // Target is the target of the reference. For direct references it // contains the object ID, for symbolic references it contains the // target branch name. Target string // ObjectType is the type of the object referenced. ObjectType ObjectType // ShortName: ONLY git parsed (else maybe empty) ShortName string // IsSymbolic tells whether the reference is direct or symbolic IsSymbolic bool } // NewReference creates a direct reference to an object. func NewReference(name ReferenceName, target string) Reference { return Reference{ Name: name, Target: target, IsSymbolic: false, } } // NewSymbolicReference creates a symbolic reference to another reference. func NewSymbolicReference(name ReferenceName, target ReferenceName) Reference { return Reference{ Name: name, Target: string(target), IsSymbolic: true, } } type ErrAlreadyLocked struct { refname string message string } func (e *ErrAlreadyLocked) Error() string { if len(e.message) != 0 { return e.message } return fmt.Sprintf("reference is already locked: %q", e.refname) } var ( refLockedRegex = regexp.MustCompile("cannot lock ref '(.+?)'") ErrReferenceNotFound = errors.New("reference not found") ) func IsErrAlreadyLocked(err error) bool { var e *ErrAlreadyLocked return errors.As(err, &e) } func ReferenceTarget(ctx context.Context, repoPath, reference string) (string, error) { // fatal: ambiguous argument 'refs/heads/dev': unknown revision or path not in the working tree stderr := command.NewStderr() cmd := command.NewFromOptions(ctx, &command.RunOpts{RepoPath: repoPath, Stderr: stderr}, "git", "rev-parse", reference) oid, err := cmd.OneLine() if err != nil { if strings.Contains(stderr.String(), "fatal:") { return "", ErrReferenceNotFound } return "", err } return oid, nil } // fatal: update_ref failed for ref 'refs/heads/release/1.0.0_20250728': 'refs/heads/release' exists; cannot create 'refs/heads/release/1.0.0_20250728 func UpdateRef(ctx context.Context, repoPath string, reference string, oldRev, newRev string, forceUpdate bool) error { updateRefArgs := []string{"update-ref", "--", reference, newRev} if !forceUpdate { // git update-ref refs/heads/master check oldRev matched updateRefArgs = append(updateRefArgs, oldRev) } stderr := command.NewStderr() cmd := command.NewFromOptions(ctx, &command.RunOpts{ RepoPath: repoPath, Stderr: stderr, }, "git", updateRefArgs...) if err := cmd.Run(); err != nil { message := stderr.String() if refLockedRegex.MatchString(message) { return &ErrAlreadyLocked{refname: reference} } if strings.Contains(message, " exists; cannot create ") { return &ErrAlreadyLocked{message: message} } if strings.Contains(message, "Another git process seems to be running in this repository") { return &ErrAlreadyLocked{refname: reference, message: message} } return fmt.Errorf("update-ref %s error: %w stderr: %v", reference, err, message) } return nil } type ErrReferenceBadName struct { Name string } func (err ErrReferenceBadName) Error() string { return fmt.Sprintf("bad revision name: '%s'", err.Name) } func IsErrReferenceBadName(err error) bool { var e *ErrReferenceBadName return errors.As(err, &e) } // https://github.com/git/git/blob/ae73b2c8f1da39c39335ee76a0f95857712c22a7/refs.c#L41-L290 var ( // refnameDisposition table // // Here golang's logic is different from C's, golang's strings are not NULL-terminated, so byte(0) is a forbidden character. refnameDisposition = [256]byte{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4, } ) /* * How to handle various characters in refnames: * 0: An acceptable character for refs * 1: End-of-component * 2: ., look for a preceding . to reject .. in refs * 3: {, look for a preceding @ to reject @{ in refs * 4: A bad character: ASCII control characters, and * ":", "?", "[", "\", "^", "~", SP, or TAB * 5: *, reject unless REFNAME_REFSPEC_PATTERN is set */ func checkReferenceNameComponent(refname []byte) int { last := byte(0) var i int for ; i < len(refname); i++ { ch := refname[i] & 255 disp := refnameDisposition[ch] switch disp { case 1: goto OUT // Do not use range, which causes extra processing for goto statements. case 2: if last == '.' { return -1 } case 3: if last == '@' { return -1 } case 4: return -1 case 5: // we not use pattern mode return -1 } last = ch } OUT: if i == 0 { return 0 } if refname[0] == '.' { return -1 } if bytes.HasSuffix(refname, []byte(".lock")) { return -1 } return i } /* * Try to read one refname component from the front of refname. * Return the length of the component found, or -1 if the component is * not legal. It is legal if it is something reasonable to have under * ".git/refs/"; We do not like it if: * * - it begins with ".", or * - it has double dots "..", or * - it has ASCII control characters, or * - it has ":", "?", "[", "\", "^", "~", SP, or TAB anywhere, or * - it has "*" anywhere unless REFNAME_REFSPEC_PATTERN is set, or * - it ends with a "/", or * - it ends with ".lock", or * - it contains a "@{" portion * * When sanitized is not NULL, instead of rejecting the input refname * as an error, try to come up with a usable replacement for the input * refname in it. */ func ValidateReferenceName(refname []byte) bool { if bytes.Equal(refname, []byte("@")) { return false } var componentLen int for { /* We are at the start of a path component. */ if componentLen = checkReferenceNameComponent(refname); componentLen <= 0 { return false } if len(refname) == componentLen { break } refname = refname[componentLen+1:] } return refname[componentLen-1] != '.' } // ValidateBranchName: creating branches starting with - is not supported func ValidateBranchName(branch []byte) bool { if len(branch) == 0 || branch[0] == '-' { return false } return ValidateReferenceName(branch) } // ValidateTagName: creating tags starting with - is not supported func ValidateTagName(tag []byte) bool { if len(tag) == 0 || tag[0] == '-' { return false } return ValidateReferenceName(tag) } const ( ReferenceLineFormat = "%(refname)%00%(refname:short)%00%(objectname)%00%(objecttype)" ) func ParseOneReference(referenceLine string) (*Reference, error) { fields := strings.SplitN(referenceLine, "\x00", 4) if len(fields) != 4 { return nil, fmt.Errorf("invalid output from git for-each-ref command: %v", referenceLine) } typ, err := ParseObjectType(fields[3]) if err != nil { return nil, err } return &Reference{Name: ReferenceName(fields[0]), ShortName: fields[1], Target: fields[2], ObjectType: typ}, nil } type ReferenceEx struct { Name ReferenceName // name ShortName string // short name Target string // target commit,tag or symbolic IsSymbolic bool // is symbolic Commit *Commit // commit } // ReferencePrefixMatch: follow git's priority for finding refs // // https://git-scm.com/docs/git-rev-parse#Documentation/git-rev-parse.txt-emltrefnamegtemegemmasterememheadsmasterememrefsheadsmasterem // // https://github.com/git/git/blob/master/Documentation/revisions.txt func ReferencePrefixMatch(ctx context.Context, repoPath string, refname string) (*ReferenceEx, error) { refs := make([]*Reference, 6) matches := map[string]int{ refname: 0, //1 "refs/" + refname: 1, //2 "refs/tags/" + refname: 2, //3 "refs/heads/" + refname: 3, //4 "refs/remotes/" + refname: 4, //5 "refs/remotes/" + refname + "/HEAD": 5, //6 } stderr := command.NewStderr() psArgs := []string{"for-each-ref", "--format", ReferenceLineFormat} if !strings.HasPrefix(refname, "-") { psArgs = append(psArgs, refname) //1 } psArgs = append(psArgs, "refs/"+refname, //2 "refs/tags/"+refname, //3 "refs/heads/"+refname, //4 "refs/remotes/"+refname, //5 "refs/remotes/"+refname+"/HEAD", //6 ) reader, err := NewReader(ctx, &command.RunOpts{RepoPath: repoPath, Stderr: stderr}, psArgs...) if err != nil { return nil, err } defer reader.Close() // nolint scanner := bufio.NewScanner(reader) for scanner.Scan() { b, err := ParseOneReference(scanner.Text()) if err != nil { break } if i, ok := matches[b.Name.String()]; ok { refs[i] = b } } br := func() *Reference { for _, b := range refs { if b != nil { return b } } return nil }() if br == nil { return nil, NewBranchNotFound(refname) } cc, err := ParseRev(ctx, repoPath, br.Target) if IsErrNotExist(err) { return nil, NewBranchNotFound(refname) } if err != nil { return nil, err } return &ReferenceEx{Name: br.Name, ShortName: br.ShortName, Target: br.Target, IsSymbolic: br.IsSymbolic, Commit: cc}, nil } func HasSpecificReference(ctx context.Context, repoPath string, referencePrefix string) (bool, error) { showRefArgs := []string{"for-each-ref"} if len(referencePrefix) != 0 { showRefArgs = append(showRefArgs, referencePrefix) } showRefArgs = append(showRefArgs, "--format=%(refname)", "--count=1") cmd := command.New(ctx, repoPath, "git", showRefArgs...) stdout, err := cmd.StdoutPipe() if err != nil { return false, err } defer stdout.Close() // nolint scanner := bufio.NewScanner(stdout) if err := cmd.Start(); err != nil { return false, err } defer cmd.Exit() // nolint var result bool for scanner.Scan() { result = true } return result, nil } type Order int const ( OrderNone Order = iota OrderNewest OrderOldest ) func ParseReferences(ctx context.Context, repoPath string, order Order) ([]*Reference, error) { cmdArgs := []string{"for-each-ref"} switch order { case OrderNewest: cmdArgs = append(cmdArgs, "--sort=-committerdate") case OrderOldest: cmdArgs = append(cmdArgs, "--sort=committerdate") } cmdArgs = append(cmdArgs, "--format", ReferenceLineFormat) reader, err := NewReader(ctx, &command.RunOpts{RepoPath: repoPath}, cmdArgs...) if err != nil { return nil, err } defer reader.Close() // nolint refs := make([]*Reference, 0, 100) scanner := bufio.NewScanner(reader) for scanner.Scan() { r, err := ParseOneReference(scanner.Text()) if err != nil { break } refs = append(refs, r) } if err := scanner.Err(); err != nil { return nil, err } return refs, nil } ================================================ FILE: modules/git/reftable/reftable.go ================================================ // Copyright (c) 2016-present GitLab Inc. // SPDX-License-Identifier: MIT package reftable import ( "bytes" "encoding/binary" "encoding/hex" "errors" "fmt" "hash/crc32" "io" "math/big" "os" "path/filepath" "regexp" "strconv" "strings" "github.com/antgroup/hugescm/modules/git" ) var ( // magic is the magic value of a reftable header. magic = [...]byte{'R', 'E', 'F', 'T'} // hashIDSHA1 denotes this reftable uses SHA1. hashIDSHA1 = [...]byte{'s', 'h', 'a', '1'} // hashIDSHA256 denotes this reftable uses SHA256. hashIDSHA256 = [...]byte{'s', '2', '5', '6'} ) // version represents a reftable version. type version uint8 // HeaderSize returns the size of the header for this reftable version. func (v version) HeaderSize() int { switch v { case 1: // The Size is documented at https://git-scm.com/docs/reftable#_header_version_1 return 24 case 2: // The size is documented at https://git-scm.com/docs/reftable#_header_version_2 return 28 default: panic(fmt.Errorf("unsupported version: %d", v)) } } // FooterSize returns the size of the footer for this reftable version. func (v version) FooterSize() int { // The footer sizes are documented at https://git-scm.com/docs/reftable#_footer. switch v { case 1: return 68 case 2: return 72 default: panic(fmt.Errorf("unsupported version: %d", v)) } } // headerV1 is the exact byte layout of a header in reftable version 1. type headerV1 struct { Magic [4]byte Version version BlockSize [3]byte MinUpdateIndex uint64 MaxUpdateIndex uint64 } // header is exact byte layout of a header in reftable version 2. type header struct { headerV1 // HashID is only present if version is 2 HashID [4]byte } // parseHeader parses the header of a reftable. reader should be at the beginning // of the header. func parseHeader(reader io.Reader, hdr *header) error { if err := binary.Read(reader, binary.BigEndian, &hdr.headerV1); err != nil { return fmt.Errorf("reading header: %w", err) } if hdr.Magic != magic { return fmt.Errorf("unexpected magic bytes: %q", hdr.Magic) } if hdr.Version != 1 && hdr.Version != 2 { return fmt.Errorf("unsupported version: %d", hdr.Version) } if hdr.Version == 2 { if err := binary.Read(reader, binary.BigEndian, &hdr.HashID); err != nil { return fmt.Errorf("read hash id: %w", err) } if hdr.HashID != hashIDSHA1 && hdr.HashID != hashIDSHA256 { return fmt.Errorf("unsupported hash id: %q", hdr.HashID) } } return nil } // footerEnd is the exact byte layout of the unique fields in the footer after the duplicated header. type footerEnd struct { RefIndexOffset uint64 ObjectOffsetAndLen uint64 ObjectIndexOffset uint64 LogOffset uint64 LogIndexPosition uint64 CRC32 uint32 } // footer is the exact byte layout of a footer in a reftable. type footer struct { header footerEnd } // parseFooter parses the footer of a reftable. reader should be at the beginning // of the footer. func parseFooter(reader io.Reader, f *footer) error { footerBytes, err := io.ReadAll(reader) if err != nil { return fmt.Errorf("read all: %w", err) } footerReader := bytes.NewReader(footerBytes) if err := parseHeader(footerReader, &f.header); err != nil { return fmt.Errorf("parse header: %w", err) } if err := binary.Read(footerReader, binary.BigEndian, &f.footerEnd); err != nil { return fmt.Errorf("parse remainder: %w", err) } if crc32.ChecksumIEEE(footerBytes[:len(footerBytes)-binary.Size(f.CRC32)]) != f.CRC32 { return errors.New("checksum mismatch") } return nil } type block struct { BlockStart uint FullBlockSize uint HeaderOffset uint RestartCount uint16 RestartStart uint } // Table represents .ref table file. type Table struct { blockSize uint footerOffset uint src *os.File absolutePath string footer footer } // MinUpdateIndex is the minimum update index in the table. func (t *Table) MinUpdateIndex() uint64 { return t.footer.MinUpdateIndex } // MaxUpdateIndex is the maximum update index in the table. func (t *Table) MaxUpdateIndex() uint64 { return t.footer.MaxUpdateIndex } // shaFormat maps reftable sha format to Gitaly's hash object. func (t *Table) shaFormat() git.HashFormat { if t.footer.Version == 2 && t.footer.HashID == hashIDSHA256 { return git.HashSHA256 } return git.HashSHA1 } // parseUInt24 parses a big endian encoded uint24 into a uint. func parseUint24(data [3]byte) uint { return uint(data[2]) | uint(data[1])<<8 | uint(data[0])<<16 } // getBlockRange provides the abs block range if the block is smaller // than the table. func (t *Table) getBlockRange(offset, size uint) (uint, uint) { if offset >= t.footerOffset { return 0, 0 } if offset+size > t.footerOffset { size = t.footerOffset - offset } return offset, offset + size } // extractBlockLen extracts the block length from a given location. func (t *Table) extractBlockLen(src []byte, blockStart uint) uint { return uint(big.NewInt(0).SetBytes(src[blockStart+1 : blockStart+4]).Uint64()) } // getVarInt parses a variable int and increases the index. func (t *Table) getVarInt(src []byte, start uint, blockEnd uint) (uint, uint, error) { var val uint val = uint(src[start]) & 0x7f for (uint(src[start]) & 0x80) > 0 { start++ if start > blockEnd { return 0, 0, errors.New("exceeded block length") } val = ((val + 1) << 7) | (uint(src[start]) & 0x7f) } return start + 1, val, nil } // getRefsFromBlock provides the ref udpates from a reference block. func (t *Table) getRefsFromBlock(src []byte, b *block) ([]git.Reference, error) { var references []git.Reference prefix := "" // Skip the block_type and block_len idx := b.BlockStart + 4 for idx < b.RestartStart { var prefixLength, suffixLength, updateIndexDelta uint var err error idx, prefixLength, err = t.getVarInt(src, idx, b.RestartStart) if err != nil { return nil, fmt.Errorf("getting prefix length: %w", err) } idx, suffixLength, err = t.getVarInt(src, idx, b.RestartStart) if err != nil { return nil, fmt.Errorf("getting suffix length: %w", err) } extra := (suffixLength & 0x7) suffixLength >>= 3 refname := prefix[:prefixLength] + string(src[idx:idx+suffixLength]) idx += suffixLength idx, updateIndexDelta, err = t.getVarInt(src, idx, b.FullBlockSize) if err != nil { return nil, fmt.Errorf("getting update index delta: %w", err) } // we don't use this for now _ = updateIndexDelta reference := git.Reference{ Name: git.ReferenceName(refname), } switch extra { case 0: // Deletion, no value reference.Target = t.shaFormat().ZeroOID() case 1: // Regular reference hashSize := t.shaFormat().RawSize() reference.Target = hex.EncodeToString(src[idx : idx+uint(hashSize)]) idx += uint(hashSize) case 2: // Peeled Tag hashSize := t.shaFormat().RawSize() reference.Target = hex.EncodeToString(src[idx : idx+uint(hashSize)]) idx += uint(hashSize) // For now we don't need the peeledOID, but we still need // to skip the index. // peeledOID := ObjectID(bytesToHex(t.src[idx : idx+uint(hashSize)])) idx += uint(hashSize) case 3: // Symref var size uint idx, size, err = t.getVarInt(src, idx, b.FullBlockSize) if err != nil { return nil, fmt.Errorf("getting symref size: %w", err) } reference.Target = git.ReferenceName(src[idx : idx+size]).String() reference.IsSymbolic = true idx += size } prefix = refname references = append(references, reference) } return references, nil } // parseRefBlock parses a block and if it is a ref block, provides // all the reference updates. func (t *Table) parseRefBlock(src []byte, headerOffset, blockStart, blockEnd uint) ([]git.Reference, error) { currentBS := t.extractBlockLen(src, blockStart+headerOffset) fullBlockSize := t.blockSize if fullBlockSize == 0 { fullBlockSize = currentBS } else if currentBS < fullBlockSize && currentBS < (blockEnd-blockStart) && src[blockStart+currentBS] != 0 { fullBlockSize = currentBS } b := &block{ BlockStart: blockStart + headerOffset, FullBlockSize: fullBlockSize, } if err := binary.Read(bytes.NewBuffer(src[blockStart+currentBS-2:]), binary.BigEndian, &b.RestartCount); err != nil { return nil, fmt.Errorf("reading restart count: %w", err) } b.RestartStart = blockStart + currentBS - 2 - 3*uint(b.RestartCount) return t.getRefsFromBlock(src, b) } // GetReferences returns all references from the table. func (t *Table) GetReferences() ([]git.Reference, error) { headerOffset := uint(t.footer.Version.HeaderSize()) offset := uint(0) var allRefs []git.Reference if _, err := t.src.Seek(0, io.SeekStart); err != nil { return nil, fmt.Errorf("seek start: %w", err) } src, err := io.ReadAll(t.src) if err != nil { return nil, fmt.Errorf("read all: %w", err) } for offset < t.footerOffset { blockStart, blockEnd := t.getBlockRange(offset, t.blockSize) if blockStart == 0 && blockEnd == 0 { break } // If we run out of ref blocks, we can stop the iteration. if src[blockStart+headerOffset] != 'r' { return allRefs, nil } references, err := t.parseRefBlock(src, headerOffset, blockStart, blockEnd) if err != nil { return nil, fmt.Errorf("parsing block: %w", err) } if len(references) == 0 { break } allRefs = append(allRefs, references...) offset = blockEnd } return allRefs, nil } // PatchUpdateIndexes patches in-place the update indexes stored in the table's // header and footer, and syncs the file to the disk. func (t *Table) PatchUpdateIndexes(minVal, maxVal uint64) (returnedErr error) { // Table typically opens the file with read-only permissions. The update index // patching is an exception, and the only case when we should be modifying tables. // Typically the table files would not be modified, and the files in the storage // are read-only to prevent accidental modifications. Due to the default read-only // permissions, we'd fail to open most of the files in the storage for writes. // // Open a separate descriptor with write permissions to handle this special case // of patching update indexes. file, err := os.OpenFile(t.absolutePath, os.O_RDWR, 0) if err != nil { return fmt.Errorf("open file: %w", err) } defer func() { if err := file.Close(); err != nil { returnedErr = errors.Join(err, fmt.Errorf("close: %w", err)) } }() t.footer.MinUpdateIndex = minVal t.footer.MaxUpdateIndex = maxVal // Construct a buffer that contains the full footer with patched values. // // The footer contains the header as well. First serialize the header. buffer := bytes.NewBuffer(make([]byte, 0, t.footer.Version.FooterSize())) if err := binary.Write(buffer, binary.BigEndian, t.footer.headerV1); err != nil { return fmt.Errorf("write header: %w", err) } // Only the version two header contains the HashID. if t.footer.Version == 2 { if err := binary.Write(buffer, binary.BigEndian, t.footer.HashID); err != nil { return fmt.Errorf("write hash ID: %w", err) } } // After the header, serialize the remaining footer values. This will also serialize // the old CRC32 into the footer, but we'll patch it below. if err := binary.Write(buffer, binary.BigEndian, t.footer.footerEnd); err != nil { return fmt.Errorf("write footer: %w", err) } // The footer ends with a CRC32 that covers everything in the footer except the checksum // itself. Compute the checksum and override the old value that was written above when // we serialized the footer with the patched update indexes. footerWithoutChecksum := buffer.Bytes()[:buffer.Len()-crc32.Size] t.footer.CRC32 = crc32.ChecksumIEEE(footerWithoutChecksum) footerBytes := binary.BigEndian.AppendUint32(footerWithoutChecksum, t.footer.CRC32) // Finally, write the updated header and footer into the file. if _, err := file.WriteAt(footerBytes[:t.footer.Version.HeaderSize()], 0); err != nil { return fmt.Errorf("patch header: %w", err) } if _, err := file.WriteAt(footerBytes, int64(t.footerOffset)); err != nil { return fmt.Errorf("patch footer: %w", err) } if err := file.Sync(); err != nil { return fmt.Errorf("sync: %w", err) } return nil } // Close closes the table's associated file. func (t *Table) Close() error { return t.src.Close() } // ParseTable opens the table at the given path and parses it. Close must be called // once the table is no longer used to close the associated file. func ParseTable(absolutePath string) (_ *Table, returnedErr error) { src, err := os.Open(absolutePath) if err != nil { return nil, fmt.Errorf("open: %w", err) } defer func() { if returnedErr != nil { if err := src.Close(); err != nil { returnedErr = errors.Join(returnedErr, fmt.Errorf("close: %w", err)) } } }() t := &Table{src: src, absolutePath: absolutePath} var h header if err := parseHeader(src, &h); err != nil { return nil, fmt.Errorf("parse header: %w", err) } footerOffset, err := src.Seek(int64(-h.Version.FooterSize()), io.SeekEnd) if err != nil { return nil, fmt.Errorf("seek footer: %w", err) } t.footerOffset = uint(footerOffset) if err := parseFooter(src, &t.footer); err != nil { return nil, fmt.Errorf("parse footer: %w", err) } if h != t.footer.header { return nil, errors.New("footer doesn't match header") } t.blockSize = parseUint24(t.footer.BlockSize) return t, nil } // ReadTablesList returns a list of tables in the "tables.list" for the // reftable backend. func ReadTablesList(repoPath string) ([]Name, error) { tablesListPath := filepath.Join(repoPath, "reftable", "tables.list") data, err := os.ReadFile(tablesListPath) if err != nil { return nil, fmt.Errorf("reading tables.list: %w", err) } lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n") names := make([]Name, len(lines)) for i, line := range lines { if names[i], err = ParseName(line); err != nil { return nil, fmt.Errorf("parse name: %w", err) } } return names, nil } // Name contains the structured information in the name of a .ref file. type Name struct { // MinUpdateIndex is the minimum update index contained in the file. MinUpdateIndex uint64 // MinUpdateIndex is the maximum update index contained in the file. MaxUpdateIndex uint64 // Suffix is the random suffix in the table's name. Suffix string } // String returns the string representation of the reftable name. func (n Name) String() string { return fmt.Sprintf("0x%012x-0x%012x-%s.ref", n.MinUpdateIndex, n.MaxUpdateIndex, n.Suffix) } // nameRegex is a regex for matching reftable names // e.g. 0x000000000001-0x00000000000a-b54f3b59.ref would result in the following submatches: // - 000000000001 (UpdateIndexMin) // - 00000000000a (UpdateIndexMax) // - b54f3b59 (Suffix) // // See the reftable documentation at https://www.git-scm.com/docs/reftable#_layout for more // information. var nameRegex = regexp.MustCompile("^0x([[:xdigit:]]{12,16})-0x([[:xdigit:]]{12,16})-([0-9a-zA-Z]{8}).ref$") // ParseName parses the name of a reftable file. func ParseName(reftableName string) (Name, error) { matches := nameRegex.FindStringSubmatch(reftableName) if len(matches) == 0 { return Name{}, fmt.Errorf("reftable name %q malformed", reftableName) } minIndex, err := strconv.ParseUint(matches[1], 16, 64) if err != nil { return Name{}, fmt.Errorf("parsing min index: %w", err) } maxIndex, err := strconv.ParseUint(matches[2], 16, 64) if err != nil { return Name{}, fmt.Errorf("parsing max index: %w", err) } return Name{ MinUpdateIndex: minIndex, MaxUpdateIndex: maxIndex, Suffix: matches[3], }, nil } ================================================ FILE: modules/git/remote.go ================================================ package git import "regexp" var ( isSchemeRegExp = regexp.MustCompile(`^[^:]+://`) // Ref: https://github.com/git/git/blob/master/Documentation/urls.txt#L37 scpLikeUrlRegExp = regexp.MustCompile(`^(?:(?P[^@]+)@)?(?P[^:\s]+):(?:(?P[0-9]{1,5}):)?(?P[^\\].*)$`) ) // MatchesScheme returns true if the given string matches a URL-like // format scheme. func MatchesScheme(url string) bool { return isSchemeRegExp.MatchString(url) } // MatchesScpLike returns true if the given string matches an SCP-like // format scheme. func MatchesScpLike(url string) bool { return scpLikeUrlRegExp.MatchString(url) } // IsLocalEndpoint returns true if the given URL string specifies a // local file endpoint. For example, on a Linux machine, // `/home/user/src/go-git` would match as a local endpoint, but // `https://github.com/src-d/go-git` would not. func IsLocalEndpoint(url string) bool { return !MatchesScheme(url) && !MatchesScpLike(url) } // FindScpLikeComponents returns the user, host, port and path of the // given SCP-like URL. func FindScpLikeComponents(url string) (user, host, port, path string) { m := scpLikeUrlRegExp.FindStringSubmatch(url) return m[1], m[2], m[3], m[4] } ================================================ FILE: modules/git/repo.go ================================================ package git import ( "context" "errors" "fmt" "strings" "github.com/antgroup/hugescm/modules/command" ) func IsBareRepository(ctx context.Context, repoPath string) bool { cmd := command.New(ctx, command.NoDir, "git", "--git-dir", repoPath, "config", "--get", "core.bare") v, err := cmd.OneLine() if err != nil { return false } return strings.EqualFold(v, "true") } const ( differentHashErr = "fatal: attempt to reinitialize repository with different hash" invalidBranchNameErr = "fatal: invalid initial branch name" ) var ( ErrDifferentHash = errors.New("attempt to reinitialize repository with different hash") ErrInvalidBranchName = errors.New("invalid initial branch name") ) func NewRepo(ctx context.Context, repoPath, branch string, bare bool, shaFormat HashFormat) error { branch = strings.TrimPrefix(branch, refHeadPrefix) stderr := command.NewStderr() psArgs := []string{"init", "--initial-branch=" + branch, "--object-format=" + shaFormat.String()} if bare { psArgs = append(psArgs, "--bare") } psArgs = append(psArgs, repoPath) cmd := command.NewFromOptions(ctx, &command.RunOpts{ Stderr: stderr, }, "git", psArgs...) if err := cmd.RunEx(); err != nil { message := stderr.String() if strings.HasPrefix(message, differentHashErr) { return ErrDifferentHash } if strings.HasPrefix(message, invalidBranchNameErr) { return ErrInvalidBranchName } return fmt.Errorf("initialize repo %s error %w stderr: %s", repoPath, err, message) } return nil } ================================================ FILE: modules/git/repo_test.go ================================================ package git import ( "fmt" "os" "path/filepath" "runtime" "testing" ) func TestIsBareRepository(t *testing.T) { _, filename, _, _ := runtime.Caller(0) repoPath := RevParseRepoPath(t.Context(), filepath.Dir(filename)) fmt.Fprintf(os.Stderr, "IsBareRepository %v\n", IsBareRepository(t.Context(), repoPath)) } ================================================ FILE: modules/git/signature.go ================================================ // Copyright 2015 The Gogs Authors. All rights reserved. // Copyright 2019 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. package git import ( "bytes" "fmt" "io" "strconv" "time" ) const ( // GitTimeLayout is the (default) time layout used by git. GitTimeLayout = "Mon Jan _2 15:04:05 2006 -0700" ) // Signature represents the Author or Committer information. type Signature struct { // Name represents a person name. It is an arbitrary string. Name string `json:"name"` // Email is an email, but it cannot be assumed to be well-formed. Email string `json:"email"` // When is the timestamp of the signature. When time.Time `json:"when"` } const ( formatTimeZoneOnly = "-0700" ) // String implements the fmt.Stringer interface and formats a Signature as // expected in the Git commit internal object format. For instance: // // Taylor Blau 1494258422 -0600 func (s *Signature) String() string { at := s.When.Unix() zone := s.When.Format(formatTimeZoneOnly) return fmt.Sprintf("%s <%s> %d %s", s.Name, s.Email, at, zone) } // Decode decodes a byte array representing a signature to signature func (s *Signature) Decode(b []byte) { sig, _ := newSignatureFromCommitLine(b) s.Email = sig.Email s.Name = sig.Name s.When = sig.When } // Helper to get a signature from the commit line, which looks like these: // // author Patrick Gundlach 1378823654 +0200 // author Patrick Gundlach Thu, 07 Apr 2005 22:13:13 +0200 // // but without the "author " at the beginning (this method should) // be used for author and committer. func newSignatureFromCommitLine(line []byte) (sig *Signature, err error) { sig = new(Signature) emailStart := bytes.LastIndexByte(line, '<') emailEnd := bytes.LastIndexByte(line, '>') if emailStart == -1 || emailEnd == -1 || emailEnd < emailStart { return } sig.Name = string(line[:emailStart-1]) sig.Email = string(line[emailStart+1 : emailEnd]) hasTime := emailEnd+2 < len(line) if !hasTime { return } // Check date format. firstChar := line[emailEnd+2] if firstChar >= 48 && firstChar <= 57 { idx := bytes.IndexByte(line[emailEnd+2:], ' ') if idx < 0 { return } timestring := string(line[emailEnd+2 : emailEnd+2+idx]) seconds, _ := strconv.ParseInt(timestring, 10, 64) sig.When = time.Unix(seconds, 0) idx += emailEnd + 3 if idx >= len(line) || idx+5 > len(line) { return } timezone := string(line[idx : idx+5]) tzhours, err1 := strconv.ParseInt(timezone[0:3], 10, 64) tzmins, err2 := strconv.ParseInt(timezone[3:], 10, 64) if err1 != nil || err2 != nil { return } if tzhours < 0 { tzmins *= -1 } tz := time.FixedZone("", int(tzhours*60*60+tzmins*60)) sig.When = sig.When.In(tz) } else { sig.When, err = time.Parse(GitTimeLayout, string(line[emailEnd+2:])) if err != nil { return } } return sig, err } func (s *Signature) Encode(w io.Writer) error { if _, err := fmt.Fprintf(w, "%s <%s> ", s.Name, s.Email); err != nil { return err } if err := s.encodeTimeAndTimeZone(w); err != nil { return err } return nil } func (s *Signature) encodeTimeAndTimeZone(w io.Writer) error { u := max(s.When.Unix(), 0) _, err := fmt.Fprintf(w, "%d %s", u, s.When.Format("-0700")) return err } func SignatureFromLine(line string) *Signature { if signature, err := newSignatureFromCommitLine([]byte(line)); err == nil { return signature } return &Signature{} } ================================================ FILE: modules/git/stats/commit-graph.go ================================================ // Copyright (c) 2016-present GitLab Inc. // SPDX-License-Identifier: MIT package stats import ( "bufio" "bytes" "errors" "fmt" "os" "path/filepath" ) // CommitGraphInfo returns information about the commit-graph of a repository. type CommitGraphInfo struct { // Exists tells whether a commit-graph exists. Exists bool `json:"exists"` // CommitGraphChainLength is the length of the commit-graph chain, if it exists. If the // repository does not have a commit-graph chain but a monolithic commit-graph, then this // field will be set to 0. CommitGraphChainLength uint64 `json:"commit_graph_chain_length"` // HasBloomFilters tells whether the commit-graph has bloom filters. Bloom filters are used // to answer the question whether a certain path has been changed in the commit the bloom // filter applies to. HasBloomFilters bool `json:"has_bloom_filters"` // HasGenerationData tells whether the commit-graph has generation data. Generation // data is stored as the corrected committer date, which is defined as the maximum // of the commit's own committer date or the corrected committer date of any of its // parents. This data can be used to determine whether a commit A comes after a // certain commit B. HasGenerationData bool `json:"has_generation_data"` // HasGenerationDataOverflow stores overflow data in case the corrected committer // date takes more than 31 bits to represent. HasGenerationDataOverflow bool `json:"has_generation_data_overflow"` } // CommitGraphInfoForRepository derives information about commit-graphs in the repository. // // Please refer to https://git-scm.com/docs/commit-graph#_file_layout for further information about // the commit-graph format. func CommitGraphInfoForRepository(repoPath string) (CommitGraphInfo, error) { const chunkTableEntrySize = 12 var info CommitGraphInfo commitGraphChainPath := filepath.Join(repoPath, "objects", "info", "commit-graphs", "commit-graph-chain") var commitGraphPaths []string // We first try to read the commit-graphs-chain in the repository. if chainData, err := os.ReadFile(commitGraphChainPath); err != nil { if !errors.Is(err, os.ErrNotExist) { return CommitGraphInfo{}, fmt.Errorf("reading commit-graphs chain: %w", err) } // If we couldn't find it, we check whether the monolithic commit-graph file exists // and use that instead. commitGraphPath := filepath.Join(repoPath, "objects", "info", "commit-graph") if _, err := os.Stat(commitGraphPath); err != nil { if errors.Is(err, os.ErrNotExist) { return CommitGraphInfo{Exists: false}, nil } return CommitGraphInfo{}, fmt.Errorf("statting commit-graph: %w", err) } commitGraphPaths = []string{commitGraphPath} info.Exists = true } else { // Otherwise, if we have found the commit-graph-chain, we use the IDs it contains as // the set of commit-graphs to check further down below. ids := bytes.Split(bytes.TrimSpace(chainData), []byte{'\n'}) commitGraphPaths = make([]string, 0, len(ids)) for _, id := range ids { commitGraphPaths = append(commitGraphPaths, filepath.Join(repoPath, "objects", "info", "commit-graphs", fmt.Sprintf("graph-%s.graph", id)), ) } info.Exists = true info.CommitGraphChainLength = uint64(len(commitGraphPaths)) } for _, graphFilePath := range commitGraphPaths { graphFile, err := os.Open(graphFilePath) if err != nil { if errors.Is(err, os.ErrNotExist) { // concurrently modified continue } return CommitGraphInfo{}, fmt.Errorf("read commit graph chain file: %w", err) } defer graphFile.Close() // nolint reader := bufio.NewReader(graphFile) // The header format is defined in gitformat-commit-graph(5). header := []byte{ 0, 0, 0, 0, // 4-byte signature: The signature is: {'C', 'G', 'P', 'H'} 0, // 1-byte version number: Currently, the only valid version is 1. 0, // 1-byte Hash Version 0, // 1-byte number (C) of "chunks" 0, // 1-byte number (B) of base commit-graphs } if n, err := reader.Read(header); err != nil { return CommitGraphInfo{}, fmt.Errorf("read commit graph file %q header: %w", graphFilePath, err) } else if n != len(header) { return CommitGraphInfo{}, fmt.Errorf("commit graph file %q is too small, no header", graphFilePath) } if !bytes.Equal(header[:4], []byte("CGPH")) { return CommitGraphInfo{}, fmt.Errorf("commit graph file %q doesn't have signature", graphFilePath) } if header[4] != 1 { return CommitGraphInfo{}, fmt.Errorf("commit graph file %q has unsupported version number: %v", graphFilePath, header[4]) } C := header[6] // number (C) of "chunks" table := make([]byte, (C+1)*chunkTableEntrySize) if n, err := reader.Read(table); err != nil { return CommitGraphInfo{}, fmt.Errorf("read commit graph file %q table of contents for the chunks: %w", graphFilePath, err) } else if n != len(table) { return CommitGraphInfo{}, fmt.Errorf("commit graph file %q is too small, no table of contents", graphFilePath) } if err := graphFile.Close(); err != nil { return CommitGraphInfo{}, fmt.Errorf("commit graph file %q close: %w", graphFilePath, err) } if !info.HasBloomFilters { info.HasBloomFilters = bytes.Contains(table, []byte("BIDX")) && bytes.Contains(table, []byte("BDAT")) } if !info.HasGenerationData { info.HasGenerationData = bytes.Contains(table, []byte("GDA2")) } if !info.HasGenerationDataOverflow { info.HasGenerationDataOverflow = bytes.Contains(table, []byte("GDO2")) } } return info, nil } ================================================ FILE: modules/git/stats/status.go ================================================ // Copyright (c) 2016-present GitLab Inc. // SPDX-License-Identifier: MIT package stats import ( "bufio" "bytes" "context" "encoding/binary" "errors" "fmt" "io" "io/fs" "os" "path/filepath" "strings" "time" "github.com/antgroup/hugescm/modules/git" "github.com/antgroup/hugescm/modules/git/reftable" ) const ( // StaleObjectsGracePeriod is time delta that is used to indicate cutoff wherein an object // would be considered old. Currently this is set to being 10 days. StaleObjectsGracePeriod = -10 * 24 * time.Hour ) // PackfilesCount returns the number of packfiles a repository has. func PackfilesCount(repoPath string) (uint64, error) { packfilesInfo, err := PackfilesStatus(repoPath) if err != nil { return 0, fmt.Errorf("deriving packfiles info: %w", err) } return packfilesInfo.Count, nil } // LooseObjects returns the number of loose objects that are not in a packfile. func LooseObjects(repoPath string) (uint64, error) { objectsInfo, err := LooseObjectsStatus(repoPath, time.Now()) if err != nil { return 0, err } return objectsInfo.Count, nil } // Stat contains information about the repository. type Stat struct { // LooseObjects contains information about loose objects. LooseObjects LooseObjectsStat `json:"loose_objects"` // Packfiles contains information about packfiles. Packfiles PackfilesStat `json:"packfiles"` // References contains information about the repository's references. References ReferencesStat `json:"references"` // CommitGraph contains information about the repository's commit-graphs. CommitGraph CommitGraphInfo `json:"commit_graph"` LFS LFSObjectsStat `json:"lfs"` } // Status computes the RepositoryInfo for a repository. func Status(ctx context.Context, repoPath string, refFormat string) (Stat, error) { var si Stat var err error si.LooseObjects, err = LooseObjectsStatus(repoPath, time.Now().Add(StaleObjectsGracePeriod)) if err != nil { return Stat{}, fmt.Errorf("counting loose objects: %w", err) } si.Packfiles, err = PackfilesStatus(repoPath) if err != nil { return Stat{}, fmt.Errorf("counting packfiles: %w", err) } si.References, err = ReferencesStatus(ctx, repoPath, refFormat) if err != nil { return Stat{}, fmt.Errorf("checking references: %w", err) } si.CommitGraph, err = CommitGraphInfoForRepository(repoPath) if err != nil { return Stat{}, fmt.Errorf("checking commit-graph info: %w", err) } si.LFS, _ = LFSObjectsStatus(repoPath) return si, nil } // ReferencesStat contains information about references. type ReferencesStat struct { // LooseReferencesCount is the number of unpacked, loose references that exist. LooseReferencesCount uint64 `json:"loose_references_count"` // PackedReferencesSize is the size of the packed-refs file in bytes. PackedReferencesSize uint64 `json:"packed_references_size"` // ReftableTables contains details of individual table files. ReftableTables []ReftableTable `json:"reftable_tables"` // ReftableUnrecognizedFilesCount is the number of files under the `reftables/` // directory that shouldn't exist, according to the entries in `tables.list`. ReftableUnrecognizedFilesCount uint64 `json:"reftable_unrecognized_files"` // ReferenceBackendName denotes the reference backend name of the repo. ReferenceBackendName string `json:"reference_backend"` } // ReftableTable contains information about an individual reftable table. type ReftableTable struct { // Size is the size in bytes. Size uint64 `json:"size"` // UpdateIndexMin is the min_update_index of the reftable table. This is derived // from the filename only. UpdateIndexMin uint64 `json:"update_index_min"` // UpdateIndexMax is the max_update_index of the reftable table. This is derived // from the filename only. UpdateIndexMax uint64 `json:"update_index_max"` } // ReferencesStatus derives information about references in the repository. func ReferencesStatus(ctx context.Context, repoPath string, refFormat string) (ReferencesStat, error) { var info ReferencesStat info.ReferenceBackendName = refFormat switch info.ReferenceBackendName { case "files": refsPath := filepath.Join(repoPath, "refs") if err := filepath.WalkDir(refsPath, func(path string, entry fs.DirEntry, err error) error { if err != nil { // It may happen that references got deleted concurrently. This is fine and expected, so we just // ignore any such errors. if errors.Is(err, os.ErrNotExist) { return nil } return err } if !entry.IsDir() { info.LooseReferencesCount++ } return nil }); err != nil { return ReferencesStat{}, fmt.Errorf("counting loose refs: %w", err) } if stat, err := os.Stat(filepath.Join(repoPath, "packed-refs")); err != nil { if !errors.Is(err, os.ErrNotExist) { return ReferencesStat{}, fmt.Errorf("getting packed-refs size: %w", err) } } else { info.PackedReferencesSize = uint64(stat.Size()) } case "reftable": refsPath := filepath.Join(repoPath, "reftable") tablesList, err := os.Open(filepath.Join(refsPath, "tables.list")) if err != nil { return ReferencesStat{}, fmt.Errorf("open tables.list: %w", err) } defer tablesList.Close() // nolint // Track the expected files under the `reftable/` directory. reftableRecognizedFiles := map[string]struct{}{ "tables.list": {}, "tables.list.lock": {}, } scanner := bufio.NewScanner(tablesList) scanner.Split(bufio.ScanLines) for scanner.Scan() { reftableName := scanner.Text() reftableRecognizedFiles[reftableName] = struct{}{} reftableStat, err := os.Stat(filepath.Join(refsPath, reftableName)) if err != nil { return ReferencesStat{}, fmt.Errorf("stat reftable table file: %w", err) } name, err := reftable.ParseName(reftableName) if err != nil { return ReferencesStat{}, fmt.Errorf("parse reftable name: %w", err) } info.ReftableTables = append(info.ReftableTables, ReftableTable{ Size: uint64(reftableStat.Size()), UpdateIndexMin: name.MinUpdateIndex, UpdateIndexMax: name.MaxUpdateIndex, }) } reftableDir, err := os.ReadDir(refsPath) if err != nil { return ReferencesStat{}, fmt.Errorf("read reftable dir: %w", err) } for _, fname := range reftableDir { if _, ok := reftableRecognizedFiles[fname.Name()]; !ok { info.ReftableUnrecognizedFilesCount++ } } } return info, nil } // LooseObjectsStat contains information about loose objects. type LooseObjectsStat struct { // Count is the number of loose objects. Count uint64 `json:"count"` // Size is the total size of all loose objects in bytes. Size uint64 `json:"size"` // StaleCount is the number of stale loose objects when taking into account the specified cutoff // date. StaleCount uint64 `json:"stale_count"` // StaleSize is the total size of stale loose objects when taking into account the specified // cutoff date. StaleSize uint64 `json:"stale_size"` // GarbageCount is the number of garbage files in the loose-objects shards. GarbageCount uint64 `json:"garbage_count"` // GarbageSize is the total size of garbage in the loose-objects shards. GarbageSize uint64 `json:"garbage_size"` } // LooseObjectsStatus derives information about loose objects in the repository. If a // cutoff date is given, then this function will only take into account objects which are older than // the given point in time. func LooseObjectsStatus(repoPath string, cutoffDate time.Time) (LooseObjectsStat, error) { var info LooseObjectsStat for i := 0; i <= 0xFF; i++ { entries, err := os.ReadDir(filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i))) if err != nil { if errors.Is(err, os.ErrNotExist) { continue } return LooseObjectsStat{}, fmt.Errorf("reading loose object shard: %w", err) } for _, entry := range entries { entryInfo, err := entry.Info() if err != nil { if errors.Is(err, fs.ErrNotExist) { continue } return LooseObjectsStat{}, fmt.Errorf("reading object info: %w", err) } if !isValidLooseObjectName(entry.Name()) { info.GarbageCount++ info.GarbageSize += uint64(entryInfo.Size()) continue } // Note: we don't `continue` here as we count stale objects into the total // number of objects. if entryInfo.ModTime().Before(cutoffDate) { info.StaleCount++ info.StaleSize += uint64(entryInfo.Size()) } info.Count++ info.Size += uint64(entryInfo.Size()) } } return info, nil } func isValidLooseObjectName(s string) bool { for _, c := range []byte(s) { if strings.IndexByte("0123456789abcdef", c) < 0 { return false } } return true } type PackEntry struct { Name string `json:"name"` Size uint64 `json:"size"` } // PackfilesStat contains information about packfiles. type PackfilesStat struct { // Count is the number of all packfiles, including stale and kept ones. Count uint64 `json:"count"` // Size is the total size of all packfiles in bytes, including stale and kept ones. Size uint64 `json:"size"` // PackEntries small pack count PackEntries []PackEntry `json:"entries"` // ReverseIndexCount is the number of reverse indices. ReverseIndexCount uint64 `json:"reverse_index_count"` // CruftCount is the number of cruft packfiles which have a .mtimes file. CruftCount uint64 `json:"cruft_count"` // CruftSize is the size of cruft packfiles which have a .mtimes file. CruftSize uint64 `json:"cruft_size"` // KeepCount is the number of .keep packfiles. KeepCount uint64 `json:"keep_count"` // KeepSize is the size of .keep packfiles. KeepSize uint64 `json:"keep_size"` // GarbageCount is the number of garbage files. GarbageCount uint64 `json:"garbage_count"` // GarbageSize is the total size of all garbage files in bytes. GarbageSize uint64 `json:"garbage_size"` // Bitmap contains information about the bitmap, if any exists. Bitmap BitmapStat `json:"bitmap"` // MultiPackIndex confains information about the multi-pack-index, if any exists. MultiPackIndex MultiPackIndexStat `json:"multi_pack_index"` // MultiPackIndexBitmap contains information about the bitmap for the multi-pack-index, if // any exists. MultiPackIndexBitmap BitmapStat `json:"multi_pack_index_bitmap"` } const ( LargePackThreshold uint64 = 2 * 1024 * 1024 * 1024 PackSizeTotal uint64 = 8 * 1024 * 1024 * 1024 ) func (pi PackfilesStat) NoLargePack() bool { for _, e := range pi.PackEntries { if e.Size > LargePackThreshold { return false } } return pi.Size < PackSizeTotal } // PackfilesStatus derives various information about packfiles for the given repository. func PackfilesStatus(repoPath string) (PackfilesStat, error) { packfilesPath := filepath.Join(repoPath, "objects", "pack") entries, err := os.ReadDir(packfilesPath) if err != nil { if errors.Is(err, os.ErrNotExist) { return PackfilesStat{}, nil } return PackfilesStat{}, err } packfilesMetadata := classifyPackfiles(entries) var info PackfilesStat for _, entry := range entries { entryName := entry.Name() switch { case hasPrefixAndSuffix(entryName, "pack-", ".pack"): size, err := entrySize(entry) if err != nil { return PackfilesStat{}, fmt.Errorf("getting packfile size: %w", err) } info.Count++ info.Size += size metadata := packfilesMetadata[entryName] switch { case metadata.hasKeep: info.KeepCount++ info.KeepSize += size case metadata.hasMtimes: info.CruftCount++ info.CruftSize += size default: info.PackEntries = append(info.PackEntries, PackEntry{Name: entryName, Size: size}) } case hasPrefixAndSuffix(entryName, "pack-", ".idx"): // We ignore normal indices as every packfile would have one anyway, or // otherwise the repository would be corrupted. case hasPrefixAndSuffix(entryName, "pack-", ".keep"): // We classify .keep files above. case hasPrefixAndSuffix(entryName, "pack-", ".mtimes"): // We classify .mtimes files above. case hasPrefixAndSuffix(entryName, "pack-", ".rev"): info.ReverseIndexCount++ case hasPrefixAndSuffix(entryName, "pack-", ".bitmap"): bitmap, err := BitmapStatus(filepath.Join(packfilesPath, entryName)) if err != nil { return PackfilesStat{}, fmt.Errorf("reading bitmap info: %w", err) } info.Bitmap = bitmap case entryName == "multi-pack-index": midxInfo, err := MultiPackIndexStatus(filepath.Join(packfilesPath, entryName)) if err != nil { return PackfilesStat{}, fmt.Errorf("reading multi-pack-index: %w", err) } info.MultiPackIndex = midxInfo case hasPrefixAndSuffix(entryName, "multi-pack-index-", ".bitmap"): bitmap, err := BitmapStatus(filepath.Join(packfilesPath, entryName)) if err != nil { return PackfilesStat{}, fmt.Errorf("reading multi-pack-index bitmap info: %w", err) } info.MultiPackIndexBitmap = bitmap default: size, err := entrySize(entry) if err != nil { if errors.Is(err, os.ErrNotExist) { // Unrecognized files may easily be temporary files written // by Git. It is expected that these may get concurrently // removed, so we just ignore the case where they've gone // missing. continue } return PackfilesStat{}, fmt.Errorf("getting garbage size: %w", err) } info.GarbageCount++ info.GarbageSize += size } } return info, nil } type packfileMetadata struct { hasKeep, hasMtimes bool } // classifyPackfiles classifies all directory entries that look like packfiles and derives whether // they have specific metadata or not. It returns a map of packfile names with the respective // metadata that has been found. func classifyPackfiles(entries []fs.DirEntry) map[string]packfileMetadata { packfileInfos := map[string]packfileMetadata{} for _, entry := range entries { if !strings.HasPrefix(entry.Name(), "pack-") { continue } extension := filepath.Ext(entry.Name()) packfileName := strings.TrimSuffix(entry.Name(), extension) + ".pack" packfileMetadata := packfileInfos[packfileName] switch extension { case ".keep": packfileMetadata.hasKeep = true case ".mtimes": packfileMetadata.hasMtimes = true } packfileInfos[packfileName] = packfileMetadata } return packfileInfos } func entrySize(entry fs.DirEntry) (uint64, error) { entryInfo, err := entry.Info() if err != nil { return 0, fmt.Errorf("getting file info: %w", err) } if entryInfo.Size() >= 0 { return uint64(entryInfo.Size()), nil } return 0, nil } func hasPrefixAndSuffix(s, prefix, suffix string) bool { return strings.HasPrefix(s, prefix) && strings.HasSuffix(s, suffix) } // BitmapStat contains information about a packfile or multi-pack-index bitmap. type BitmapStat struct { // Exists indicates whether the bitmap exists. This field would usually always be `true` // when read via `BitmapInfoForPath()`, but helps when the bitmap info is embedded into // another structure where it may only be conditionally read. Exists bool `json:"exists"` // Version is the version of the bitmap. Currently, this is expected to always be 1. Version uint16 `json:"version"` // HasHashCache indicates whether the name hash cache extension exists in the bitmap. This // extension records hashes of the path at which trees or blobs are found at the time of // writing the packfile so that it becomes possible to quickly find objects stored at the // same path. This mechanism is fed into the delta compression machinery to make the delta // heuristics more effective. HasHashCache bool `json:"has_hash_cache"` // HasLookupTable indicates whether the lookup table exists in the bitmap. Lookup tables // allow to defer loading bitmaps until required and thus speed up read-only bitmap // preparations. HasLookupTable bool `json:"has_lookup_table"` } // BitmapStatus reads the bitmap at the given path and returns information on that bitmap. func BitmapStatus(path string) (BitmapStat, error) { // The bitmap header is defined in // https://github.com/git/git/blob/master/Documentation/technical/bitmap-format.txt. bitmapHeader := []byte{ 0, 0, 0, 0, // 4-byte signature 0, 0, // 2-byte version number in network byte order 0, 0, // 2-byte flags in network byte order } file, err := os.Open(path) if err != nil { return BitmapStat{}, fmt.Errorf("opening bitmap: %w", err) } defer file.Close() // nolint if _, err := io.ReadFull(file, bitmapHeader); err != nil { return BitmapStat{}, fmt.Errorf("reading bitmap header: %w", err) } if !bytes.Equal(bitmapHeader[0:4], []byte{'B', 'I', 'T', 'M'}) { return BitmapStat{}, fmt.Errorf("invalid bitmap signature: %q", string(bitmapHeader[0:4])) } version := binary.BigEndian.Uint16(bitmapHeader[4:6]) if version != 1 { return BitmapStat{}, fmt.Errorf("unsupported version: %d", version) } flags := binary.BigEndian.Uint16(bitmapHeader[6:8]) return BitmapStat{ Exists: true, Version: version, HasHashCache: flags&0x4 == 0x4, HasLookupTable: flags&0x10 == 0x10, }, nil } type MultiPackIndexStat struct { // Exists determines whether the multi-pack-index exists or not. Exists bool `json:"exists"` // Version is the version of the multi-pack-index. Currently, Git only recognizes version 1. Version uint8 `json:"version"` // PackfileCount is the count of packfiles that the multi-pack-index tracks. PackfileCount uint64 `json:"packfile_count"` } // MultiPackIndexStatus reads the multi-pack-index at the given path and returns information on // it. Returns an error in case the file cannot be read or in case its format is not understood. func MultiPackIndexStatus(path string) (MultiPackIndexStat, error) { // Please refer to gitformat-pack(5) for the definition of the multi-pack-index header. midxHeader := []byte{ 0, 0, 0, 0, // 4-byte signature 0, // 1-byte version number 0, // 1-byte object ID version 0, // 1-byte number of chunks 0, // 1-byte number of base multi-pack-index files 0, 0, 0, 0, // 4-byte number of packfiles } file, err := os.Open(path) if err != nil { return MultiPackIndexStat{}, fmt.Errorf("opening multi-pack-index: %w", err) } defer file.Close() // nolint if _, err := io.ReadFull(file, midxHeader); err != nil { return MultiPackIndexStat{}, fmt.Errorf("reading header: %w", err) } if !bytes.Equal(midxHeader[0:4], []byte{'M', 'I', 'D', 'X'}) { return MultiPackIndexStat{}, fmt.Errorf("invalid signature: %q", string(midxHeader[0:4])) } version := midxHeader[4] if version != 1 { return MultiPackIndexStat{}, fmt.Errorf("invalid version: %d", version) } baseFiles := midxHeader[7] if baseFiles != 0 { return MultiPackIndexStat{}, fmt.Errorf("unsupported number of base files: %d", baseFiles) } packfileCount := binary.BigEndian.Uint32(midxHeader[8:12]) return MultiPackIndexStat{ Exists: true, Version: version, PackfileCount: uint64(packfileCount), }, nil } type LFSObjectsStat struct { Count uint64 `json:"count"` Size uint64 `json:"size"` } func LFSObjectsStatus(repoPath string) (LFSObjectsStat, error) { var si LFSObjectsStat err := filepath.WalkDir(filepath.Join(repoPath, "lfs/objects"), func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { return nil } name := d.Name() if !git.IsValidateSHA256(name) { return nil } fi, err := d.Info() if err != nil { return err } si.Count++ si.Size += uint64(fi.Size()) return nil }) return si, err } ================================================ FILE: modules/git/tag.go ================================================ package git import ( "bufio" "context" "fmt" "io" "strings" "github.com/antgroup/hugescm/modules/command" ) func JoinTagPrefix(tag string) string { if strings.HasPrefix(tag, refTagPrefix) { return tag } return refTagPrefix + tag } type Tag struct { // Hash of the tag. Hash string `json:"hash"` // Name of the tag. Name string `json:"name"` // Object is the hash of the target object. Object string `json:"object"` // Type is the object type of the target. Type string `json:"type"` // Tagger is the one who created the tag. Tagger Signature `json:"tagger"` // Content is an arbitrary text message. Content string `json:"content"` size int64 } func (t *Tag) Size() int64 { return t.size } func (t *Tag) Extract() (message string, signature string) { if i := strings.Index(t.Content, "-----BEGIN"); i > 0 { return t.Content[:i], t.Content[i:] } return t.Content, "" } func (t *Tag) Message() string { m, _ := t.Extract() return m } func (t *Tag) ExtractCommitGPGSignature() *CommitGPGSignature { message, signature := t.Extract() if len(signature) == 0 { return nil } var w strings.Builder var err error if _, err = fmt.Fprintf(&w, "object %s\ntype %s\ntag %s\ntagger ", t.Object, t.Type, t.Name); err != nil { return nil } if err = t.Tagger.Encode(&w); err != nil { return nil } if _, err = fmt.Fprintf(&w, "\n\n"); err != nil { return nil } if _, err = w.WriteString(message); err != nil { return nil } return &CommitGPGSignature{ Signature: signature, Payload: strings.TrimSpace(w.String()) + "\n", } } // https://git-scm.com/docs/signature-format // https://github.blog/changelog/2022-08-23-ssh-commit-verification-now-supported/ func (t *Tag) Decode(hash string, reader io.Reader, size int64) error { t.Hash = hash t.size = size r, ok := reader.(*bufio.Reader) if !ok { r = bufio.NewReader(reader) } for { line, readErr := r.ReadString('\n') if readErr != nil && readErr != io.EOF { return readErr } line = strings.TrimSpace(line) if len(line) == 0 { break // Start of message } field, value, ok := strings.Cut(line, " ") if !ok { break } switch field { case "object": t.Object = value case "type": t.Type = value case "tag": t.Name = value case "tagger": t.Tagger.Decode([]byte(value)) } if readErr == io.EOF { return nil } } data, err := io.ReadAll(r) if err != nil { return err } t.Content = string(data) return nil } func FindTag(ctx context.Context, repoPath string, name string) (*Reference, error) { stderr := command.NewStderr() reader, err := NewReader(ctx, &command.RunOpts{RepoPath: repoPath, Stderr: stderr}, "tag", "-l", "--format", ReferenceLineFormat, "--", name) if err != nil { return nil, err } defer reader.Close() // nolint scanner := bufio.NewScanner(reader) if scanner.Scan() { return ParseOneReference(scanner.Text()) } return nil, NewTagNotFound(name) } ================================================ FILE: modules/git/tree.go ================================================ package git import ( "bufio" "encoding/hex" "io" "strconv" "strings" ) // We define these here instead of using the system ones because not all // operating systems use the traditional values. For example, zOS uses // different values. const ( sIFMT = FileMode(0170000) sIFREG = FileMode(0100000) sIFDIR = FileMode(0040000) sIFLNK = FileMode(0120000) sIFGITLINK = FileMode(0160000) ) // Tree encapsulates a Git tree object. type Tree struct { // Hash of the tree object. Hash string `json:"hash"` // Entries is the list of entries held by this tree. Entries []*TreeEntry `json:"entries"` size int64 } // TreeEntry encapsulates information about a single tree entry in a tree // listing. type TreeEntry struct { // Name is the entry name relative to the tree in which this entry is // contained. Name string `json:"name"` // Hash is the object ID (Hex) for this tree entry. Hash string `json:"hash"` // Filemode is the filemode of this tree entry on disk. Filemode FileMode `json:"mode"` } func (t *Tree) Size() int64 { return t.size } // Decode implements Object.Decode and decodes the uncompressed tree being // read. It returns the number of uncompressed bytes being consumed off of the // stream, which should be strictly equal to the size given. // // If any error was encountered along the way, that will be returned, along with // the number of bytes read up to that point. func (t *Tree) Decode(hash string, from io.Reader, size int64) (n int, err error) { t.Hash = hash t.size = size buf := bufio.NewReader(from) hashSize := len(t.Hash) / 2 var entries []*TreeEntry for { modes, err := buf.ReadString(' ') if err != nil { if err == io.EOF { break } return n, err } n += len(modes) modes = strings.TrimSuffix(modes, " ") mode, _ := strconv.ParseInt(modes, 8, 32) fname, err := buf.ReadString('\x00') if err != nil { return n, err } n += len(fname) fname = strings.TrimSuffix(fname, "\x00") var sha [GIT_SHA256_RAWSZ]byte if _, err = io.ReadFull(buf, sha[:hashSize]); err != nil { return n, err } n += hashSize entries = append(entries, &TreeEntry{ Name: fname, Hash: hex.EncodeToString(sha[:hashSize]), Filemode: FileMode(mode), }) } t.Entries = entries return n, nil } // Type is the type of entry (either blob: BlobObjectType, or a sub-tree: // TreeObjectType). func (e *TreeEntry) Type() string { switch e.Filemode & sIFMT { case sIFREG: return "blob" case sIFDIR: return "tree" case sIFLNK: return "blob" case sIFGITLINK: return "commit" default: return "unknown" } } // IsLink returns true if the given TreeEntry is a blob which represents a // symbolic link (i.e., with a filemode of 0120000. func (e *TreeEntry) IsLink() bool { return e.Filemode&sIFMT == sIFLNK } ================================================ FILE: modules/git/updateref.go ================================================ // Copyright (c) 2016-present GitLab Inc. // SPDX-License-Identifier: MIT package git import ( "bufio" "bytes" "context" "errors" "fmt" "io" "github.com/antgroup/hugescm/modules/command" ) var ( errClosed = errors.New("closed") ) // state represents a possible state the updater can be in. type state string const ( // stateIdle means the updater is ready for a new transaction to start. stateIdle state = "idle" // stateStarted means the updater has an open transaction and accepts // new reference changes. stateStarted state = "started" // statePrepared means the updater has prepared a transaction and no longer // accepts reference changes until the current transaction is committed and // a new one started. statePrepared state = "prepared" ) type RefUpdater struct { cmd *command.Command closeErr error stdin io.WriteCloser stdout io.ReadCloser reader *bufio.Reader stderr *bytes.Buffer shaFormat HashFormat ctx context.Context // state tracks the current state of the updater to ensure correct calling semantics. state state } func NewRefUpdater(ctx context.Context, repoPath string, environ []string, noDeref bool) (*RefUpdater, error) { shaFormat := HashFormatOK(repoPath) psArgs := []string{"update-ref", "-z", "--stdin"} if noDeref { psArgs = append(psArgs, "--no-deref") } // repoPath, environ var stderr bytes.Buffer cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: environ, RepoPath: repoPath, Stderr: &stderr, }, "git", psArgs...) stdin, err := cmd.StdinPipe() if err != nil { return nil, err } stdout, err := cmd.StdoutPipe() if err != nil { _ = stdin.Close() return nil, err } if err := cmd.Start(); err != nil { _ = stdin.Close() _ = stdout.Close() return nil, err } u := &RefUpdater{ cmd: cmd, stdout: stdout, stdin: stdin, stderr: &stderr, reader: bufio.NewReader(stdout), shaFormat: shaFormat, ctx: ctx, state: stateIdle, } return u, nil } // expectState returns an error and closes the updater if it is not in the expected state. func (u *RefUpdater) expectState(expected state) error { if u.closeErr != nil { return u.closeErr } if err := u.checkState(expected); err != nil { return u.closeWithError(err) } return nil } // checkState returns an error if the updater is not in the expected state. func (u *RefUpdater) checkState(expected state) error { if u.state != expected { return fmt.Errorf("expected state %q but it was %q", expected, u.state) } return nil } // Start begins a new reference transaction. The reference changes are not performed until Commit // is explicitly called. func (u *RefUpdater) Start() error { if err := u.expectState(stateIdle); err != nil { return err } u.state = stateStarted return u.setState("start") } // Update commands the reference to be updated to point at the object ID specified in newOID. If // newOID is the zero OID, then the branch will be deleted. If oldOID is a non-empty string, then // the reference will only be updated if its current value matches the old value. If the old value // is the zero OID, then the branch must not exist. // // A reference transaction must be started before calling Update. func (u *RefUpdater) Update(reference ReferenceName, newRev, oldRev string) error { if err := u.expectState(stateStarted); err != nil { return err } return u.write("update %s\x00%s\x00%s\x00", reference.String(), newRev, oldRev) } // UpdateSymbolicReference is used to do a symbolic reference update. We can potentially provide the oldTarget // or the oldOID. func (u *RefUpdater) UpdateSymbolicReference(reference, newTarget ReferenceName) error { if err := u.expectState(stateStarted); err != nil { return err } return u.write("symref-update %s\x00%s\x00\x00\x00", reference.String(), newTarget.String()) } // Create commands the reference to be created with the given object ID. The ref must not exist. // // A reference transaction must be started before calling Create. func (u *RefUpdater) Create(reference ReferenceName, oid string) error { return u.Update(reference, oid, u.shaFormat.ZeroOID()) } // Delete commands the reference to be removed from the repository. This command will ignore any old // state of the reference and just force-remove it. // // A reference transaction must be started before calling Delete. func (u *RefUpdater) Delete(reference ReferenceName) error { return u.Update(reference, u.shaFormat.ZeroOID(), "") } // Prepare prepares the reference transaction by locking all references and determining their // current values. The updates are not yet committed and will be rolled back in case there is no // call to `Commit()`. This call is optional. func (u *RefUpdater) Prepare() error { if err := u.expectState(stateStarted); err != nil { return err } u.state = statePrepared return u.setState("prepare") } // Commit applies the commands specified in other calls to the Updater. Commit finishes the // reference transaction and another one must be started before further changes can be staged. func (u *RefUpdater) Commit() error { // Commit can be called without preparing the transactions. if err := u.checkState(statePrepared); err != nil { if err := u.expectState(stateStarted); err != nil { return err } } u.state = stateIdle if err := u.setState("commit"); err != nil { return err } return nil } // Close closes the updater and aborts a possible open transaction. No changes will be written // to disk, all lockfiles will be cleaned up and the process will exit. func (u *RefUpdater) Close() error { return u.closeWithError(nil) } func (u *RefUpdater) teardown() { if u.stdin != nil { _ = u.stdin.Close() } if u.stdout != nil { _ = u.stdout.Close() } } func (u *RefUpdater) closeWithError(closeErr error) error { if u.closeErr != nil { return u.closeErr } u.teardown() // close input/output if err := u.cmd.Wait(); err != nil { u.closeErr = fmt.Errorf("close error: %w stderr: %s", err, u.stderr.String()) return err } if u.ctx.Err() != nil { u.closeErr = u.ctx.Err() return u.closeErr } if closeErr != nil { u.closeErr = closeErr return closeErr } u.closeErr = errClosed return nil } func (u *RefUpdater) write(format string, args ...any) error { if _, err := fmt.Fprintf(u.stdin, format, args...); err != nil { return u.closeWithError(err) } return nil } func (u *RefUpdater) setState(state string) error { if err := u.write("%s\x00", state); err != nil { return err } // For each state-changing command, git-update-ref(1) will report successful execution via // ": ok" lines printed to its stdout. Ideally, we should thus verify here whether // the command was successfully executed by checking for exactly this line, otherwise we // cannot be sure whether the command has correctly been processed by Git or if an error was // raised. line, err := u.reader.ReadString('\n') if err != nil { return u.closeWithError(fmt.Errorf("state update to %q failed: %w", state, err)) } if line != fmt.Sprintf("%s: ok\n", state) { return u.closeWithError(fmt.Errorf("state update to %q not successful: expected ok, got %q", state, line)) } return nil } ================================================ FILE: modules/git/util.go ================================================ package git import ( "bytes" "context" "errors" "fmt" "os" "path/filepath" "strings" "time" "github.com/antgroup/hugescm/modules/command" "github.com/antgroup/hugescm/modules/git/config" ) const ( Sundries = "sundries" ) func RevParseHashFormat(ctx context.Context, repoPath string) (string, error) { cmd := command.New(ctx, repoPath, "git", "rev-parse", "--show-object-format") format, err := cmd.OneLine() if err != nil { return "", fmt.Errorf("detect repo object format: %v", command.FromError(err)) } return format, nil } func HashFormatResult(repoPath string) (HashFormat, error) { cfg, err := config.BareDecode(repoPath) if err != nil { return HashUNKNOWN, err } return HashFormatFromName(cfg.HashFormat()), nil } func HashFormatOK(repoPath string) HashFormat { if h, err := HashFormatResult(repoPath); err == nil { return h } return HashSHA1 } // ExtensionsFormat: return objectFormat, refFormat func ExtensionsFormat(repoPath string) (HashFormat, string) { cfg, err := config.BareDecode(repoPath) if err != nil { return HashSHA1, "files" } return HashFormatFromName(cfg.HashFormat()), cfg.ReferencesFormat() } // RevParseRepoPath parse repo dir func RevParseRepoPath(ctx context.Context, p string) string { cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: os.Environ(), RepoPath: p, }, "git", "rev-parse", "--git-dir") repoPath, err := cmd.OneLine() if err != nil { return p } if filepath.IsAbs(repoPath) { return repoPath } return filepath.Join(p, repoPath) } // --show-toplevel func RevParseWorktree(ctx context.Context, p string) (string, error) { cmd := command.NewFromOptions(ctx, &command.RunOpts{ Environ: os.Environ(), RepoPath: p, }, "git", "rev-parse", "--show-toplevel") repoPath, err := cmd.OneLine() if err != nil { return "", err } if filepath.IsAbs(repoPath) { return repoPath, nil } return filepath.Join(p, repoPath), nil } var ( ErrBlankRevision = errors.New("empty revision") ErrBadRevision = errors.New("revision can't start with '-'") ) // ValidateBytesRevision checks if a revision looks valid func ValidateBytesRevision(revision []byte) error { if len(revision) == 0 { return ErrBlankRevision } if bytes.HasPrefix(revision, []byte("-")) { return ErrBadRevision } return nil } // ValidateBytesRevision checks if a revision looks valid func ValidateRevision(revision string) error { if len(revision) == 0 { return ErrBlankRevision } if strings.HasPrefix(revision, "-") { return ErrBadRevision } return nil } // FallbackTimeValue is the value returned by `SafeTimeParse` in case it // encounters a parse error. It's the maximum time value possible in golang. // See https://gitlab.com/gitlab-org/gitaly/issues/556#note_40289573 var FallbackTimeValue = time.Unix(1<<63-62135596801, 999999999) // PareTimeFallback parses a git date string with the RFC3339 format. If the date // is invalid (possibly because the date is larger than golang's largest value) // it returns the maximum date possible. func PareTimeFallback(s string) time.Time { if t, err := time.Parse(time.RFC3339, s); err == nil { return t } return FallbackTimeValue } func NewSundriesDir(repoPath string, pattern string) (string, error) { sundries := filepath.Join(repoPath, Sundries) if err := os.Mkdir(sundries, 0700); err != nil && !os.IsExist(err) { return "", err } return os.MkdirTemp(sundries, pattern) } ================================================ FILE: modules/git/version.go ================================================ package git import ( "bytes" "context" "fmt" "os" "strconv" "strings" "sync" "github.com/antgroup/hugescm/modules/command" ) type Version struct { versionString string major, minor, patch uint32 rc bool } // NewVersion constructs a new Git version from the given components. func NewVersion(major, minor, patch uint32) Version { return Version{ versionString: fmt.Sprintf("%d.%d.%d", major, minor, patch), major: major, minor: minor, patch: patch, } } // ParseVersionOutput parses output returned by git-version(1). It is expected to be in the format // "git version 2.39.1". func ParseVersionOutput(versionOutput []byte) (Version, error) { trimmedVersionOutput := string(bytes.Trim(versionOutput, " \n")) versionString := strings.SplitN(trimmedVersionOutput, " ", 3) if len(versionString) != 3 { return Version{}, fmt.Errorf("invalid version format: %q", string(versionOutput)) } version, err := ParseVersion(versionString[2]) if err != nil { return Version{}, fmt.Errorf("cannot parse git version: %w", err) } return version, nil } // String returns the string representation of the version. func (v Version) String() string { return v.versionString } // LessThan determines whether the version is older than another version. func (v Version) LessThan(other Version) bool { switch { case v.major < other.major: return true case v.major > other.major: return false case v.minor < other.minor: return true case v.minor > other.minor: return false case v.patch < other.patch: return true case v.patch > other.patch: return false case v.rc && !other.rc: return true case !v.rc && other.rc: return false default: // this should only be reachable when versions are equal return false } } // Equal determines whether the version is the same as another version. func (v Version) Equal(other Version) bool { return v == other } // GreaterOrEqual determines whether the version is newer than or equal to another version. func (v Version) GreaterOrEqual(other Version) bool { return !v.LessThan(other) } // ParseVersion parses a git version string. func ParseVersion(versionStr string) (Version, error) { versionSplit := strings.SplitN(versionStr, ".", 4) if len(versionSplit) < 3 { return Version{}, fmt.Errorf("expected major.minor.patch in %q", versionStr) } ver := Version{ versionString: versionStr, } for i, v := range []*uint32{&ver.major, &ver.minor, &ver.patch} { var n64 uint64 if versionSplit[i] == "GIT" { // Git falls back to vx.x.GIT if it's unable to describe the current version // or if there's a version file. We should just treat this as "0", even // though it may have additional commits on top. n64 = 0 } else { rcSplit := strings.SplitN(versionSplit[i], "-", 2) var err error n64, err = strconv.ParseUint(rcSplit[0], 10, 32) if err != nil { return Version{}, err } if len(rcSplit) == 2 && strings.HasPrefix(rcSplit[1], "rc") { ver.rc = true } } *v = uint32(n64) } if len(versionSplit) == 4 { if strings.HasPrefix(versionSplit[3], "rc") { ver.rc = true } } return ver, nil } func gitVersionDetect() (Version, error) { cmd := command.New(context.Background(), command.NoDir, "git", "version") versionOutput, err := cmd.Output() if err != nil { return Version{}, err } return ParseVersionOutput(versionOutput) } var ( VersionDetect = sync.OnceValues(gitVersionDetect) ) // IsVersionAtLeast returns whether the git version is the one specified or higher // argument is plain version string separated by '.' e.g. "2.3.1" but can omit minor/patch func IsGitVersionAtLeast(other Version) bool { v, err := VersionDetect() if err != nil { fmt.Fprintf(os.Stderr, "Error getting git version: %v\n", err) return false } return v.GreaterOrEqual(other) } ================================================ FILE: modules/git/version_test.go ================================================ package git import ( "fmt" "os" "testing" "time" ) func TestVersion(t *testing.T) { for range 10 { now := time.Now() v, err := VersionDetect() if err != nil { fmt.Fprintf(os.Stderr, "%v\n", err) return } fmt.Fprintf(os.Stderr, "%s use time: %v\n", v, time.Since(now)) } } func TestIsGitVersionAtLeast(t *testing.T) { fmt.Fprintf(os.Stderr, ">= 2.36.0: %v\n", IsGitVersionAtLeast(NewVersion(2, 36, 0))) } ================================================ FILE: modules/hexview/format.go ================================================ package hexview import ( "bytes" "fmt" "io" "math" "strings" "github.com/antgroup/hugescm/modules/term" ) const ( CN byte = 0 /* null */ CS byte = 1 /* space */ CP byte = 2 /* print */ CC byte = 3 /* control */ CH byte = 4 /* high */ ) var ( colorTable = []byte{ CN, CC, CC, CC, CC, CC, CC, CC, CC, CC, CS, CS, CS, CS, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CC, CS, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CP, CC, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, CH, } displayTable = []byte{ 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, } color256Index = []string{"\x1b[90m", "\x1b[92m", "\x1b[96m", "\x1b[95m", "\x1b[93m"} color24Index = []string{"\x1b[90m", "\x1b[38;2;67;233;123m", "\x1b[38;2;0;201;255m", "\x1b[38;2;255;0;255m", "\x1b[38;2;254;225;64m"} ) type binaryPrinter struct { *bytes.Buffer w io.Writer colorIndex []string } func newBinaryPrinter(w io.Writer, colorMode term.Level) *binaryPrinter { byteBuffer := &bytes.Buffer{} byteBuffer.Grow(400) colorTable := color256Index if colorMode == term.Level16M { colorTable = color24Index } return &binaryPrinter{Buffer: byteBuffer, w: w, colorIndex: colorTable} } // left_corner: '┌', // horizontal_line: '─', // column_separator: '┬', // right_corner: '┐', // left_corner: '└', // horizontal_line: '─', // column_separator: '┴', // right_corner: '┘', // │ ┊ const ( // Hexadecimal => 2 // [ 4d 5a 90 00 03 00 00 00 ] panelSize = 2*8 + 9 ) func (b *binaryPrinter) doPrintln(a ...string) { for _, s := range a { _, _ = b.WriteString(s) } _ = b.WriteByte('\n') } func (b *binaryPrinter) writeBorder() error { panelStr := strings.Repeat("─", panelSize) h8 := strings.Repeat("─", 8) b.doPrintln("┌", h8, "┬", panelStr, "┬", panelStr, "┬", h8, "┬", h8, "┐") return b.flush() } func (b *binaryPrinter) writeFooter() error { panelStr := strings.Repeat("─", panelSize) h8 := strings.Repeat("─", 8) b.doPrintln("└", h8, "┴", panelStr, "┴", panelStr, "┴", h8, "┴", h8, "┘") return b.flush() } func (b *binaryPrinter) formatByte(v byte) { c := colorTable[v] fmt.Fprintf(b.Buffer, "%s%02x\x1b[0m ", b.colorIndex[c], v) } func (b *binaryPrinter) displayByte(v byte) { c := colorTable[v] fmt.Fprintf(b.Buffer, "%s%c\x1b[0m", b.colorIndex[c], displayTable[v]) } func (b *binaryPrinter) writeLine(offset int64, input []byte) error { fmt.Fprintf(b.Buffer, "│\x1b[90m%08x\x1b[0m│ ", offset) var i int for ; i < min(8, len(input)); i++ { b.formatByte(input[i]) } for ; i < 8; i++ { _, _ = b.WriteString(" ") } _, _ = b.WriteString("┊ ") for ; i < min(16, len(input)); i++ { b.formatByte(input[i]) } for ; i < 16; i++ { _, _ = b.WriteString(" ") } _, _ = b.WriteString("│") var j int for ; j < min(8, len(input)); j++ { b.displayByte(input[j]) } for ; j < 8; j++ { _, _ = b.WriteString(" ") } _, _ = b.WriteString("┊") for ; j < min(16, len(input)); j++ { b.displayByte(input[j]) } for ; j < 16; j++ { _, _ = b.WriteString(" ") } _, _ = b.WriteString("│\n") return b.flush() } func (b *binaryPrinter) flush() error { _, err := b.w.Write(b.Bytes()) b.Reset() return err } func Format(r io.Reader, w io.Writer, size int64, colorMode term.Level) error { if size < 0 { size = math.MaxInt64 } var input [16]byte b := newBinaryPrinter(w, colorMode) if err := b.writeBorder(); err != nil { return err } var offset int64 for { readBytes := min(size, 16) n, err := io.ReadFull(r, input[:readBytes]) if err != nil && err != io.ErrUnexpectedEOF { break } if err := b.writeLine(offset, input[:n]); err != nil { return err } size -= int64(n) if size <= 0 { break } if n != 16 { break } offset += 16 } return b.writeFooter() } ================================================ FILE: modules/hexview/format_test.go ================================================ package hexview import ( "bytes" "crypto/rand" "os" "testing" "github.com/antgroup/hugescm/modules/term" ) func TestFormat(t *testing.T) { b := make([]byte, 1000) _, err := rand.Read(b[10:]) if err != nil { return } _ = Format(bytes.NewReader(b), os.Stdout, int64(len(b)), term.Level16M) } func TestFormatOverflow(t *testing.T) { b := make([]byte, 1000) _, err := rand.Read(b[10:]) if err != nil { return } _ = Format(bytes.NewReader(b), os.Stdout, int64(len(b))+8, term.Level16M) } func TestBorder(t *testing.T) { input := make([]byte, 15) _, err := rand.Read(input) if err != nil { return } b := newBinaryPrinter(os.Stderr, term.Level16M) _ = b.writeBorder() _ = b.writeLine(0, input) _ = b.writeLine(16, []byte("world")) _ = b.writeFooter() } ================================================ FILE: modules/keyring/LICENSE ================================================ The MIT License (MIT) Copyright (c) 2016 Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/keyring/README.md ================================================ # Keyring - 跨平台密钥管理库 基于 purego 的跨平台密钥管理库,完全兼容 git credential 工具。 ## Linux 特殊说明 **重要:Linux 下的默认行为** 在 Linux 系统上,keyring 默认**不存储密码**,以避免在无 GUI 或服务器环境中出现 DBUS 连接错误。 ### 启用密码存储 如需在 Linux 上启用密码存储,有以下两种方式: #### 1. 使用环境变量(推荐用于 CI/CD 或临时使用) ```bash export ZETA_CREDENTIAL_STORAGE=secret-service ``` #### 2. 使用配置文件(推荐用于长期使用) ```bash # 全局配置 zeta config --global credential.storage secret-service # 或本地配置 zeta config credential.storage secret-service ``` ### 存储模式说明 | 模式 | 说明 | 适用场景 | |------|------|----------| | `auto` | 自动选择(默认) | 自动检测环境,Linux 下默认不存储 | | `secret-service` | 使用 libsecret/Secret Service | 有桌面环境的 Linux(需要 DBUS) | | `none` | 禁用存储 | 完全禁用凭据存储 | ## 与 zalando/go-keyring 的重大差异 ### 1. 完全兼容 Git Credential 工具 - **go-keyring**: 使用自定义的查询和存储格式,与 git credential 不兼容 - **zeta/keyring**: 严格按照 git credential 工具的格式和属性存储凭据 **兼容的工具:** - `git-credential-osxkeychain` (macOS) - `git-credential-manager` (Windows) - `git-credential-libsecret` (Linux) ### 2. 纯 Purego 实现 - **go-keyring**: macOS 使用 cgo 调用 Security framework,Windows 使用 syscall - **zeta/keyring**: 完全使用 purego,通过纯 Go 代码调用平台 API **优点:** - 无 CGO 依赖,编译更简单 - 支持交叉编译 - 更好的可移植性 ### 3. 统一的凭据结构 - **go-keyring**: 使用简单的 `(service, username, password)` 三元组 - **zeta/keyring**: 使用完整的凭据结构,包含 protocol、server、path、port 等信息 ```go type Cred struct { UserName string Password string Protocol string // 协议类型:http, https, imap, smtp, ftp 等 Server string // 服务器地址(不含端口) Path string // 路径(可选) Port int // 端口(可选) } ``` ### 4. 函数命名符合 Git 惯例 - **go-keyring**: 使用 `Get/Set/Delete` - **zeta/keyring**: 使用 `Get/Store/Erase`,与 git credential 的 `get/store/erase` 命令保持一致 ### 5. 多用户支持 - **go-keyring**: 一个 service 只能有一个 username - **zeta/keyring**: 同一 server 可以有多个不同的 username,完全支持多用户场景 ### 6. 移除接口抽象 - **go-keyring**: 定义了 `Keyring` 接口和多种实现 - **zeta/keyring**: 直接导出平台特定的函数,通过 build tags 选择实现 **优点:** - 代码更简洁,减少抽象层次 - 调用方更直观,无需实例化对象 ## 使用方式 ### 基本用法 ```go import "github.com/zeta/zeta/modules/keyring" // 从 URL 解析凭据 cred := keyring.NewCredFromURL("https://github.com/zeta/zeta") // 设置密码 cred.UserName = "username" cred.Password = "password" // 存储 err := keyring.Store(context.Background(), cred) // 获取 retrieved, err := keyring.Get(context.Background(), cred) if err == nil { fmt.Println("Password:", retrieved.Password) } // 删除 err := keyring.Erase(context.Background(), cred) ``` ### 从 URL 自动解析 ```go // 支持多种 URL 格式 cred1 := keyring.NewCredFromURL("https://github.com/zeta/zeta") // cred1.Protocol = "https" // cred1.Server = "github.com" cred2 := keyring.NewCredFromURL("http://example.com:8080/path") // cred2.Protocol = "http" // cred2.Server = "example.com" // cred2.Port = 8080 // cred2.Path = "/path" ``` ### 手动构造凭据 ```go cred := &keyring.Cred{ Protocol: "https", Server: "example.com", Port: 443, UserName: "user", Password: "pass", } err := keyring.Store(context.Background(), cred) ``` ## 平台实现 ### macOS (Darwin) - 使用 Security framework - 完全兼容 `git-credential-osxkeychain` - 纯 purego 实现,无 CGO 依赖 - 支持:kSecAttrProtocol、kSecAttrAuthenticationType 等属性 **目标名称格式:** `server[:port]` ### Windows - 使用 Windows Credential Manager API - 完全兼容 `git-credential-manager` - 支持 UTF-16 编码 **目标名称格式:** `zeta::[:][]` ### Linux/Unix - **默认行为**:不存储密码,避免 DBUS 错误 - 可选使用 Secret Service API (libsecret) - 完全兼容 `git-credential-libsecret` - 需要显式配置才能启用存储 **启用存储:** ```bash # 方式1:环境变量 export ZETA_CREDENTIAL_STORAGE=secret-service # 方式2:配置文件 zeta config credential.storage secret-service ``` **目标名称格式:** `zeta::[:][]` ## 错误处理 ```go cred := keyring.NewCredFromURL("https://example.com") // 检查凭据是否存在 _, err := keyring.Get(context.Background(), cred) if errors.Is(err, keyring.ErrNotFound) { fmt.Println("Credential not found") } // 检查存储是否被禁用(Linux 默认行为) err = keyring.Store(context.Background(), cred) if errors.Is(err, keyring.ErrStorageDisabled) { fmt.Println("Credential storage is disabled on Linux") fmt.Println("To enable: export ZETA_CREDENTIAL_STORAGE=secret-service") } ``` ## 最佳实践 1. **始终使用完整的凭据信息**:包括 protocol、server、username 等 2. **使用 NewCredFromURL**:从 URL 自动解析,避免手动构造错误 3. **处理 ErrNotFound**:区分"找不到"和"其他错误" 4. **处理 ErrStorageDisabled**:在 Linux 上检查存储是否启用 5. **使用 context**:支持超时和取消操作 6. **不要硬编码密码**:始终使用 keyring 存储敏感信息 7. **Linux 环境**:明确告知用户如何启用凭据存储 ## 限制 - 每个凭据必须有 server 字段 - Username 和 Password 不能为空 - 不支持空字节(null byte)在这些字段中 ## 许可证 Apache License Version 2.0 ================================================ FILE: modules/keyring/VERSION ================================================ https://github.com/zalando/go-keyring 28657a580d2cfb4b21ff91769ce687ce4a31cb22 2024-08-16: Code rewritten, do not merge upstream. ================================================ FILE: modules/keyring/keyring.go ================================================ package keyring import ( "errors" "net" "net/url" "strconv" ) var ( // ErrNotFound is the expected error if the secret isn't found in the keyring. ErrNotFound = errors.New("secret not found in keyring") // ErrSetDataTooBig is returned if Set was called with too much data. // On macOS: The combination of service, username & password should not exceed ~3000 bytes // On Windows: The service is limited to 32KiB while the password is limited to 2560 bytes // On Linux/Unix: There is no theoretical limit but performance suffers with big values (>100KiB) ErrSetDataTooBig = errors.New("data passed to Set was too big") // ErrStorageDisabled indicates that credential storage is disabled. ErrStorageDisabled = errors.New("credential storage is disabled") // ErrNoEncryptionKey indicates that encryption key is required but not provided. ErrNoEncryptionKey = errors.New("encryption key is required for file storage") ) // Cred represents credentials for a server. // This design follows git-credential-osxkeychain pattern where // credentials are identified by (protocol, host, username) tuple. type Cred struct { UserName string Password string // Protocol specifies protocol type (http, https, imap, smtp, ftp, etc.) Protocol string // Server specifies the server name or IP address (without port) Server string // Path specifies the path component (optional, for some protocols) Path string // Port specifies the port number (optional, 0 means use default) Port int } // Option is a functional option for configuring keyring behavior. // This is used to configure credential storage backend on platforms that support multiple backends. // On macOS and Windows, the default backend is always used unless explicitly overridden. type Option func(*Options) // Options holds configuration for keyring operations. type Options struct { // Storage specifies the credential storage backend. // // Platform-specific behavior: // - macOS: Default uses Security.framework; "security" uses /usr/bin/security CLI; "file" uses encrypted file // - Windows: Default uses Credential Manager; "file" uses encrypted file // - Linux: Default is "none"; "secret-service" uses Secret Service API; "file" uses encrypted file Storage string // EncryptionKey specifies the key for encrypting credentials in file storage. // Required when Storage="file". EncryptionKey string // StoragePath specifies the path for encrypted credential file. // Only used when Storage="file". // Default: ~/.config/zeta/credentials StoragePath string } // WithStorage sets the credential storage backend. // Valid values depend on the platform: // - macOS: "security" (/usr/bin/security CLI), "file" // - Windows: "file" // - Linux: "secret-service", "file" func WithStorage(storage string) Option { return func(o *Options) { o.Storage = storage } } // WithEncryptionKey sets the encryption key for file-based credential storage. // Required when Storage="file". func WithEncryptionKey(key string) Option { return func(o *Options) { o.EncryptionKey = key } } // WithStoragePath sets the path for encrypted credential file. // Only used when Storage="file". func WithStoragePath(path string) Option { return func(o *Options) { o.StoragePath = path } } func resolveStorageOptions(opts ...Option) *Options { options := &Options{ Storage: storageAuto, } for _, o := range opts { o(options) } return options } // Storage mode constants used across platforms const ( storageAuto = "auto" storageFile = "file" storageNone = "none" ) // NewCredFromURL creates a Cred from a URL, extracting protocol, server, and port. // If the URL specifies a default port for the protocol (e.g., 443 for https), // the port is not stored to ensure consistent credential lookup. func NewCredFromURL(targetURL string) *Cred { u, err := url.Parse(targetURL) if err != nil { return &Cred{ Server: targetURL, } } cred := &Cred{ Protocol: u.Scheme, Server: u.Hostname(), Path: u.Path, } // Extract port, but skip default ports to ensure consistent credential lookup if u.Port() != "" { if port, err := strconv.Atoi(u.Port()); err == nil { if defaultPorts[u.Scheme] != port { cred.Port = port } } } return cred } // defaultPorts maps protocols to their default ports. var defaultPorts = map[string]int{ "http": 80, "https": 443, "ftp": 21, "ssh": 22, } // buildTargetName constructs a unique target name for storing credentials. // Format: "zeta+://[:][]" func buildTargetName(cred *Cred) string { protocol := cred.Protocol if protocol == "" { protocol = "https" } var host string if cred.Port != 0 { host = net.JoinHostPort(cred.Server, strconv.Itoa(cred.Port)) } else { host = cred.Server } u := &url.URL{ Scheme: "zeta+" + protocol, Host: host, Path: cred.Path, } return u.String() } // parseTargetName parses a target name back into a Cred struct // Format: "zeta+://[:][]" func parseTargetName(target string) *Cred { u, err := url.Parse(target) if err != nil { return &Cred{Server: target} } // Extract protocol from "zeta+" scheme scheme := u.Scheme protocol, found := parseSchemePrefix(scheme, "zeta+") if !found { return &Cred{Server: target} } cred := &Cred{ Protocol: protocol, Server: u.Hostname(), Path: u.Path, } if u.Port() != "" { if port, err := strconv.Atoi(u.Port()); err == nil { cred.Port = port } } return cred } // parseSchemePrefix parses a scheme like "zeta+https" and returns the protocol part func parseSchemePrefix(scheme, prefix string) (protocol string, found bool) { if len(scheme) <= len(prefix) { return "", false } if scheme[:len(prefix)] != prefix { return "", false } return scheme[len(prefix):], true } ================================================ FILE: modules/keyring/keyring_darwin.go ================================================ //go:build darwin // Package keyring provides cross-platform credential storage for Zeta. // This file implements the macOS (Darwin) backend using purego without CGO. // Default: Uses Security.framework via purego (recommended) // Alternative: Set storage="security" to use /usr/bin/security CLI tool // Alternative: Set storage="file" to use encrypted file storage package keyring import ( "context" "errors" "fmt" "strings" "sync" "unsafe" "github.com/ebitengine/purego" ) // Core Foundation and Security framework constants const ( kCFStringEncodingUTF8 = 0x08000100 kCFAllocatorDefault = 0 ) type osStatus int32 const ( errSecSuccess osStatus = 0 // No error. errSecDuplicateItem osStatus = -25299 // The specified item already exists in the keychain. errSecItemNotFound osStatus = -25300 // The specified item could not be found in the keychain. ) type _CFRange struct { location int64 length int64 } type _CFNumberType int64 // CFNumberType is alias for CFIndex, which is int64 on 64-bit systems const ( // CFNumber type constants for number conversion kCFNumberIntType _CFNumberType = 3 // SInt32Type ) var ( kCFTypeDictionaryKeyCallBacks uintptr kCFTypeDictionaryValueCallBacks uintptr kCFBooleanTrue uintptr ) var ( kSecClass uintptr kSecClassInternetPassword uintptr kSecAttrServer uintptr kSecAttrAccount uintptr kSecAttrProtocol uintptr kSecAttrProtocolHTTP uintptr kSecAttrProtocolHTTPS uintptr kSecAttrProtocolFTP uintptr kSecAttrProtocolFTPS uintptr kSecAttrProtocolIMAP uintptr kSecAttrProtocolIMAPS uintptr kSecAttrProtocolSMTP uintptr kSecAttrPort uintptr kSecAttrPath uintptr kSecAttrAuthenticationType uintptr kSecAttrAuthenticationTypeDefault uintptr kSecValueData uintptr kSecReturnData uintptr kSecReturnAttributes uintptr kSecMatchLimit uintptr kSecMatchLimitAll uintptr ) var ( CFDictionaryCreate func(allocator uintptr, keys, values *uintptr, numValues int64, keyCallBacks, valueCallBacks uintptr) uintptr CFStringCreateWithCString func(allocator uintptr, cStr string, encoding uint32) uintptr CFDataCreate func(alloc uintptr, bytes []byte, length int64) uintptr CFDataGetLength func(theData uintptr) int64 CFDataGetBytes func(theData uintptr, range_ _CFRange, buffer []byte) CFRelease func(cf uintptr) CFNumberCreate func(allocator uintptr, theType _CFNumberType, valuePtr uintptr) uintptr ) var ( SecItemCopyMatching func(query uintptr, result *uintptr) osStatus SecItemAdd func(query uintptr, result uintptr) osStatus SecItemUpdate func(query uintptr, attributesToUpdate uintptr) osStatus SecItemDelete func(query uintptr) osStatus CFDictionaryGetValue func(theDict uintptr, key uintptr) uintptr CFStringGetCString func(theString uintptr, buffer *byte, bufferSize int64, encoding uint32) int64 CFStringGetLength func(theString uintptr) int64 ) var ( puregoOnce sync.Once puregoErr error ) // ensureInitialized ensures the keyring is initialized. // It uses sync.Once to ensure initialization happens only once. // Returns an error if initialization fails. func ensureInitialized() error { puregoOnce.Do(func() { puregoErr = initializeKeyring() }) return puregoErr } // initializeKeyring initializes the PureGo bindings for macOS Security framework. func initializeKeyring() error { cfLib, err := purego.Dlopen("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", purego.RTLD_NOW|purego.RTLD_GLOBAL) if err != nil { return fmt.Errorf("failed to load CoreFoundation framework: %w", err) } // Load CoreFoundation constants ptr, err := purego.Dlsym(cfLib, "kCFTypeDictionaryKeyCallBacks") if err != nil { return fmt.Errorf("failed to load kCFTypeDictionaryKeyCallBacks: %w", err) } kCFTypeDictionaryKeyCallBacks = deref(ptr) ptr, err = purego.Dlsym(cfLib, "kCFTypeDictionaryValueCallBacks") if err != nil { return fmt.Errorf("failed to load kCFTypeDictionaryValueCallBacks: %w", err) } kCFTypeDictionaryValueCallBacks = deref(ptr) ptr, err = purego.Dlsym(cfLib, "kCFBooleanTrue") if err != nil { return fmt.Errorf("failed to load kCFBooleanTrue: %w", err) } kCFBooleanTrue = deref(ptr) purego.RegisterLibFunc(&CFDictionaryCreate, cfLib, "CFDictionaryCreate") purego.RegisterLibFunc(&CFStringCreateWithCString, cfLib, "CFStringCreateWithCString") purego.RegisterLibFunc(&CFDataCreate, cfLib, "CFDataCreate") purego.RegisterLibFunc(&CFDataGetLength, cfLib, "CFDataGetLength") purego.RegisterLibFunc(&CFDataGetBytes, cfLib, "CFDataGetBytes") purego.RegisterLibFunc(&CFRelease, cfLib, "CFRelease") purego.RegisterLibFunc(&CFNumberCreate, cfLib, "CFNumberCreate") secLib, err := purego.Dlopen("/System/Library/Frameworks/Security.framework/Security", purego.RTLD_NOW|purego.RTLD_GLOBAL) if err != nil { return fmt.Errorf("failed to load Security framework: %w", err) } // Load Security constants symbols := []struct { sym string addr *uintptr }{ {"kSecClass", &kSecClass}, {"kSecClassInternetPassword", &kSecClassInternetPassword}, {"kSecAttrServer", &kSecAttrServer}, {"kSecAttrAccount", &kSecAttrAccount}, {"kSecAttrProtocol", &kSecAttrProtocol}, {"kSecAttrProtocolHTTP", &kSecAttrProtocolHTTP}, {"kSecAttrProtocolHTTPS", &kSecAttrProtocolHTTPS}, {"kSecAttrProtocolFTP", &kSecAttrProtocolFTP}, {"kSecAttrProtocolFTPS", &kSecAttrProtocolFTPS}, {"kSecAttrProtocolIMAP", &kSecAttrProtocolIMAP}, {"kSecAttrProtocolIMAPS", &kSecAttrProtocolIMAPS}, {"kSecAttrProtocolSMTP", &kSecAttrProtocolSMTP}, {"kSecAttrPort", &kSecAttrPort}, {"kSecAttrPath", &kSecAttrPath}, {"kSecAttrAuthenticationType", &kSecAttrAuthenticationType}, {"kSecAttrAuthenticationTypeDefault", &kSecAttrAuthenticationTypeDefault}, {"kSecValueData", &kSecValueData}, {"kSecReturnData", &kSecReturnData}, {"kSecReturnAttributes", &kSecReturnAttributes}, {"kSecMatchLimit", &kSecMatchLimit}, {"kSecMatchLimitAll", &kSecMatchLimitAll}, } for _, s := range symbols { ptr, err := purego.Dlsym(secLib, s.sym) if err != nil { return fmt.Errorf("failed to load %s: %w", s.sym, err) } *s.addr = deref(ptr) } purego.RegisterLibFunc(&SecItemCopyMatching, secLib, "SecItemCopyMatching") purego.RegisterLibFunc(&SecItemAdd, secLib, "SecItemAdd") purego.RegisterLibFunc(&SecItemUpdate, secLib, "SecItemUpdate") purego.RegisterLibFunc(&SecItemDelete, secLib, "SecItemDelete") purego.RegisterLibFunc(&CFDictionaryGetValue, cfLib, "CFDictionaryGetValue") purego.RegisterLibFunc(&CFStringGetCString, cfLib, "CFStringGetCString") purego.RegisterLibFunc(&CFStringGetLength, cfLib, "CFStringGetLength") return nil } // Get retrieves credentials from the configured storage backend. // Default uses Security.framework via purego. // Set opts storage="security" to use /usr/bin/security CLI. // Set opts storage="file" to use encrypted file storage. func Get(ctx context.Context, cred *Cred, opts ...Option) (*Cred, error) { select { case <-ctx.Done(): return nil, ctx.Err() default: } if cred == nil { return nil, errors.New("credential cannot be nil") } options := resolveStorageOptions(opts...) switch options.Storage { case storageAuto: return getFromKeychain(ctx, cred) case storageSecurity: return getFromSecurityCLI(ctx, cred) case storageFile: storage, err := newCredentialStorage(options.EncryptionKey, options.StoragePath) if err != nil { return nil, fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Get(ctx, cred) case storageNone: return nil, ErrNotFound default: return nil, fmt.Errorf("unknown storage mode: %s", options.Storage) } } // getFromKeychain retrieves credentials using Security.framework via purego. func getFromKeychain(ctx context.Context, cred *Cred) (*Cred, error) { select { case <-ctx.Done(): return nil, ctx.Err() default: } if err := ensureInitialized(); err != nil { return nil, fmt.Errorf("failed to initialize keyring: %w", err) } if cred.Server == "" { return nil, errors.New("server is required") } cfServer := CFStringCreateWithCString(kCFAllocatorDefault, cred.Server, kCFStringEncodingUTF8) defer CFRelease(cfServer) // Build query following git-credential-osxkeychain pattern: // Use kSecClassInternetPassword, kSecAttrServer as base // Add optional fields: kSecAttrProtocol, kSecAttrAccount, kSecAttrPath, kSecAttrPort // Add kSecReturnAttributes and kSecReturnData to get both metadata and password keys := []uintptr{ kSecClass, kSecAttrServer, kSecReturnAttributes, kSecReturnData, } values := []uintptr{ kSecClassInternetPassword, cfServer, kCFBooleanTrue, kCFBooleanTrue, } // Add optional fields and track CF objects for cleanup optionalFields := newOptionalFields(cred, &keys, &values) defer optionalFields.Release() // Add authentication type (required for git-credential-osxkeychain compatibility) keys = append(keys, kSecAttrAuthenticationType) values = append(values, kSecAttrAuthenticationTypeDefault) query := CFDictionaryCreate( kCFAllocatorDefault, &keys[0], &values[0], int64(len(keys)), kCFTypeDictionaryKeyCallBacks, kCFTypeDictionaryValueCallBacks, ) defer CFRelease(query) var result uintptr st := SecItemCopyMatching(query, &result) if st == errSecItemNotFound { return nil, ErrNotFound } if st != errSecSuccess { return nil, fmt.Errorf("error SecItemCopyMatching: %d", st) } defer CFRelease(result) // Extract username from result accountValue := CFDictionaryGetValue(result, kSecAttrAccount) username := "" if accountValue != 0 { // CFStringGetLength returns UTF-16 code units, but CFStringGetCString needs UTF-8 buffer. // UTF-8 can use up to 4 bytes per character, so allocate 4x the UTF-16 length. if length := CFStringGetLength(accountValue); length > 0 { buffer := make([]byte, length*4+1) if CFStringGetCString(accountValue, &buffer[0], int64(len(buffer)), kCFStringEncodingUTF8) == 0 { return nil, errors.New("failed to convert username to UTF-8") } username = strings.TrimRight(string(buffer), "\x00") } } // Extract password from result passwordValue := CFDictionaryGetValue(result, kSecValueData) password := "" if passwordValue != 0 { length := CFDataGetLength(passwordValue) if length > 0 { buffer := make([]byte, length) CFDataGetBytes(passwordValue, _CFRange{0, length}, buffer) password = string(buffer) } } return &Cred{ UserName: username, Password: password, Protocol: cred.Protocol, Server: cred.Server, Path: cred.Path, Port: cred.Port, }, nil } // Store saves credentials to the configured storage backend. // Default uses Security.framework via purego. // Set opts storage="security" to use /usr/bin/security CLI. // Set opts storage="file" to use encrypted file storage. func Store(ctx context.Context, cred *Cred, opts ...Option) error { if ctx.Err() != nil { return ctx.Err() } if cred == nil { return errors.New("credential cannot be nil") } // Validate input if cred.UserName == "" { return errors.New("username cannot be empty") } if cred.Password == "" { return errors.New("password cannot be empty") } if cred.Server == "" { return errors.New("server cannot be empty") } // Validate username cannot contain null byte if strings.Contains(cred.UserName, "\x00") { return errors.New("invalid username: contains null byte") } options := resolveStorageOptions(opts...) switch options.Storage { case storageAuto: return storeToKeychain(ctx, cred) case storageSecurity: return storeToSecurityCLI(ctx, cred) case storageFile: storage, err := newCredentialStorage(options.EncryptionKey, options.StoragePath) if err != nil { return fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Store(ctx, cred) case storageNone: return ErrStorageDisabled default: return fmt.Errorf("unknown storage mode: %s", options.Storage) } } // storeToKeychain stores credentials using Security.framework via purego. func storeToKeychain(ctx context.Context, cred *Cred) error { select { case <-ctx.Done(): return ctx.Err() default: } if err := ensureInitialized(); err != nil { return fmt.Errorf("failed to initialize keyring: %w", err) } cfServer := CFStringCreateWithCString(kCFAllocatorDefault, cred.Server, kCFStringEncodingUTF8) defer CFRelease(cfServer) cfPasswordData := CFDataCreate(kCFAllocatorDefault, []byte(cred.Password), int64(len(cred.Password))) defer CFRelease(cfPasswordData) // Build attributes following git-credential-osxkeychain pattern: // Always include: kSecClass, kSecAttrServer, kSecAttrAccount, kSecAttrProtocol, kSecAttrAuthenticationType // Optionally include: kSecAttrPath, kSecAttrPort // Then update with: kSecValueData keys := []uintptr{ kSecClass, kSecAttrServer, kSecValueData, } values := []uintptr{ kSecClassInternetPassword, cfServer, cfPasswordData, } // Add optional fields and track CF objects for cleanup optionalFields := newOptionalFields(cred, &keys, &values) defer optionalFields.Release() // Add authentication type (required for git-credential-osxkeychain compatibility) keys = append(keys, kSecAttrAuthenticationType) values = append(values, kSecAttrAuthenticationTypeDefault) query := CFDictionaryCreate( kCFAllocatorDefault, &keys[0], &values[0], int64(len(keys)), kCFTypeDictionaryKeyCallBacks, kCFTypeDictionaryValueCallBacks, ) defer CFRelease(query) sa := SecItemAdd(query, 0) if sa == errSecSuccess { return nil } if sa != errSecDuplicateItem { return fmt.Errorf("error SecItemAdd: %d", sa) } // Build update query matching same criteria as add query updateKeys := []uintptr{kSecClass, kSecAttrServer} updateValues := []uintptr{kSecClassInternetPassword, cfServer} // Add optional fields and track CF objects for cleanup updateOptionalFields := newOptionalFields(cred, &updateKeys, &updateValues) defer updateOptionalFields.Release() // Add authentication type (required for git-credential-osxkeychain compatibility) updateKeys = append(updateKeys, kSecAttrAuthenticationType) updateValues = append(updateValues, kSecAttrAuthenticationTypeDefault) updateQuery := CFDictionaryCreate( kCFAllocatorDefault, &updateKeys[0], &updateValues[0], int64(len(updateKeys)), kCFTypeDictionaryKeyCallBacks, kCFTypeDictionaryValueCallBacks, ) defer CFRelease(updateQuery) // Build attributes to update (password and account) cfAccount := CFStringCreateWithCString(kCFAllocatorDefault, cred.UserName, kCFStringEncodingUTF8) defer CFRelease(cfAccount) attrsToUpdateKeys := []uintptr{kSecValueData, kSecAttrAccount} attrsToUpdateValues := []uintptr{cfPasswordData, cfAccount} attrsToUpdate := CFDictionaryCreate( kCFAllocatorDefault, &attrsToUpdateKeys[0], &attrsToUpdateValues[0], int64(len(attrsToUpdateKeys)), kCFTypeDictionaryKeyCallBacks, kCFTypeDictionaryValueCallBacks, ) defer CFRelease(attrsToUpdate) su := SecItemUpdate(updateQuery, attrsToUpdate) if su != errSecSuccess { return fmt.Errorf("error SecItemUpdate: %d", su) } return nil } // Erase removes credentials from the configured storage backend. // Default uses Security.framework via purego. // Set opts storage="security" to use /usr/bin/security CLI. // Set opts storage="file" to use encrypted file storage. func Erase(ctx context.Context, cred *Cred, opts ...Option) error { select { case <-ctx.Done(): return ctx.Err() default: } if cred == nil { return errors.New("credential cannot be nil") } options := resolveStorageOptions(opts...) switch options.Storage { case storageAuto: return eraseFromKeychain(ctx, cred) case storageSecurity: return eraseFromSecurityCLI(ctx, cred) case storageFile: storage, err := newCredentialStorage(options.EncryptionKey, options.StoragePath) if err != nil { return fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Erase(ctx, cred) case storageNone: return ErrStorageDisabled default: return fmt.Errorf("unknown storage mode: %s", options.Storage) } } // eraseFromKeychain removes credentials using Security.framework via purego. func eraseFromKeychain(ctx context.Context, cred *Cred) error { select { case <-ctx.Done(): return ctx.Err() default: } if err := ensureInitialized(); err != nil { return fmt.Errorf("failed to initialize keyring: %w", err) } // Use server from cred server := cred.Server if server == "" { return errors.New("server is required") } cfServer := CFStringCreateWithCString(kCFAllocatorDefault, server, kCFStringEncodingUTF8) defer CFRelease(cfServer) // Build query following git-credential-osxkeychain pattern: // Use kSecClass, kSecAttrServer as base // Add optional fields: kSecAttrProtocol, kSecAttrAccount, kSecAttrPath, kSecAttrPort // Note: SecItemDelete does NOT support kSecMatchLimit - it deletes all matching items by default. keys := []uintptr{ kSecClass, kSecAttrServer, } values := []uintptr{ kSecClassInternetPassword, cfServer, } // Add optional fields and track CF objects for cleanup optionalFields := newOptionalFields(cred, &keys, &values) defer optionalFields.Release() // Add authentication type (required for git-credential-osxkeychain compatibility) keys = append(keys, kSecAttrAuthenticationType) values = append(values, kSecAttrAuthenticationTypeDefault) query := CFDictionaryCreate( kCFAllocatorDefault, &keys[0], &values[0], int64(len(keys)), kCFTypeDictionaryKeyCallBacks, kCFTypeDictionaryValueCallBacks, ) defer CFRelease(query) st := SecItemDelete(query) if st == errSecItemNotFound { // Item not found is not an error - deletion is idempotent return nil } if st != errSecSuccess { return fmt.Errorf("error SecItemDelete: %d", st) } return nil } // darwinProtocolFromScheme converts protocol string to keychain protocol constant. func darwinProtocolFromScheme(protocol string) uintptr { switch strings.ToLower(protocol) { case "https": return kSecAttrProtocolHTTPS case "http": return kSecAttrProtocolHTTP case "ftp": return kSecAttrProtocolFTP case "ftps": return kSecAttrProtocolFTPS case "imap": return kSecAttrProtocolIMAP case "imaps": return kSecAttrProtocolIMAPS case "smtp": return kSecAttrProtocolSMTP default: return 0 // Unknown protocol } } // ========== Helper Functions ========== // darwinOptionalFields holds optional CF objects that may be added to queries. type darwinOptionalFields struct { cfProtocol uintptr cfAccount uintptr cfPath uintptr cfPort uintptr } // Release releases all CF objects held by darwinOptionalFields. // Note: cfProtocol is a constant value, not a CF object, so it's not released. func (f *darwinOptionalFields) Release() { if f.cfAccount != 0 { CFRelease(f.cfAccount) f.cfAccount = 0 } if f.cfPath != 0 { CFRelease(f.cfPath) f.cfPath = 0 } if f.cfPort != 0 { CFRelease(f.cfPort) f.cfPort = 0 } } // newOptionalFields creates and returns darwinOptionalFields with optional credential fields. // It appends the fields to the provided keys and values slices. // The caller should call fields.Release() when no longer needed. func newOptionalFields(cred *Cred, keys, values *[]uintptr) *darwinOptionalFields { fields := &darwinOptionalFields{} // Add protocol if specified if cred.Protocol != "" { if protocol := darwinProtocolFromScheme(cred.Protocol); protocol != 0 { fields.cfProtocol = protocol *keys = append(*keys, kSecAttrProtocol) *values = append(*values, protocol) } } // Add username if specified if cred.UserName != "" { fields.cfAccount = CFStringCreateWithCString(kCFAllocatorDefault, cred.UserName, kCFStringEncodingUTF8) *keys = append(*keys, kSecAttrAccount) *values = append(*values, fields.cfAccount) } // Add path if specified if cred.Path != "" { fields.cfPath = CFStringCreateWithCString(kCFAllocatorDefault, cred.Path, kCFStringEncodingUTF8) *keys = append(*keys, kSecAttrPath) *values = append(*values, fields.cfPath) } // Add port if specified // Use int32 (kCFNumberIntType) to support full port range 0-65535 // int16 can only hold 0-32767 which is insufficient if cred.Port != 0 { portInt32 := int32(cred.Port) fields.cfPort = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, uintptr(unsafe.Pointer(&portInt32))) *keys = append(*keys, kSecAttrPort) *values = append(*values, fields.cfPort) } return fields } // deref dereferences a uintptr that points to another uintptr. // This is used to load values from symbol addresses returned by Dlsym. // For example, Dlsym returns the address of kCFBooleanTrue, which itself // contains the actual CFBooleanRef value. // // The double-pointer cast pattern (**(**uintptr)(unsafe.Pointer(&ptr))) is used // instead of the simpler *(*uintptr)(unsafe.Pointer(ptr)) to satisfy go vet's // unsafe.Pointer conversion rules: we take the address of the local variable // (rule 1) rather than converting a uintptr directly to unsafe.Pointer (rule 4). func deref(ptr uintptr) uintptr { return **(**uintptr)(unsafe.Pointer(&ptr)) } ================================================ FILE: modules/keyring/keyring_darwin_security.go ================================================ //go:build darwin package keyring import ( "bufio" "bytes" "context" "encoding/hex" "errors" "fmt" "io" "os/exec" "regexp" "strconv" "strings" "unicode" ) const ( storageSecurity = "security" // macOS only: /usr/bin/security CLI // securityCLIPath is the path to the security command-line tool securityCLIPath = "/usr/bin/security" // securityErrNotFoundExitCode is the exit code returned by security CLI when an item is not found. securityErrNotFoundExitCode = 44 // maxSecurityCommandLen is an internal defensive limit for security CLI commands. // This is NOT a documented limit of the security CLI itself, but rather a sanity check // to prevent unreasonably large credentials that may indicate a problem upstream. maxSecurityCommandLen = 64 * 1024 ) var ( shellEscapePattern = regexp.MustCompile(`[^\w@%+=:,./-]`) ) // protocolFourCC converts a protocol string to the 4-character code used by // macOS security CLI's -r flag. These codes correspond to the kSecAttrProtocol // constants in Security.framework (e.g., kSecAttrProtocolHTTPS = 'htps'). // Returns empty string for unknown protocols, in which case the caller should // omit the -r flag to avoid incorrect matching. func protocolFourCC(protocol string) string { switch strings.ToLower(protocol) { case "http": return "http" case "https": return "htps" case "ftp": return "ftp " case "ftps": return "ftps" case "imap": return "imap" case "imaps": return "imps" case "smtp": return "smtp" default: return "" } } // isSecurityNotFoundError checks if the error indicates that the item was not found. // It prioritizes exit code 44, with string matching as a fallback for compatibility. func isSecurityNotFoundError(err error, output []byte) bool { // Priority 1: Check exit code 44 (official not-found indicator) if exitErr, ok := errors.AsType[*exec.ExitError](err); ok { if exitErr.ExitCode() == securityErrNotFoundExitCode { return true } } // Priority 2: Fallback to string matching for compatibility outputStr := string(output) return strings.Contains(outputStr, "could not be found") || strings.Contains(outputStr, "The specified item could not be found") } // shellQuote returns a shell-escaped version of the string s. // The returned value is a string that can safely be used as one token in a shell command line. // // NOTE: This quoting logic is specifically designed for the `security -i` interactive mode, // which has its own command parser. The behavior is based on empirical testing of security CLI // and is NOT guaranteed by Apple documentation. This implementation may need adjustment if // future macOS versions change the CLI parser behavior. func shellQuote(s string) string { if len(s) == 0 { return "''" } if shellEscapePattern.MatchString(s) { return "'" + strings.ReplaceAll(s, "'", "'\"'\"'") + "'" } return s } // getFromSecurityCLI retrieves credentials using /usr/bin/security CLI. // Uses find-internet-password which is compatible with git-credential-osxkeychain. // This is a fallback when Security.framework access is blocked by security software. // The query parameters must match the purego implementation in keyring_darwin.go. func getFromSecurityCLI(ctx context.Context, cred *Cred) (*Cred, error) { if cred == nil { return nil, errors.New("credential cannot be nil") } if cred.Server == "" { return nil, errors.New("server is required") } // Use security find-internet-password to retrieve credentials // This matches the purego implementation and git-credential-osxkeychain pattern // -s: server name (host only, not full URL) // -r: protocol (4-char code, e.g., htps for https) // -P: port (optional) // -p: path (optional) // -a: account name (optional, but improves precision when multiple accounts exist) // -g: display password args := []string{"find-internet-password", "-s", cred.Server} // Add protocol if known (matches purego kSecAttrProtocol) if fourCC := protocolFourCC(cred.Protocol); fourCC != "" { args = append(args, "-r", fourCC) } // Add port if specified (matches purego kSecAttrPort) if cred.Port != 0 { args = append(args, "-P", strconv.Itoa(cred.Port)) } // Add path if specified (matches purego kSecAttrPath) if cred.Path != "" { args = append(args, "-p", cred.Path) } // Add account name for more precise query if available if cred.UserName != "" { args = append(args, "-a", cred.UserName) } // Add -g to display password args = append(args, "-g") cmd := exec.CommandContext(ctx, securityCLIPath, args...) out, err := cmd.CombinedOutput() if err != nil { if isSecurityNotFoundError(err, out) { return nil, ErrNotFound } return nil, fmt.Errorf("security find-internet-password failed: %w, output: %s", err, string(out)) } return parseKeychainOutput(bytes.NewReader(out)) } // parseKeychainOutput parses the output from security find-internet-password. // Output format example: // // keychain: "/Users/**/Library/Keychains/login.keychain-db" // version: 512 // class: "inet" // attributes: // "acct"="username" // "acct"=0x75736572 (hex format on some macOS versions) // "srvr"="https://zeta.example.io" // password: "password" // password: 0x68656c6c6f (hex format on some macOS versions) func parseKeychainOutput(r io.Reader) (*Cred, error) { scanner := bufio.NewScanner(r) cred := &Cred{} var err error for scanner.Scan() { line := strings.TrimFunc(scanner.Text(), unicode.IsSpace) // Parse account name: "acct"="username" or "acct"=0x... if suffix, ok := strings.CutPrefix(line, `"acct"`); ok { _, acct, _ := strings.Cut(suffix, "=") acct = strings.TrimFunc(acct, unicode.IsSpace) cred.UserName, err = parseBlobValue(acct) if err != nil { // If parsing fails, try using it as-is (be lenient for CLI fallback) cred.UserName = acct } continue } // Parse password: password: "password" or password: 0x... if password, ok := strings.CutPrefix(line, "password:"); ok { password = strings.TrimFunc(password, unicode.IsSpace) cred.Password, err = parseBlobValue(password) if err != nil { // If parsing fails, try using it as-is (be lenient for CLI fallback) cred.Password = password } continue } } // Check for scanner errors (e.g., line too long) if err = scanner.Err(); err != nil { return nil, fmt.Errorf("failed to parse keychain output: %w", err) } // Validate that password was parsed successfully // Password is the core field - without it, the credential is incomplete if cred.Password == "" { return nil, ErrNotFound } return cred, nil } // parseBlobValue parses a value from security CLI output. // It handles both quoted strings ("value") and hex format (0x68656c6c6f). func parseBlobValue(s string) (string, error) { // Handle hex format: 0x68656c6c6f if strings.HasPrefix(s, "0x") || strings.HasPrefix(s, "0X") { decoded, err := hex.DecodeString(s[2:]) if err != nil { return "", fmt.Errorf("failed to decode hex value: %w", err) } return string(decoded), nil } // Handle quoted string return strconv.Unquote(s) } // storeToSecurityCLI stores credentials using /usr/bin/security CLI. // Uses add-internet-password which is compatible with git-credential-osxkeychain. // The storage parameters must match the purego implementation in keyring_darwin.go. func storeToSecurityCLI(ctx context.Context, cred *Cred) error { if cred == nil { return errors.New("credential cannot be nil") } if cred.UserName == "" { return errors.New("username cannot be empty") } if cred.Password == "" { return errors.New("password cannot be empty") } if cred.Server == "" { return errors.New("server cannot be empty") } // Use security -i for interactive mode to handle special characters cmd := exec.CommandContext(ctx, securityCLIPath, "-i") stdin, err := cmd.StdinPipe() if err != nil { return fmt.Errorf("failed to create stdin pipe: %w", err) } if err = cmd.Start(); err != nil { return fmt.Errorf("failed to start security command: %w", err) } // Build the add-internet-password command // -U flag updates existing item if present // -s: server name (host only, not full URL) - matches purego kSecAttrServer // -r: protocol (4-char code) - matches purego kSecAttrProtocol // -P: port (optional) - matches purego kSecAttrPort // -p: path (optional) - matches purego kSecAttrPath // -a: account name - matches purego kSecAttrAccount // -w: password var commandBuilder strings.Builder commandBuilder.WriteString("add-internet-password -U -s ") commandBuilder.WriteString(shellQuote(cred.Server)) // Add protocol if known (matches purego kSecAttrProtocol) if fourCC := protocolFourCC(cred.Protocol); fourCC != "" { commandBuilder.WriteString(" -r ") commandBuilder.WriteString(fourCC) } if cred.Port != 0 { commandBuilder.WriteString(" -P ") commandBuilder.WriteString(strconv.Itoa(cred.Port)) } if cred.Path != "" { commandBuilder.WriteString(" -p ") commandBuilder.WriteString(shellQuote(cred.Path)) } commandBuilder.WriteString(" -a ") commandBuilder.WriteString(shellQuote(cred.UserName)) commandBuilder.WriteString(" -w ") commandBuilder.WriteString(shellQuote(cred.Password)) commandBuilder.WriteString("\n") command := commandBuilder.String() // Limit command length as a defensive measure against unreasonably large input. // Keychain itself doesn't have this limit, but extremely long server names or // passwords usually indicate a problem upstream. This limit is conservative // and can be increased if needed. if len(command) > maxSecurityCommandLen { _ = stdin.Close() _ = cmd.Wait() return ErrSetDataTooBig } // Write the command if _, err := io.WriteString(stdin, command); err != nil { _ = stdin.Close() _ = cmd.Wait() return fmt.Errorf("failed to write command: %w", err) } // Close stdin to signal end of input if err = stdin.Close(); err != nil { _ = cmd.Wait() return fmt.Errorf("failed to close stdin: %w", err) } // Wait for the command to complete if err = cmd.Wait(); err != nil { return fmt.Errorf("security add-internet-password failed: %w", err) } return nil } // eraseFromSecurityCLI removes credentials using /usr/bin/security CLI. // Uses delete-internet-password to match the find-internet-password pattern. // The query parameters must match the purego implementation in keyring_darwin.go. func eraseFromSecurityCLI(ctx context.Context, cred *Cred) error { if cred == nil { return errors.New("credential cannot be nil") } if cred.Server == "" { return errors.New("server is required") } // Use delete-internet-password to match find-internet-password // -s: server name (host only, not full URL) - matches purego kSecAttrServer // -r: protocol (4-char code) - matches purego kSecAttrProtocol // -P: port (optional) - matches purego kSecAttrPort // -p: path (optional) - matches purego kSecAttrPath // -a: account name (optional, but ensures precise deletion when multiple accounts exist) args := []string{"delete-internet-password", "-s", cred.Server} // Add protocol if known (matches purego kSecAttrProtocol) if fourCC := protocolFourCC(cred.Protocol); fourCC != "" { args = append(args, "-r", fourCC) } // Add port if specified (matches purego kSecAttrPort) if cred.Port != 0 { args = append(args, "-P", strconv.Itoa(cred.Port)) } // Add path if specified (matches purego kSecAttrPath) if cred.Path != "" { args = append(args, "-p", cred.Path) } // Add account name for more precise deletion if available if cred.UserName != "" { args = append(args, "-a", cred.UserName) } cmd := exec.CommandContext(ctx, securityCLIPath, args...) out, err := cmd.CombinedOutput() if err != nil { // Item not found is not an error - deletion is idempotent if isSecurityNotFoundError(err, out) { return nil } return fmt.Errorf("security delete-internet-password failed: %w, output: %s", err, string(out)) } return nil } ================================================ FILE: modules/keyring/keyring_darwin_security_test.go ================================================ //go:build darwin package keyring import ( "errors" "context" "testing" ) func TestSecurityCLI(t *testing.T) { ctx := context.Background() // Test credential cred := &Cred{ Server: "test.example.com", Protocol: "https", UserName: "testuser", Password: "testpassword123", } // Test 1: Store credential t.Run("Store", func(t *testing.T) { err := storeToSecurityCLI(ctx, cred) if err != nil { t.Fatalf("storeToSecurityCLI failed: %v", err) } t.Log("Store: OK") }) // Test 2: Get credential t.Run("Get", func(t *testing.T) { got, err := getFromSecurityCLI(ctx, cred) if err != nil { t.Fatalf("getFromSecurityCLI failed: %v", err) } if got.UserName != cred.UserName { t.Errorf("username mismatch: got %q, want %q", got.UserName, cred.UserName) } if got.Password != cred.Password { t.Errorf("password mismatch: got %q, want %q", got.Password, cred.Password) } t.Logf("Get: OK - username=%q, password=%q", got.UserName, got.Password) }) // Test 3: Erase credential t.Run("Erase", func(t *testing.T) { err := eraseFromSecurityCLI(ctx, cred) if err != nil { t.Fatalf("eraseFromSecurityCLI failed: %v", err) } t.Log("Erase: OK") }) // Test 4: Get after erase (should return ErrNotFound) t.Run("GetAfterErase", func(t *testing.T) { _, err := getFromSecurityCLI(ctx, cred) if !errors.Is(err, ErrNotFound) { t.Errorf("expected ErrNotFound, got: %v", err) } else { t.Log("GetAfterErase: OK - returned ErrNotFound as expected") } }) // Test 5: Erase again (should be idempotent, return nil) t.Run("EraseAgain", func(t *testing.T) { err := eraseFromSecurityCLI(ctx, cred) if err != nil { t.Errorf("eraseFromSecurityCLI should be idempotent, got error: %v", err) } else { t.Log("EraseAgain: OK - idempotent deletion returned nil") } }) } func TestSecurityCLIWithHTTP(t *testing.T) { ctx := context.Background() cred := &Cred{ Server: "http.example.com", Protocol: "http", UserName: "httpuser", Password: "httppassword", } // Store and verify err := storeToSecurityCLI(ctx, cred) if err != nil { t.Fatalf("storeToSecurityCLI failed: %v", err) } t.Log("Store (HTTP): OK") got, err := getFromSecurityCLI(ctx, cred) if err != nil { t.Fatalf("getFromSecurityCLI failed: %v", err) } t.Logf("Get (HTTP): OK - username=%q, password=%q", got.UserName, got.Password) // Cleanup _ = eraseFromSecurityCLI(ctx, cred) } func TestSecurityCLIWithSpecialChars(t *testing.T) { ctx := context.Background() cred := &Cred{ Server: "special.example.com", Protocol: "https", UserName: "user with spaces", Password: "p@ssw0rd!#$%^&*()", } // Store and verify err := storeToSecurityCLI(ctx, cred) if err != nil { t.Fatalf("storeToSecurityCLI failed: %v", err) } t.Log("Store (special chars): OK") got, err := getFromSecurityCLI(ctx, cred) if err != nil { t.Fatalf("getFromSecurityCLI failed: %v", err) } if got.UserName != cred.UserName { t.Errorf("username mismatch: got %q, want %q", got.UserName, cred.UserName) } if got.Password != cred.Password { t.Errorf("password mismatch: got %q, want %q", got.Password, cred.Password) } t.Logf("Get (special chars): OK - username=%q, password=%q", got.UserName, got.Password) // Cleanup _ = eraseFromSecurityCLI(ctx, cred) } ================================================ FILE: modules/keyring/keyring_darwin_test.go ================================================ //go:build darwin package keyring import ( "errors" "testing" ) func TestGet(t *testing.T) { cred, err := Get(t.Context(), &Cred{Server: "zeta.io"}) if err != nil { if errors.Is(err, ErrNotFound) { t.Skip("no credential found for zeta.io") } t.Fatalf("Get failed: %v", err) } t.Logf("found credential: username=%q, server=%q", cred.UserName, cred.Server) } ================================================ FILE: modules/keyring/keyring_file.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 //go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || windows package keyring import ( "context" "crypto/aes" "crypto/cipher" "crypto/rand" "crypto/sha256" "errors" "fmt" "io" "maps" "os" "os/user" "path/filepath" "slices" "sync" "time" "github.com/antgroup/hugescm/modules/base58" "github.com/antgroup/hugescm/modules/strengthen" "github.com/pelletier/go-toml/v2" "golang.org/x/crypto/hkdf" ) // credentialStorage implements encrypted file-based credential storage. // Credentials are stored in TOML format with each field encrypted separately. type credentialStorage struct { mu sync.Mutex configDir string key []byte storagePath string } // credentialEntry represents a single encrypted credential entry in TOML type credentialEntry struct { Target string `toml:"target"` Username string `toml:"username"` Password string `toml:"password"` } // credentialsFile represents the TOML file structure type credentialsFile struct { Credentials []credentialEntry `toml:"credentials"` } const ( defaultCredentialsFileName = "credentials" nonceSize = 12 lockRetryInterval = 20 * time.Millisecond lockStaleAfter = 2 * time.Minute ) // newCredentialStorage creates a new file-based credential storage. // If encryptionKey is empty, it will be automatically derived from system information. func newCredentialStorage(encryptionKey, storagePath string) (*credentialStorage, error) { configDir, err := getConfigDir() if err != nil { return nil, fmt.Errorf("failed to get config directory: %w", err) } key, err := deriveOrValidateKey(encryptionKey) if err != nil { return nil, err } // Set storage path if storagePath == "" { storagePath = filepath.Join(configDir, defaultCredentialsFileName) } return &credentialStorage{ configDir: configDir, key: key, storagePath: storagePath, }, nil } // deriveOrValidateKey derives or validates the encryption key. // Supports: raw string, base58-encoded, or auto-derived. func deriveOrValidateKey(encryptionKey string) ([]byte, error) { if encryptionKey == "" { return deriveEncryptionKey() } // Try base58 first (project standard). If it decodes to a valid AES key // length, use it directly. Otherwise, treat input as raw string and hash it. if keyBytes := base58.Decode(encryptionKey); len(keyBytes) > 0 { if slices.Contains([]int{16, 24, 32}, len(keyBytes)) { // Use HKDF to derive a 32-byte key for AES-256 // This preserves the full entropy of shorter keys (16 or 24 bytes) // rather than zero-padding which reduces effective security. if len(keyBytes) < 32 { derived := make([]byte, 32) kdf := hkdf.New(sha256.New, keyBytes, nil, []byte("zeta-keyring-v1")) if _, err := io.ReadFull(kdf, derived); err != nil { return nil, fmt.Errorf("failed to derive key: %w", err) } return derived, nil } return keyBytes, nil } } // Fallback: hash the raw string return hashKey(encryptionKey), nil } // hashKey hashes a raw string to a 32-byte key func hashKey(key string) []byte { h := sha256.New() h.Write([]byte(key)) return h.Sum(nil) } // getConfigDir returns the configuration directory path func getConfigDir() (string, error) { homeDir, err := os.UserHomeDir() if err != nil { return "", err } configDir := filepath.Join(homeDir, ".config", "zeta") if err := os.MkdirAll(configDir, 0700); err != nil { return "", fmt.Errorf("failed to create config directory: %w", err) } return configDir, nil } // deriveEncryptionKey derives an AES-256 key from system-specific information. // Key = SHA-256(home_dir || hostname || username) // // SECURITY WARNING: This provides obfuscation-level protection, NOT cryptographic security. // The key is derived from publicly accessible system information (home directory, hostname, // username), which can be easily obtained by an attacker with local access. This prevents // casual snooping but NOT a determined attacker. // // For production use requiring real security, provide an explicit encryption key via // WithEncryptionKey() option, stored securely (e.g., hardware security module, secure // enclave, or user-provided passphrase through a KDF like Argon2 or scrypt). func deriveEncryptionKey() ([]byte, error) { homeDir, err := os.UserHomeDir() if err != nil { return nil, fmt.Errorf("failed to get home directory: %w", err) } hostname, _ := os.Hostname() if hostname == "" { hostname = "unknown" } username := "unknown" if currentUser, err := user.Current(); err == nil { username = currentUser.Username } h := sha256.New() h.Write([]byte(homeDir)) h.Write([]byte(hostname)) h.Write([]byte(username)) return h.Sum(nil), nil } // encrypt encrypts plaintext using AES-256-GCM and returns base58-encoded ciphertext func (s *credentialStorage) encrypt(plaintext string) (string, error) { block, err := aes.NewCipher(s.key) if err != nil { return "", fmt.Errorf("failed to create cipher: %w", err) } gcm, err := cipher.NewGCM(block) if err != nil { return "", fmt.Errorf("failed to create GCM: %w", err) } nonce := make([]byte, nonceSize) if _, err := io.ReadFull(rand.Reader, nonce); err != nil { return "", fmt.Errorf("failed to generate nonce: %w", err) } ciphertext := gcm.Seal(nonce, nonce, []byte(plaintext), nil) return base58.Encode(ciphertext), nil } // decrypt decrypts base58-encoded ciphertext using AES-256-GCM func (s *credentialStorage) decrypt(ciphertext string) (string, error) { data := base58.Decode(ciphertext) if len(data) == 0 { return "", errors.New("failed to decode base58") } if len(data) < nonceSize { return "", errors.New("ciphertext too short") } block, err := aes.NewCipher(s.key) if err != nil { return "", fmt.Errorf("failed to create cipher: %w", err) } gcm, err := cipher.NewGCM(block) if err != nil { return "", fmt.Errorf("failed to create GCM: %w", err) } nonce, ciphertextBytes := data[:nonceSize], data[nonceSize:] plaintext, err := gcm.Open(nil, nonce, ciphertextBytes, nil) if err != nil { return "", fmt.Errorf("failed to decrypt: %w", err) } return string(plaintext), nil } // readCredentials reads all credentials from the TOML file func (s *credentialStorage) readCredentials() (map[string]*Cred, error) { file, err := os.Open(s.storagePath) if err != nil { if os.IsNotExist(err) { return make(map[string]*Cred), nil } return nil, fmt.Errorf("failed to open credentials file: %w", err) } defer file.Close() // nolint var credFile credentialsFile if err := toml.NewDecoder(file).Decode(&credFile); err != nil { return nil, fmt.Errorf("failed to parse credentials file: %w", err) } credentials := make(map[string]*Cred, len(credFile.Credentials)) for _, entry := range credFile.Credentials { cred, ok := s.decryptCredentialEntry(entry) if !ok { continue // Skip unparseable entries } credentials[cred.target] = cred.Cred } return credentials, nil } // decryptedCredential holds a decrypted credential with its target type decryptedCredential struct { *Cred target string } // decryptCredentialEntry decrypts a credential entry func (s *credentialStorage) decryptCredentialEntry(entry credentialEntry) (*decryptedCredential, bool) { target, err := s.decrypt(entry.Target) if err != nil { return nil, false } username, err := s.decrypt(entry.Username) if err != nil { return nil, false } password, err := s.decrypt(entry.Password) if err != nil { return nil, false } cred := parseTargetName(target) cred.UserName = username cred.Password = password return &decryptedCredential{Cred: cred, target: target}, true } // writeCredentials writes all credentials to the TOML file func (s *credentialStorage) writeCredentials(credentials map[string]*Cred) error { credFile := credentialsFile{ Credentials: make([]credentialEntry, 0, len(credentials)), } // Use maps.Keys for deterministic iteration (Go 1.23+) // Build entries in sorted order for reproducible output keys := slices.Sorted(maps.Keys(credentials)) for _, target := range keys { cred := credentials[target] entry, err := s.encryptCredentialEntry(target, cred) if err != nil { return err } credFile.Credentials = append(credFile.Credentials, entry) } storageDir := filepath.Dir(s.storagePath) if err := os.MkdirAll(storageDir, 0700); err != nil { return fmt.Errorf("failed to create storage directory: %w", err) } // Write to a temporary fd and rename atomically to avoid partial/truncated writes. fd, err := os.CreateTemp(storageDir, filepath.Base(s.storagePath)+".tmp-*") if err != nil { return fmt.Errorf("failed to create temporary credentials file: %w", err) } tmpPath := fd.Name() defer func() { _ = os.Remove(tmpPath) }() if err := fd.Chmod(0600); err != nil { _ = fd.Close() return fmt.Errorf("failed to set temporary credentials file permission: %w", err) } if err := toml.NewEncoder(fd).Encode(credFile); err != nil { _ = fd.Close() return fmt.Errorf("failed to encode credentials to TOML: %w", err) } if err := fd.Sync(); err != nil { _ = fd.Close() return fmt.Errorf("failed to sync temporary credentials file: %w", err) } if err := fd.Close(); err != nil { return fmt.Errorf("failed to close temporary credentials file: %w", err) } if err := strengthen.FinalizeObject(tmpPath, s.storagePath); err != nil { return fmt.Errorf("failed to replace credentials file: %w", err) } return nil } // acquireFileLock acquires a cross-process lock by creating an exclusive lock file. func (s *credentialStorage) acquireFileLock(ctx context.Context) (func(), error) { lockPath := s.storagePath + ".lock" for { lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600) if err == nil { _, _ = io.WriteString(lockFile, fmt.Sprintf("%d\n", os.Getpid())) _ = lockFile.Close() return func() { _ = os.Remove(lockPath) }, nil } if !os.IsExist(err) { return nil, fmt.Errorf("failed to acquire file lock: %w", err) } if staleInfo, statErr := os.Stat(lockPath); statErr == nil && time.Since(staleInfo.ModTime()) > lockStaleAfter { _ = os.Remove(lockPath) continue } if err := ctx.Err(); err != nil { return nil, err } time.Sleep(lockRetryInterval) } } // encryptCredentialEntry encrypts a credential entry func (s *credentialStorage) encryptCredentialEntry(target string, cred *Cred) (credentialEntry, error) { encryptedTarget, err := s.encrypt(target) if err != nil { return credentialEntry{}, fmt.Errorf("failed to encrypt target: %w", err) } encryptedUsername, err := s.encrypt(cred.UserName) if err != nil { return credentialEntry{}, fmt.Errorf("failed to encrypt username: %w", err) } encryptedPassword, err := s.encrypt(cred.Password) if err != nil { return credentialEntry{}, fmt.Errorf("failed to encrypt password: %w", err) } return credentialEntry{ Target: encryptedTarget, Username: encryptedUsername, Password: encryptedPassword, }, nil } // Get retrieves credentials from the file storage func (s *credentialStorage) Get(ctx context.Context, cred *Cred) (*Cred, error) { if err := ctx.Err(); err != nil { return nil, err } s.mu.Lock() defer s.mu.Unlock() credentials, err := s.readCredentials() if err != nil { return nil, err } target := buildTargetName(cred) stored, ok := credentials[target] if !ok { return nil, ErrNotFound } return stored, nil } // Store saves credentials to the file storage func (s *credentialStorage) Store(ctx context.Context, cred *Cred) error { if err := ctx.Err(); err != nil { return err } if cred == nil || cred.UserName == "" || cred.Password == "" { return errors.New("invalid credential") } s.mu.Lock() defer s.mu.Unlock() releaseLock, err := s.acquireFileLock(ctx) if err != nil { return err } defer releaseLock() credentials, err := s.readCredentials() if err != nil { return err } credentials[buildTargetName(cred)] = cred return s.writeCredentials(credentials) } // Erase removes credentials from the file storage func (s *credentialStorage) Erase(ctx context.Context, cred *Cred) error { if err := ctx.Err(); err != nil { return err } s.mu.Lock() defer s.mu.Unlock() releaseLock, err := s.acquireFileLock(ctx) if err != nil { return err } defer releaseLock() credentials, err := s.readCredentials() if err != nil { return err } target := buildTargetName(cred) if _, ok := credentials[target]; !ok { return nil } delete(credentials, target) return s.writeCredentials(credentials) } // Name returns the storage name func (s *credentialStorage) Name() string { return "file" } ================================================ FILE: modules/keyring/keyring_file_test.go ================================================ //go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || windows package keyring import ( "bytes" "context" "errors" "os" "path/filepath" "testing" "time" ) func TestDeriveOrValidateKeyRawStringFallback(t *testing.T) { key, err := deriveOrValidateKey("password") if err != nil { t.Fatalf("deriveOrValidateKey returned error: %v", err) } expected := hashKey("password") if !bytes.Equal(key, expected) { t.Fatalf("unexpected key derivation for raw string") } } func TestCredentialStorageEraseIsIdempotent(t *testing.T) { storagePath := filepath.Join(t.TempDir(), "credentials") storage, err := newCredentialStorage("my-secret-key", storagePath) if err != nil { t.Fatalf("newCredentialStorage failed: %v", err) } cred := &Cred{Protocol: "https", Server: "example.com", UserName: "u", Password: "p"} if err := storage.Erase(t.Context(), cred); err != nil { t.Fatalf("Erase on non-existing credential should succeed, got: %v", err) } if err := storage.Store(t.Context(), cred); err != nil { t.Fatalf("Store failed: %v", err) } if err := storage.Erase(t.Context(), cred); err != nil { t.Fatalf("Erase failed: %v", err) } if _, err := storage.Get(t.Context(), cred); !errors.Is(err, ErrNotFound) { t.Fatalf("expected ErrNotFound after erase, got: %v", err) } } func TestAcquireFileLockTimeout(t *testing.T) { storagePath := filepath.Join(t.TempDir(), "credentials") storage, err := newCredentialStorage("my-secret-key", storagePath) if err != nil { t.Fatalf("newCredentialStorage failed: %v", err) } lockPath := storagePath + ".lock" if err := os.WriteFile(lockPath, []byte("busy"), 0600); err != nil { t.Fatalf("failed to create lock file: %v", err) } ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) defer cancel() _, err = storage.acquireFileLock(ctx) if !errors.Is(err, context.DeadlineExceeded) { t.Fatalf("expected context deadline exceeded, got: %v", err) } } func TestAcquireFileLockBreaksStaleLock(t *testing.T) { storagePath := filepath.Join(t.TempDir(), "credentials") storage, err := newCredentialStorage("my-secret-key", storagePath) if err != nil { t.Fatalf("newCredentialStorage failed: %v", err) } lockPath := storagePath + ".lock" if err := os.WriteFile(lockPath, []byte("stale"), 0600); err != nil { t.Fatalf("failed to create stale lock file: %v", err) } old := time.Now().Add(-lockStaleAfter - time.Second) if err := os.Chtimes(lockPath, old, old); err != nil { t.Fatalf("failed to set stale lock file mtime: %v", err) } release, err := storage.acquireFileLock(t.Context()) if err != nil { t.Fatalf("acquireFileLock failed for stale lock: %v", err) } release() if _, err := os.Stat(lockPath); !os.IsNotExist(err) { t.Fatalf("expected lock file to be removed, got: %v", err) } } ================================================ FILE: modules/keyring/keyring_test.go ================================================ package keyring import ( "fmt" "os" "testing" "golang.org/x/text/encoding/unicode" "golang.org/x/text/transform" ) const ( service = "test-service" testuser = "test-user" password = "test-password" ) func TestBuildTargetName(t *testing.T) { tests := []struct { name string cred *Cred expected string }{ { name: "basic https", cred: &Cred{ Protocol: "https", Server: "example.com", }, expected: "zeta+https://example.com", }, { name: "with port", cred: &Cred{ Protocol: "https", Server: "example.com", Port: 8080, }, expected: "zeta+https://example.com:8080", }, { name: "with path", cred: &Cred{ Protocol: "https", Server: "example.com", Path: "/repo", }, expected: "zeta+https://example.com/repo", }, { name: "with port and path", cred: &Cred{ Protocol: "https", Server: "example.com", Port: 8080, Path: "/repo", }, expected: "zeta+https://example.com:8080/repo", }, { name: "empty protocol defaults to https", cred: &Cred{ Server: "example.com", }, expected: "zeta+https://example.com", }, { name: "http protocol", cred: &Cred{ Protocol: "http", Server: "example.com", }, expected: "zeta+http://example.com", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := buildTargetName(tt.cred) if result != tt.expected { t.Errorf("buildTargetName() = %q, want %q", result, tt.expected) } }) } } func TestParseTargetName(t *testing.T) { tests := []struct { name string target string expected *Cred }{ { name: "basic https", target: "zeta+https://example.com", expected: &Cred{ Protocol: "https", Server: "example.com", }, }, { name: "with port", target: "zeta+https://example.com:8080", expected: &Cred{ Protocol: "https", Server: "example.com", Port: 8080, }, }, { name: "with path", target: "zeta+https://example.com/repo", expected: &Cred{ Protocol: "https", Server: "example.com", Path: "/repo", }, }, { name: "with port and path", target: "zeta+https://example.com:8080/repo", expected: &Cred{ Protocol: "https", Server: "example.com", Port: 8080, Path: "/repo", }, }, { name: "http protocol", target: "zeta+http://example.com", expected: &Cred{ Protocol: "http", Server: "example.com", }, }, { name: "invalid format without zeta prefix", target: "example.com", expected: &Cred{ Server: "example.com", }, }, { name: "invalid format without protocol separator", target: "zeta+example.com", expected: &Cred{ Server: "zeta+example.com", }, }, { name: "ipv6 address", target: "zeta+https://[::1]:8080", expected: &Cred{ Protocol: "https", Server: "::1", Port: 8080, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := parseTargetName(tt.target) if result.Protocol != tt.expected.Protocol { t.Errorf("Protocol = %q, want %q", result.Protocol, tt.expected.Protocol) } if result.Server != tt.expected.Server { t.Errorf("Server = %q, want %q", result.Server, tt.expected.Server) } if result.Port != tt.expected.Port { t.Errorf("Port = %d, want %d", result.Port, tt.expected.Port) } if result.Path != tt.expected.Path { t.Errorf("Path = %q, want %q", result.Path, tt.expected.Path) } }) } } func TestBuildAndParseTargetName(t *testing.T) { tests := []struct { name string cred *Cred }{ { name: "basic https", cred: &Cred{ Protocol: "https", Server: "example.com", }, }, { name: "with port", cred: &Cred{ Protocol: "https", Server: "example.com", Port: 8080, }, }, { name: "with path", cred: &Cred{ Protocol: "https", Server: "example.com", Path: "/repo/project", }, }, { name: "with port and path", cred: &Cred{ Protocol: "https", Server: "git.example.com", Port: 22, Path: "/org/repo.git", }, }, { name: "http protocol", cred: &Cred{ Protocol: "http", Server: "localhost", Port: 3000, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { target := buildTargetName(tt.cred) result := parseTargetName(target) if result.Protocol != tt.cred.Protocol { t.Errorf("Protocol = %q, want %q", result.Protocol, tt.cred.Protocol) } if result.Server != tt.cred.Server { t.Errorf("Server = %q, want %q", result.Server, tt.cred.Server) } if result.Port != tt.cred.Port { t.Errorf("Port = %d, want %d", result.Port, tt.cred.Port) } if result.Path != tt.cred.Path { t.Errorf("Path = %q, want %q", result.Path, tt.cred.Path) } }) } } // TestStore tests setting a user and password in keyring. func TestStore(t *testing.T) { cred := NewCredFromURL("https://" + service) cred.UserName = testuser cred.Password = password err := Store(t.Context(), cred) if err != nil { t.Errorf("Should not fail, got: %s", err) } } func TestEncodePassword(t *testing.T) { encoder := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewEncoder() encodedCred, _, err := transform.Bytes(encoder, []byte("My Password 你好 🦚")) if err != nil { fmt.Fprintf(os.Stderr, "my password: %v\n", err) return } fmt.Fprintf(os.Stderr, "%x\n", encodedCred) dec := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder() password, _, err := transform.Bytes(dec, encodedCred) if err != nil { fmt.Fprintf(os.Stderr, "my password: %v\n", err) return } fmt.Fprintf(os.Stderr, "Password: %v\n", string(password)) } ================================================ FILE: modules/keyring/keyring_unix.go ================================================ //go:build dragonfly || freebsd || linux || netbsd || openbsd // Package keyring provides cross-platform credential storage for Zeta. // This file implements Unix/Linux storage with configurable storage storages. // // Linux Behavior: // - By default (storage="auto"): Does NOT store credentials unless explicitly configured // - To enable storage, set: zeta config credential.storage secret-service // - Or set environment variable: ZETA_CREDENTIAL_STORAGE=secret-service // // This design avoids DBUS errors on systems without Secret Service. package keyring import ( "context" "errors" "fmt" "strings" ss "github.com/antgroup/hugescm/modules/keyring/secret_service" dbus "github.com/godbus/dbus/v5" ) // Constants for Unix/Linux systems const ( // zetaUserName is the fixed username used for all stored credentials. // We use a constant username and encode the actual username in the credential data. zetaUserName = "zeta-credential-manager" // maxUnixUserNameLength is the maximum username length for Unix/Linux systems. // Matched with Windows CRED_MAX_USERNAME_LENGTH for consistency. maxUnixUserNameLength = 513 // maxUnixPasswordLength is the maximum password length for Unix/Linux systems. // While there's no theoretical limit, performance suffers with big values (>100KiB). // We set a reasonable limit of 100KiB. maxUnixPasswordLength = 100 * 1024 // 100 KiB ) // Storage mode constants for Unix/Linux const ( storageSecretService = "secret-service" ) // storageConfig holds configuration for credential storage type storageConfig struct { mode string encryptionKey string storagePath string } // resolveStorageConfig determines the credential storage configuration. // Priority: opts parameters > default (none) // Note: Environment variables are already handled by upper layer (repository.go) func resolveStorageConfig(opts ...Option) *storageConfig { options := resolveStorageOptions(opts...) cfg := &storageConfig{ mode: strings.ToLower(strings.TrimSpace(options.Storage)), encryptionKey: options.EncryptionKey, storagePath: options.StoragePath, } // Default to "none" if not configured if cfg.mode == "" { cfg.mode = storageNone } return cfg } // getCredentialStorageWithConfig returns a credential storage instance with the given config. func getCredentialStorageWithConfig(cfg *storageConfig) (*credentialStorage, error) { return newCredentialStorage(cfg.encryptionKey, cfg.storagePath) } // Get retrieves credentials from the configured storage. // On Linux, this will only attempt to read if storage is configured. // Returns ErrNotFound if credential doesn't exist or storage is disabled. func Get(ctx context.Context, cred *Cred, opts ...Option) (*Cred, error) { if ctx.Err() != nil { return nil, ctx.Err() } if cred == nil { return nil, errors.New("credential cannot be nil") } cfg := resolveStorageConfig(opts...) mode := cfg.mode switch mode { case storageNone, storageAuto: // For "auto" or "none", don't attempt to read by default // This prevents DBUS errors on systems without Secret Service return nil, ErrNotFound case storageSecretService: return getFromSecretService(cred) case storageFile: storage, err := getCredentialStorageWithConfig(cfg) if err != nil { return nil, fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Get(ctx, cred) default: // Unknown storage mode, treat as disabled return nil, ErrNotFound } } // Store saves credentials to the configured storage. // On Linux, this will only attempt to store if storage is explicitly configured. // Returns ErrStorageDisabled if storage is not enabled. func Store(ctx context.Context, cred *Cred, opts ...Option) error { if ctx.Err() != nil { return ctx.Err() } if cred == nil { return errors.New("credential cannot be nil") } // Validate input if cred.UserName == "" { return errors.New("username cannot be empty") } if cred.Password == "" { return errors.New("password cannot be empty") } if cred.Server == "" { return errors.New("server cannot be empty") } // Validate username cannot contain null byte if strings.Contains(cred.UserName, "\x00") { return errors.New("invalid username: contains null byte") } // Validate size limits if len(cred.UserName) > maxUnixUserNameLength { return fmt.Errorf("username too long (max %d bytes)", maxUnixUserNameLength) } if len(cred.Password) > maxUnixPasswordLength { return fmt.Errorf("password too long (max %d bytes)", maxUnixPasswordLength) } cfg := resolveStorageConfig(opts...) mode := cfg.mode switch mode { case storageNone, storageAuto: // For "auto" or "none", don't store credentials by default // This prevents DBUS errors and is the safe default for Linux return ErrStorageDisabled case storageSecretService: return storeToSecretService(cred) case storageFile: storage, err := getCredentialStorageWithConfig(cfg) if err != nil { return fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Store(ctx, cred) default: // Unknown storage mode, treat as disabled return ErrStorageDisabled } } // Erase removes credentials from the configured storage. // Returns ErrStorageDisabled if storage is not enabled. func Erase(ctx context.Context, cred *Cred, opts ...Option) error { if ctx.Err() != nil { return ctx.Err() } if cred == nil { return errors.New("credential cannot be nil") } cfg := resolveStorageConfig(opts...) mode := cfg.mode switch mode { case storageNone, storageAuto: return ErrStorageDisabled case storageSecretService: return eraseFromSecretService(cred) case storageFile: storage, err := getCredentialStorageWithConfig(cfg) if err != nil { return fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Erase(ctx, cred) default: return ErrStorageDisabled } } // getFromSecretService retrieves credentials from libsecret (Secret Service API). // Note: libsecret API is synchronous and doesn't support context cancellation. func getFromSecretService(cred *Cred) (*Cred, error) { svc, err := ss.NewSecretService() if err != nil { return nil, fmt.Errorf("failed to connect to secret service: %w", err) } targetName := buildTargetName(cred) item, err := findItem(svc, targetName, zetaUserName) if err != nil { return nil, err } // Open a session to retrieve the secret session, err := svc.OpenSession() if err != nil { return nil, fmt.Errorf("failed to open session: %w", err) } defer svc.Close(session) // Unlock the item if it's locked if err := svc.Unlock(item); err != nil { return nil, fmt.Errorf("failed to unlock item: %w", err) } // Retrieve the secret secret, err := svc.GetSecret(item, session.Path()) if err != nil { return nil, fmt.Errorf("failed to get secret: %w", err) } // Parse the credential data (username + null byte + password) userName, password, ok := strings.Cut(string(secret.Value), "\x00") if !ok { return nil, errors.New("invalid credential format") } // Validate password if password == "" { return nil, errors.New("invalid credential: empty password not allowed") } // Return credential with all fields return &Cred{ UserName: userName, Password: password, Protocol: cred.Protocol, Server: cred.Server, Port: cred.Port, Path: cred.Path, }, nil } // storeToSecretService saves credentials in libsecret (Secret Service API). // Note: libsecret API is synchronous and doesn't support context cancellation. func storeToSecretService(cred *Cred) error { svc, err := ss.NewSecretService() if err != nil { return fmt.Errorf("failed to connect to secret service: %w", err) } // Open a session session, err := svc.OpenSession() if err != nil { return fmt.Errorf("failed to open session: %w", err) } defer svc.Close(session) targetName := buildTargetName(cred) // Build attributes for searching the credential attributes := map[string]string{ "username": zetaUserName, "service": targetName, } // Create secret object secret := ss.NewSecret(session.Path(), cred.Password) // Get login collection collection := svc.GetLoginCollection() // Unlock the collection if err := svc.Unlock(collection.Path()); err != nil { return fmt.Errorf("failed to unlock collection: %w", err) } // Encode credential data (username + null byte + password) body := fmt.Sprintf("%s\x00%s", cred.UserName, cred.Password) // Create or update the item secret.Value = []byte(body) // Try to create the item err = svc.CreateItem( collection, fmt.Sprintf("Zeta credential for %s", cred.Server), attributes, secret, ) if err != nil { // Item might already exist, try to update it item, findErr := findItem(svc, targetName, zetaUserName) if findErr != nil { return fmt.Errorf("failed to create item: %w", err) } if err := svc.Delete(item); err != nil { return fmt.Errorf("failed to delete existing item: %w", err) } // Try creating again if err := svc.CreateItem( collection, fmt.Sprintf("Zeta credential for %s", cred.Server), attributes, secret, ); err != nil { return fmt.Errorf("failed to create item after delete: %w", err) } } return nil } // eraseFromSecretService removes credentials from libsecret (Secret Service API). // Note: libsecret API is synchronous and doesn't support context cancellation. func eraseFromSecretService(cred *Cred) error { svc, err := ss.NewSecretService() if err != nil { return fmt.Errorf("failed to connect to secret service: %w", err) } targetName := buildTargetName(cred) item, err := findItem(svc, targetName, zetaUserName) if err != nil { if errors.Is(err, ErrNotFound) { return nil } return err } if err := svc.Delete(item); err != nil { return fmt.Errorf("failed to delete item: %w", err) } return nil } // findItem searches for an item in libsecret by service and username. func findItem(svc *ss.SecretService, service, user string) (dbus.ObjectPath, error) { collection := svc.GetLoginCollection() search := map[string]string{ "username": user, "service": service, } if err := svc.Unlock(collection.Path()); err != nil { return "", fmt.Errorf("failed to unlock collection: %w", err) } results, err := svc.SearchItems(collection, search) if err != nil { return "", fmt.Errorf("failed to search items: %w", err) } if len(results) == 0 { return "", ErrNotFound } return results[0], nil } ================================================ FILE: modules/keyring/keyring_windows.go ================================================ //go:build windows // Package keyring provides cross-platform credential storage for Zeta. // This file implements the Windows backend using Windows Credential Manager. // Default: Uses Windows Credential Manager API // Alternative: Set storage="file" to use encrypted file storage package keyring import ( "context" "errors" "fmt" "strings" "syscall" "unsafe" "golang.org/x/sys/windows" ) // Constants for Windows Credential Manager const ( // CRED_MAX_USERNAME_LENGTH is the maximum username length in Windows. // Source: https://learn.microsoft.com/en-us/windows/win32/api/wincred/ns-wincred-credentiala CRED_MAX_USERNAME_LENGTH = 513 // CRED_MAX_GENERIC_TARGET_NAME_LENGTH is the maximum target name length. CRED_MAX_GENERIC_TARGET_NAME_LENGTH = 32767 // CRED_MAX_CREDENTIAL_BLOB_SIZE is the maximum size of CredentialBlob in bytes. // Note: The official CRED_MAX_CREDENTIAL_BLOB_SIZE (512) applies to domain credentials. // For CRED_TYPE_GENERIC, the practical limit is higher (typically 2560 bytes). // We use the higher limit for generic credentials to match git-credential-manager behavior. CRED_MAX_CREDENTIAL_BLOB_SIZE = 2560 // CRED_TYPE_GENERIC is the credential type for generic credentials. CRED_TYPE_GENERIC = 1 // CRED_PERSIST_LOCAL_MACHINE stores the credential in the local machine. CRED_PERSIST_LOCAL_MACHINE = 2 // CRED_PERSIST_SESSION stores the credential for the session only. CRED_PERSIST_SESSION = 1 ) // Windows API constants var ( // advapi32.dll functions modadvapi32 = windows.NewLazySystemDLL("advapi32.dll") procCredWriteW = modadvapi32.NewProc("CredWriteW") procCredReadW = modadvapi32.NewProc("CredReadW") procCredDeleteW = modadvapi32.NewProc("CredDeleteW") procCredFree = modadvapi32.NewProc("CredFree") // Error codes ERROR_NOT_FOUND = syscall.Errno(1168) // ERROR_NOT_FOUND ) // CREDENTIALW is the Windows credential structure. // Source: https://learn.microsoft.com/en-us/windows/win32/api/wincred/ns-wincred-credentialw type CREDENTIALW struct { Flags uint32 Type uint32 TargetName *uint16 Comment *uint16 LastWritten windows.Filetime CredentialBlobSize uint32 CredentialBlob *byte Persist uint32 AttributeCount uint32 Attributes uintptr TargetAlias *uint16 UserName *uint16 } // Get retrieves credentials from the configured storage backend. // Default uses Windows Credential Manager. // Set opts storage="file" to use encrypted file storage. func Get(ctx context.Context, cred *Cred, opts ...Option) (*Cred, error) { if ctx.Err() != nil { return nil, ctx.Err() } if cred == nil { return nil, errors.New("credential cannot be nil") } options := resolveStorageOptions(opts...) switch options.Storage { case storageAuto: return getFromCred(ctx, cred) case storageFile: storage, err := newCredentialStorage(options.EncryptionKey, options.StoragePath) if err != nil { return nil, fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Get(ctx, cred) case storageNone: return nil, ErrNotFound default: return nil, fmt.Errorf("unknown storage mode: %s", options.Storage) } } // getFromCred retrieves credentials using Windows Credential Manager. func getFromCred(ctx context.Context, cred *Cred) (*Cred, error) { // Check context before starting select { case <-ctx.Done(): return nil, ctx.Err() default: } targetName := buildTargetName(cred) if targetName == "" { return nil, errors.New("invalid credential: target name cannot be empty") } // Convert target name to UTF-16 targetNameUTF16, err := windows.UTF16PtrFromString(targetName) if err != nil { return nil, fmt.Errorf("failed to convert target name to UTF-16: %w", err) } // Prepare credential buffer var result *CREDENTIALW // Read credential ret, _, err := procCredReadW.Call( uintptr(unsafe.Pointer(targetNameUTF16)), CRED_TYPE_GENERIC, 0, // Flags uintptr(unsafe.Pointer(&result)), ) if ret == 0 { // Windows syscall returns errno as err, check it explicitly if errno, ok := err.(syscall.Errno); ok && errno == ERROR_NOT_FOUND { return nil, ErrNotFound } return nil, fmt.Errorf("failed to read credential: %w", err) } defer procCredFree.Call(uintptr(unsafe.Pointer(result))) // Extract username username := cred.UserName if result.UserName != nil { username = windows.UTF16PtrToString(result.UserName) } // Extract password if result.CredentialBlob == nil || result.CredentialBlobSize == 0 { return nil, errors.New("password cannot be empty") } passwordRaw := unsafe.Slice(result.CredentialBlob, result.CredentialBlobSize) password := string(passwordRaw) return &Cred{ UserName: username, Password: password, Protocol: cred.Protocol, Server: cred.Server, Port: cred.Port, Path: cred.Path, }, nil } // Store saves credentials to the configured storage backend. // Default uses Windows Credential Manager. // Set opts storage="file" to use encrypted file storage. func Store(ctx context.Context, cred *Cred, opts ...Option) error { if ctx.Err() != nil { return ctx.Err() } if cred == nil { return errors.New("credential cannot be nil") } // Validate input if cred.UserName == "" { return errors.New("username cannot be empty") } if cred.Password == "" { return errors.New("password cannot be empty") } if cred.Server == "" { return errors.New("server cannot be empty") } // Validate username cannot contain null byte if strings.Contains(cred.UserName, "\x00") { return errors.New("invalid username: contains null byte") } options := resolveStorageOptions(opts...) switch options.Storage { case storageAuto: return storeToCred(ctx, cred) case storageFile: storage, err := newCredentialStorage(options.EncryptionKey, options.StoragePath) if err != nil { return fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Store(ctx, cred) case storageNone: return ErrStorageDisabled default: return fmt.Errorf("unknown storage mode: %s", options.Storage) } } // storeToCred stores credentials using Windows Credential Manager. func storeToCred(ctx context.Context, cred *Cred) error { // Check context before starting select { case <-ctx.Done(): return ctx.Err() default: } // Validate size limits if len(cred.UserName) > CRED_MAX_USERNAME_LENGTH { return fmt.Errorf("username too long (max %d bytes)", CRED_MAX_USERNAME_LENGTH) } targetName := buildTargetName(cred) if targetName == "" { return errors.New("invalid credential: target name cannot be empty") } // Validate target name length if len(targetName) > CRED_MAX_GENERIC_TARGET_NAME_LENGTH { return fmt.Errorf("target name too long (max %d bytes)", CRED_MAX_GENERIC_TARGET_NAME_LENGTH) } // Convert target name and username to UTF-16 targetNameUTF16, err := windows.UTF16PtrFromString(targetName) if err != nil { return fmt.Errorf("failed to convert target name to UTF-16: %w", err) } userNameUTF16, err := windows.UTF16PtrFromString(cred.UserName) if err != nil { return fmt.Errorf("failed to convert username to UTF-16: %w", err) } commentStr := fmt.Sprintf("Zeta credential for %s", cred.Server) commentUTF16, err := windows.UTF16PtrFromString(commentStr) if err != nil { return fmt.Errorf("failed to convert comment to UTF-16: %w", err) } password := []byte(cred.Password) if len(password) > CRED_MAX_CREDENTIAL_BLOB_SIZE { return fmt.Errorf("password too long (max %d bytes)", CRED_MAX_CREDENTIAL_BLOB_SIZE) } // Prepare credential structure c := CREDENTIALW{ Type: CRED_TYPE_GENERIC, Persist: CRED_PERSIST_LOCAL_MACHINE, TargetName: targetNameUTF16, UserName: userNameUTF16, CredentialBlobSize: uint32(len(password)), Comment: commentUTF16, } if len(password) > 0 { c.CredentialBlob = &password[0] } // Write credential ret, _, err := procCredWriteW.Call( uintptr(unsafe.Pointer(&c)), 0, // Flags ) if ret == 0 { return fmt.Errorf("failed to write credential: %w", err) } return nil } // Erase removes credentials from the configured storage backend. // Default uses Windows Credential Manager. // Set opts storage="file" to use encrypted file storage. func Erase(ctx context.Context, cred *Cred, opts ...Option) error { if ctx.Err() != nil { return ctx.Err() } if cred == nil { return errors.New("credential cannot be nil") } options := resolveStorageOptions(opts...) switch options.Storage { case storageAuto: return eraseFromCred(ctx, cred) case storageFile: storage, err := newCredentialStorage(options.EncryptionKey, options.StoragePath) if err != nil { return fmt.Errorf("failed to initialize file storage: %w", err) } return storage.Erase(ctx, cred) case storageNone: return ErrStorageDisabled default: return fmt.Errorf("unknown storage mode: %s", options.Storage) } } // eraseFromCred removes credentials using Windows Credential Manager. func eraseFromCred(ctx context.Context, cred *Cred) error { // Check context before starting select { case <-ctx.Done(): return ctx.Err() default: } targetName := buildTargetName(cred) if targetName == "" { return errors.New("invalid credential: target name cannot be empty") } // Convert target name to UTF-16 targetNameUTF16, err := windows.UTF16PtrFromString(targetName) if err != nil { return fmt.Errorf("failed to convert target name to UTF-16: %w", err) } // Delete credential ret, _, err := procCredDeleteW.Call( uintptr(unsafe.Pointer(targetNameUTF16)), CRED_TYPE_GENERIC, 0, // Flags ) if ret == 0 { // Windows syscall returns errno as err, check it explicitly if errno, ok := err.(syscall.Errno); ok && errno == ERROR_NOT_FOUND { return nil } return fmt.Errorf("failed to delete credential: %w", err) } return nil } ================================================ FILE: modules/keyring/secret_service/secret_service.go ================================================ package ss import ( "fmt" "slices" "errors" dbus "github.com/godbus/dbus/v5" ) const ( serviceName = "org.freedesktop.secrets" servicePath = "/org/freedesktop/secrets" serviceInterface = "org.freedesktop.Secret.Service" collectionInterface = "org.freedesktop.Secret.Collection" collectionsInterface = "org.freedesktop.Secret.Service.Collections" itemInterface = "org.freedesktop.Secret.Item" sessionInterface = "org.freedesktop.Secret.Session" promptInterface = "org.freedesktop.Secret.Prompt" loginCollectionAlias = "/org/freedesktop/secrets/aliases/default" collectionBasePath = "/org/freedesktop/secrets/collection/" ) // Secret defines a org.freedesktop.Secret.Item secret struct. type Secret struct { Session dbus.ObjectPath Parameters []byte Value []byte ContentType string `dbus:"content_type"` } // NewSecret initializes a new Secret. func NewSecret(session dbus.ObjectPath, secret string) Secret { return Secret{ Session: session, Parameters: []byte{}, Value: []byte(secret), ContentType: "text/plain; charset=utf8", } } // SecretService is an interface for the Secret Service dbus API. type SecretService struct { *dbus.Conn object dbus.BusObject } // NewSecretService inializes a new SecretService object. func NewSecretService() (*SecretService, error) { conn, err := dbus.SessionBus() if err != nil { return nil, err } return &SecretService{ conn, conn.Object(serviceName, servicePath), }, nil } // OpenSession opens a secret service session. func (s *SecretService) OpenSession() (dbus.BusObject, error) { var disregard dbus.Variant var sessionPath dbus.ObjectPath err := s.object.Call(serviceInterface+".OpenSession", 0, "plain", dbus.MakeVariant("")).Store(&disregard, &sessionPath) if err != nil { return nil, err } return s.Object(serviceName, sessionPath), nil } // CheckCollectionPath accepts dbus path and returns nil if the path is found // in the collection interface (and can be used). func (s *SecretService) CheckCollectionPath(path dbus.ObjectPath) error { obj := s.Object(serviceName, servicePath) val, err := obj.GetProperty(collectionsInterface) if err != nil { return err } paths := val.Value().([]dbus.ObjectPath) if slices.Contains(paths, path) { return nil } return errors.New("path not found") } // GetCollection returns a collection from a name. func (s *SecretService) GetCollection(name string) dbus.BusObject { return s.Object(serviceName, dbus.ObjectPath(collectionBasePath+name)) } // GetLoginCollection decides and returns the dbus collection to be used for login. func (s *SecretService) GetLoginCollection() dbus.BusObject { path := dbus.ObjectPath(collectionBasePath + "login") if err := s.CheckCollectionPath(path); err != nil { path = dbus.ObjectPath(loginCollectionAlias) } return s.Object(serviceName, path) } // Unlock unlocks a collection. func (s *SecretService) Unlock(collection dbus.ObjectPath) error { var unlocked []dbus.ObjectPath var prompt dbus.ObjectPath err := s.object.Call(serviceInterface+".Unlock", 0, []dbus.ObjectPath{collection}).Store(&unlocked, &prompt) if err != nil { return err } _, v, err := s.handlePrompt(prompt) if err != nil { return err } collections := v.Value() switch c := collections.(type) { case []dbus.ObjectPath: unlocked = append(unlocked, c...) } if len(unlocked) != 1 || (collection != loginCollectionAlias && unlocked[0] != collection) { return fmt.Errorf("failed to unlock correct collection '%v'", collection) } return nil } // Close closes a secret service dbus session. func (s *SecretService) Close(session dbus.BusObject) error { return session.Call(sessionInterface+".Close", 0).Err } // CreateCollection with the supplied label. func (s *SecretService) CreateCollection(label string) (dbus.BusObject, error) { properties := map[string]dbus.Variant{ collectionInterface + ".Label": dbus.MakeVariant(label), } var collection, prompt dbus.ObjectPath err := s.object.Call(serviceInterface+".CreateCollection", 0, properties, ""). Store(&collection, &prompt) if err != nil { return nil, err } _, v, err := s.handlePrompt(prompt) if err != nil { return nil, err } if v.String() != "" { collection = dbus.ObjectPath(v.String()) } return s.Object(serviceName, collection), nil } // CreateItem creates an item in a collection, with label, attributes and a // related secret. func (s *SecretService) CreateItem(collection dbus.BusObject, label string, attributes map[string]string, secret Secret) error { properties := map[string]dbus.Variant{ itemInterface + ".Label": dbus.MakeVariant(label), itemInterface + ".Attributes": dbus.MakeVariant(attributes), } var item, prompt dbus.ObjectPath err := collection.Call(collectionInterface+".CreateItem", 0, properties, secret, true).Store(&item, &prompt) if err != nil { return err } _, _, err = s.handlePrompt(prompt) if err != nil { return err } return nil } // handlePrompt checks if a prompt should be handles and handles it by // triggering the prompt and waiting for the Sercret service daemon to display // the prompt to the user. func (s *SecretService) handlePrompt(prompt dbus.ObjectPath) (bool, dbus.Variant, error) { if prompt != dbus.ObjectPath("/") { err := s.AddMatchSignal(dbus.WithMatchObjectPath(prompt), dbus.WithMatchInterface(promptInterface), ) if err != nil { return false, dbus.MakeVariant(""), err } defer func(s *SecretService, options ...dbus.MatchOption) { _ = s.RemoveMatchSignal(options...) }(s, dbus.WithMatchObjectPath(prompt), dbus.WithMatchInterface(promptInterface)) promptSignal := make(chan *dbus.Signal, 1) s.Signal(promptSignal) err = s.Object(serviceName, prompt).Call(promptInterface+".Prompt", 0, "").Err if err != nil { return false, dbus.MakeVariant(""), err } signal := <-promptSignal switch signal.Name { case promptInterface + ".Completed": dismissed := signal.Body[0].(bool) result := signal.Body[1].(dbus.Variant) return dismissed, result, nil } } return false, dbus.MakeVariant(""), nil } // SearchItems returns a list of items matching the search object. func (s *SecretService) SearchItems(collection dbus.BusObject, search any) ([]dbus.ObjectPath, error) { var results []dbus.ObjectPath err := collection.Call(collectionInterface+".SearchItems", 0, search).Store(&results) if err != nil { return nil, err } return results, nil } // GetSecret gets secret from an item in a given session. func (s *SecretService) GetSecret(itemPath dbus.ObjectPath, session dbus.ObjectPath) (*Secret, error) { var secret Secret err := s.Object(serviceName, itemPath).Call(itemInterface+".GetSecret", 0, session).Store(&secret) if err != nil { return nil, err } return &secret, nil } // Delete deletes an item from the collection. func (s *SecretService) Delete(itemPath dbus.ObjectPath) error { var prompt dbus.ObjectPath err := s.Object(serviceName, itemPath).Call(itemInterface+".Delete", 0).Store(&prompt) if err != nil { return err } _, _, err = s.handlePrompt(prompt) if err != nil { return err } return nil } ================================================ FILE: modules/lfs/LICENSE ================================================ MIT License Copyright (c) 2014-2021 GitHub, Inc. and Git LFS contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Portions of the subprocess and tools directories are copied from Go and are under the following license: Copyright (c) 2010 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Note that Git LFS uses components from other Go modules (included in vendor/) which are under different licenses. See those LICENSE files for details. ================================================ FILE: modules/lfs/error.go ================================================ package lfs import ( "errors" "fmt" ) type notAPointerError struct { message string } func (e *notAPointerError) Error() string { return fmt.Sprintf("Pointer file error: %v", e.message) } func NewNotAPointerError(message string) error { return ¬APointerError{message: message} } func IsNewNotAPointerError(err error) bool { var e *notAPointerError return errors.As(err, &e) } type badPointerKeyError struct { message string } func (e *badPointerKeyError) Error() string { return fmt.Sprintf("bad LFS Pointer: %v", e.message) } func NewBadPointerKeyError(message string) error { return &badPointerKeyError{message: message} } func IsBadPointerKeyError(err error) bool { var e *badPointerKeyError return errors.As(err, &e) } ================================================ FILE: modules/lfs/pointer.go ================================================ package lfs import ( "bufio" "bytes" "crypto/sha256" "encoding/hex" "errors" "fmt" "io" "regexp" "slices" "sort" "strconv" "strings" "github.com/antgroup/hugescm/modules/git/gitobj" ) const ( // blobSizeCutoff is used to determine which files to scan for Git LFS // pointers. Any file with a size below this cutoff will be scanned. blobSizeCutoff = 1024 ) const ( PointerMIME = "text/vnd.git-lfs" ) var ( v1Aliases = []string{ "http://git-media.io/v/2", // alpha "https://hawser.github.com/spec/v1", // pre-release "https://git-lfs.github.com/spec/v1", // public launch } latest = "https://git-lfs.github.com/spec/v1" oidType = "sha256" oidRE = regexp.MustCompile(`\A[0-9a-f]{64}\z`) matcherRE = regexp.MustCompile("git-media|hawser|git-lfs") extRE = regexp.MustCompile(`\Aext-\d{1}-\w+`) pointerKeys = []string{"version", "oid", "size"} EmptyObjectSHA256 = hex.EncodeToString(sha256.New().Sum(nil)) ) type Pointer struct { Version string Oid string Size int64 OidType string Extensions []*PointerExtension Canonical bool } // A PointerExtension is parsed from the Git LFS Pointer file. type PointerExtension struct { Name string Priority int Oid string OidType string } type ByPriority []*PointerExtension func (p ByPriority) Len() int { return len(p) } func (p ByPriority) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p ByPriority) Less(i, j int) bool { return p[i].Priority < p[j].Priority } func NewPointer(oid string, size int64, exts []*PointerExtension) *Pointer { return &Pointer{latest, oid, size, oidType, exts, true} } func NewPointerExtension(name string, priority int, oid string) *PointerExtension { return &PointerExtension{name, priority, oid, oidType} } func (p *Pointer) Encode(writer io.Writer) (int, error) { return EncodePointer(writer, p) } func (p *Pointer) Encoded() string { if p.Size == 0 { return "" } var buffer bytes.Buffer fmt.Fprintf(&buffer, "version %s\n", latest) for _, ext := range p.Extensions { fmt.Fprintf(&buffer, "ext-%d-%s %s:%s\n", ext.Priority, ext.Name, ext.OidType, ext.Oid) } fmt.Fprintf(&buffer, "oid %s:%s\n", p.OidType, p.Oid) fmt.Fprintf(&buffer, "size %d\n", p.Size) return buffer.String() } func EmptyPointer() *Pointer { return NewPointer(EmptyObjectSHA256, 0, nil) } func EncodePointer(writer io.Writer, pointer *Pointer) (int, error) { return writer.Write([]byte(pointer.Encoded())) } func DecodePointerFromBlob(b *gitobj.Blob) (*Pointer, error) { // Check size before reading if b.Size >= blobSizeCutoff { return nil, NewNotAPointerError("blob size exceeds Git LFS pointer size cutoff") } return DecodePointer(b.Contents) } func DecodePointer(reader io.Reader) (*Pointer, error) { p, _, err := DecodeFrom(reader) return p, err } // DecodeFrom decodes an *lfs.Pointer from the given io.Reader, "reader". // If the pointer encoded in the reader could successfully be read and decoded, // it will be returned with a nil error. // // If the pointer could not be decoded, an io.Reader containing the entire // blob's data will be returned, along with a parse error. func DecodeFrom(reader io.Reader) (*Pointer, io.Reader, error) { buf := make([]byte, blobSizeCutoff) n, err := reader.Read(buf) buf = buf[:n] var contents io.Reader = bytes.NewReader(buf) if !errors.Is(err, io.EOF) { contents = io.MultiReader(contents, reader) } if err != nil && !errors.Is(err, io.EOF) { return nil, contents, err } if len(buf) == 0 { return EmptyPointer(), contents, nil } p, err := decodeKV(bytes.TrimSpace(buf)) if err == nil && p != nil { p.Canonical = p.Encoded() == string(buf) } return p, contents, err } func Decode(buf []byte) (*Pointer, error) { if len(buf) >= blobSizeCutoff { return nil, NewNotAPointerError("blob size exceeds Git LFS pointer size cutoff") } p, err := decodeKV(bytes.TrimSpace(buf)) if err == nil && p != nil { p.Canonical = p.Encoded() == string(buf) } return p, err } func verifyVersion(version string) error { if len(version) == 0 { return NewNotAPointerError("Missing version") } if slices.Contains(v1Aliases, version) { return nil } return fmt.Errorf("invalid version: %s", version) } func decodeKV(data []byte) (*Pointer, error) { kvps, exts, err := decodeKVData(data) if err != nil { if IsBadPointerKeyError(err) { return nil, NewNotAPointerError(err.Error()) } return nil, err } if err := verifyVersion(kvps["version"]); err != nil { return nil, err } value, ok := kvps["oid"] if !ok { return nil, errors.New("invalid OID") } oid, err := parseOid(value) if err != nil { return nil, err } value = kvps["size"] size, err := strconv.ParseInt(value, 10, 64) if err != nil || size < 0 { return nil, fmt.Errorf("invalid size: %q", value) } var extensions []*PointerExtension if exts != nil { for key, value := range exts { ext, err := parsePointerExtension(key, value) if err != nil { return nil, err } extensions = append(extensions, ext) } if err = validatePointerExtensions(extensions); err != nil { return nil, err } sort.Sort(ByPriority(extensions)) } return NewPointer(oid, size, extensions), nil } func parseOid(value string) (string, error) { parts := strings.SplitN(value, ":", 2) if len(parts) != 2 { return "", fmt.Errorf("invalid OID value: %s", value) } if parts[0] != oidType { return "", fmt.Errorf("invalid OID type: %s", parts[0]) } oid := parts[1] if !oidRE.Match([]byte(oid)) { return "", fmt.Errorf("invalid OID: %s", oid) } return oid, nil } func parsePointerExtension(key string, value string) (*PointerExtension, error) { keyParts := strings.SplitN(key, "-", 3) if len(keyParts) != 3 || keyParts[0] != "ext" { return nil, fmt.Errorf("invalid extension value: %s", value) } p, err := strconv.Atoi(keyParts[1]) if err != nil || p < 0 { return nil, fmt.Errorf("invalid priority: %s", keyParts[1]) } name := keyParts[2] oid, err := parseOid(value) if err != nil { return nil, err } return NewPointerExtension(name, p, oid), nil } func validatePointerExtensions(exts []*PointerExtension) error { m := make(map[int]struct{}) for _, ext := range exts { if _, exist := m[ext.Priority]; exist { return fmt.Errorf("duplicate priority found: %d", ext.Priority) } m[ext.Priority] = struct{}{} } return nil } func decodeKVData(data []byte) (kvps map[string]string, exts map[string]string, err error) { kvps = make(map[string]string) if !matcherRE.Match(data) { err = NewNotAPointerError("invalid header") return } scanner := bufio.NewScanner(bytes.NewBuffer(data)) line := 0 numKeys := len(pointerKeys) for scanner.Scan() { text := scanner.Text() if len(text) == 0 { continue } parts := strings.SplitN(text, " ", 2) if len(parts) < 2 { err = NewNotAPointerError(fmt.Sprintf("error reading line %d: %s", line, text)) return } key := parts[0] value := parts[1] if numKeys <= line { err = NewNotAPointerError(fmt.Sprintf("extra line: %s", text)) return } if expected := pointerKeys[line]; key != expected { if !extRE.Match([]byte(key)) { err = NewBadPointerKeyError(fmt.Sprintf("got %s want: %s", expected, key)) return } if exts == nil { exts = make(map[string]string) } exts[key] = value continue } line += 1 kvps[key] = value } err = scanner.Err() return } func EncodeSimple(oid string, size int64) string { p := &Pointer{Oid: oid, Size: size, OidType: oidType} return p.Encoded() } ================================================ FILE: modules/lfs/pointer_test.go ================================================ package lfs import ( "bufio" "bytes" "errors" "io" "strings" "testing" ) func assertLine(t *testing.T, r *bufio.Reader, expected string) { actual, err := r.ReadString('\n') if err != nil { t.Errorf("Expected nil, got %v", err) } if expected != actual { t.Errorf("Expected %v, got %v", expected, actual) } } func TestEncode(t *testing.T) { var buf bytes.Buffer pointer := NewPointer("booya", 12345, nil) _, err := EncodePointer(&buf, pointer) if err != nil { t.Errorf("Expected nil, got %v", err) } bufReader := bufio.NewReader(&buf) assertLine(t, bufReader, "version https://git-lfs.github.com/spec/v1\n") assertLine(t, bufReader, "oid sha256:booya\n") assertLine(t, bufReader, "size 12345\n") line, err := bufReader.ReadString('\n') if err == nil { t.Fatalf("More to read: %s", line) } if !errors.Is(err, io.EOF) { t.Fatalf("Expected %v, got %v", io.EOF, err) } } func TestEncodeEmpty(t *testing.T) { var buf bytes.Buffer pointer := NewPointer("", 0, nil) _, err := EncodePointer(&buf, pointer) if nil != err { t.Errorf("Expected %v, got %v", nil, err) } bufReader := bufio.NewReader(&buf) val, err := bufReader.ReadString('\n') if val != "" { t.Errorf("Expected %v, got %v", "", val) } if !errors.Is(err, io.EOF) { t.Errorf("Expected %v, got %v", io.EOF, err) } } func TestEncodeExtensions(t *testing.T) { var buf bytes.Buffer exts := []*PointerExtension{ NewPointerExtension("foo", 0, "foo_oid"), NewPointerExtension("bar", 1, "bar_oid"), NewPointerExtension("baz", 2, "baz_oid"), } pointer := NewPointer("main_oid", 12345, exts) _, err := EncodePointer(&buf, pointer) if err != nil { t.Errorf("Expected nil, got %v", err) } bufReader := bufio.NewReader(&buf) assertLine(t, bufReader, "version https://git-lfs.github.com/spec/v1\n") assertLine(t, bufReader, "ext-0-foo sha256:foo_oid\n") assertLine(t, bufReader, "ext-1-bar sha256:bar_oid\n") assertLine(t, bufReader, "ext-2-baz sha256:baz_oid\n") assertLine(t, bufReader, "oid sha256:main_oid\n") assertLine(t, bufReader, "size 12345\n") line, err := bufReader.ReadString('\n') if err == nil { t.Fatalf("More to read: %s", line) } if !errors.Is(err, io.EOF) { t.Errorf("Expected %v, got %v", io.EOF, err) } } func TestDecode(t *testing.T) { ex := `version https://git-lfs.github.com/spec/v1 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345` p, err := DecodePointer(bytes.NewBufferString(ex)) assertEqualWithExample(t, ex, nil, err) assertEqualWithExample(t, ex, latest, p.Version) assertEqualWithExample(t, ex, "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", p.Oid) assertEqualWithExample(t, ex, "sha256", p.OidType) assertEqualWithExample(t, ex, int64(12345), p.Size) } func TestDecodeExtensions(t *testing.T) { ex := `version https://git-lfs.github.com/spec/v1 ext-0-foo sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff ext-1-bar sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb ext-2-baz sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345` p, err := DecodePointer(bytes.NewBufferString(ex)) assertEqualWithExample(t, ex, nil, err) assertEqualWithExample(t, ex, latest, p.Version) assertEqualWithExample(t, ex, "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", p.Oid) assertEqualWithExample(t, ex, int64(12345), p.Size) assertEqualWithExample(t, ex, "sha256", p.OidType) assertEqualWithExample(t, ex, "foo", p.Extensions[0].Name) assertEqualWithExample(t, ex, 0, p.Extensions[0].Priority) assertEqualWithExample(t, ex, "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", p.Extensions[0].Oid) assertEqualWithExample(t, ex, "sha256", p.Extensions[0].OidType) assertEqualWithExample(t, ex, "bar", p.Extensions[1].Name) assertEqualWithExample(t, ex, 1, p.Extensions[1].Priority) assertEqualWithExample(t, ex, "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", p.Extensions[1].Oid) assertEqualWithExample(t, ex, "sha256", p.Extensions[1].OidType) assertEqualWithExample(t, ex, "baz", p.Extensions[2].Name) assertEqualWithExample(t, ex, 2, p.Extensions[2].Priority) assertEqualWithExample(t, ex, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", p.Extensions[2].Oid) assertEqualWithExample(t, ex, "sha256", p.Extensions[2].OidType) } func TestDecodeExtensionsSort(t *testing.T) { ex := `version https://git-lfs.github.com/spec/v1 ext-2-baz sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ext-0-foo sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff ext-1-bar sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345` p, err := DecodePointer(bytes.NewBufferString(ex)) assertEqualWithExample(t, ex, nil, err) assertEqualWithExample(t, ex, latest, p.Version) assertEqualWithExample(t, ex, "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", p.Oid) assertEqualWithExample(t, ex, int64(12345), p.Size) assertEqualWithExample(t, ex, "sha256", p.OidType) assertEqualWithExample(t, ex, "foo", p.Extensions[0].Name) assertEqualWithExample(t, ex, 0, p.Extensions[0].Priority) assertEqualWithExample(t, ex, "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", p.Extensions[0].Oid) assertEqualWithExample(t, ex, "sha256", p.Extensions[0].OidType) assertEqualWithExample(t, ex, "bar", p.Extensions[1].Name) assertEqualWithExample(t, ex, 1, p.Extensions[1].Priority) assertEqualWithExample(t, ex, "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", p.Extensions[1].Oid) assertEqualWithExample(t, ex, "sha256", p.Extensions[1].OidType) assertEqualWithExample(t, ex, "baz", p.Extensions[2].Name) assertEqualWithExample(t, ex, 2, p.Extensions[2].Priority) assertEqualWithExample(t, ex, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", p.Extensions[2].Oid) assertEqualWithExample(t, ex, "sha256", p.Extensions[2].OidType) } func TestDecodePreRelease(t *testing.T) { ex := `version https://hawser.github.com/spec/v1 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345` p, err := DecodePointer(bytes.NewBufferString(ex)) assertEqualWithExample(t, ex, nil, err) assertEqualWithExample(t, ex, latest, p.Version) assertEqualWithExample(t, ex, "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", p.Oid) assertEqualWithExample(t, ex, "sha256", p.OidType) assertEqualWithExample(t, ex, int64(12345), p.Size) } func TestDecodeFromEmptyReader(t *testing.T) { p, buf, err := DecodeFrom(strings.NewReader("")) by, _ := io.ReadAll(buf) if err != nil { t.Errorf("Expected nil, got %v", err) } if p.Oid != "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" { t.Errorf("Expected %v, got %v", p.Oid, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") } if p.Size != int64(0) { t.Errorf("Expected %v, got %v", p.Size, int64(0)) } if len(by) != 0 { t.Errorf("Expected empty") } } func TestDecodeCanonical(t *testing.T) { canonicalExamples := []string{ // standard `version https://git-lfs.github.com/spec/v1 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345 `, // extensions `version https://git-lfs.github.com/spec/v1 ext-0-foo sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff ext-1-bar sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb ext-2-baz sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345 `, // empty file "", } nonCanonicalExamples := []string{ // missing trailing newline `version https://git-lfs.github.com/spec/v1 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // carriage returns "version https://git-lfs.github.com/spec/v1\r\noid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\r\nsize 12345\r\n", // trailing whitespace "version https://git-lfs.github.com/spec/v1\noid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\nsize 12345 \n", // unsorted extensions `version https://git-lfs.github.com/spec/v1 ext-2-baz sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ext-0-foo sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff ext-1-bar sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345 `, } for _, ex := range canonicalExamples { p, err := DecodePointer(bytes.NewBufferString(ex)) if err != nil { t.Errorf("Error decoding: %v", err) } if p.Canonical != true { t.Errorf("Expected %v, got %v", p.Canonical, true) } } for _, ex := range nonCanonicalExamples { p, err := DecodePointer(bytes.NewBufferString(ex)) if err != nil { t.Errorf("Error decoding: %v", err) } if p.Canonical != false { t.Errorf("Expected %v, got %v", p.Canonical, false) } } } func TestDecodeInvalid(t *testing.T) { examples := []string{ "invalid stuff", // no sha "# git-media", // bad oid `version https://git-lfs.github.com/spec/v1 oid sha256:boom size 12345`, // bad oid type `version https://git-lfs.github.com/spec/v1 oid shazam:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // no oid `version https://git-lfs.github.com/spec/v1 size 12345`, // bad version `version http://git-media.io/v/whatever oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // no version `oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // bad size `version https://git-lfs.github.com/spec/v1 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size fif`, // no size `version https://git-lfs.github.com/spec/v1 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393`, // bad `key value` format `version=https://git-lfs.github.com/spec/v1 oid=sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size=fif`, // no git-media `version=http://wat.io/v/2 oid=sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size=fif`, // extra key `version https://git-lfs.github.com/spec/v1 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345 wat wat`, // keys out of order `version https://git-lfs.github.com/spec/v1 size 12345 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393`, // bad ext name `version https://git-lfs.github.com/spec/v1 ext-0-$$$$ sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // bad ext priority `version https://git-lfs.github.com/spec/v1 ext-#-foo sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // duplicate ext priority `version https://git-lfs.github.com/spec/v1 ext-0-foo sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff ext-0-bar sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // ext priority over 9 `version https://git-lfs.github.com/spec/v1 ext-10-foo sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // bad ext oid `version https://git-lfs.github.com/spec/v1 ext-0-foo sha256:boom oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // bad ext oid type `version https://git-lfs.github.com/spec/v1 ext-0-foo boom:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393 size 12345`, // bad OID `version https://git-lfs.github.com/spec/v1 oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393& size 177735`, } for _, ex := range examples { p, err := DecodePointer(bytes.NewBufferString(ex)) if err == nil { t.Errorf("No error decoding: %v\nFrom:\n%s", p, strings.TrimSpace(ex)) } } } func assertEqualWithExample(t *testing.T, example string, expected, actual any) { if expected != actual { t.Errorf("Expected %v, got %v\nExample:\n%s", expected, actual, strings.TrimSpace(example)) } } ================================================ FILE: modules/locale/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: modules/locale/README.md ================================================ Port from: https://github.com/Xuanwo/go-locale ================================================ FILE: modules/locale/error.go ================================================ package locale import ( "errors" ) var ( // ErrNotDetected returns while no locale detected. ErrNotDetected = errors.New("not detected") // ErrNotSupported means current platform or language is not supported. ErrNotSupported = errors.New("not supported") ) // Error is the error returned by locale. type Error struct { Op string Err error } func (e *Error) Error() string { return e.Op + ": " + e.Err.Error() } // Unwrap implements xerrors.Wrapper func (e *Error) Unwrap() error { return e.Err } ================================================ FILE: modules/locale/locale.go ================================================ package locale import ( "errors" "golang.org/x/text/language" ) // Detect will detect current env's language. func Detect() (tag language.Tag, err error) { lang, err := detect() if err != nil { return language.Und, err } return language.Make(lang[0]), nil } // DetectAll will detect current env's all available language. func DetectAll() (tags []language.Tag, err error) { lang, err := detect() if err != nil { return } tags = make([]language.Tag, 0, len(lang)) for _, v := range lang { tags = append(tags, language.Make(v)) } return } type detector func() ([]string, error) func detect() (lang []string, err error) { for _, fn := range detectors { lang, err = fn() if err != nil && errors.Is(err, ErrNotDetected) { continue } if err != nil { return } return } return nil, &Error{"detect", ErrNotDetected} } ================================================ FILE: modules/locale/locale_darwin.go ================================================ //go:build darwin package locale import ( "bufio" "bytes" "os/exec" "strings" ) var detectors = []detector{ detectViaEnvLanguage, detectViaEnvLc, detectViaDefaultsSystem, } // detectViaUserDefaultsSystem will detect language via Apple User Defaults System // // We will read AppleLocale and AppleLanguages in this order: // - user AppleLocale // - user AppleLanguages // - global AppleLocale // - global AppleLanguages // // ref: // - Apple Developer Guide: https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/UserDefaults/AboutPreferenceDomains/AboutPreferenceDomains.html // - Homebrew: https://github.com/Homebrew/brew/pull/7940 func detectViaDefaultsSystem() ([]string, error) { // Read user's apple locale setting. m, err := parseDefaultsSystemAppleLocale("-g") if err == nil { return m, nil } // Read user's apple languages setting. m, err = parseDefaultsSystemAppleLanguages("-g") if err == nil { return m, nil } // Read global locale preferences. m, err = parseDefaultsSystemAppleLocale("/Library/Preferences/.GlobalPreferences") if err == nil { return m, nil } // Read global language preferences. m, err = parseDefaultsSystemAppleLanguages("/Library/Preferences/.GlobalPreferences") if err == nil { return m, nil } return nil, &Error{"detect via defaults system", ErrNotDetected} } // parseDefaultsSystemAppleLocale will parse the AppleLocale output. func parseDefaultsSystemAppleLocale(domain string) ([]string, error) { cmd := exec.Command("defaults", "read", domain, "AppleLocale") var out bytes.Buffer cmd.Stdout = &out err := cmd.Run() if err != nil { return nil, &Error{"detect via user defaults system", err} } content := strings.TrimSpace(out.String()) if len(content) == 0 { return nil, &Error{"detect via defaults system", ErrNotDetected} } return []string{content}, nil } // parseDefaultsSystemAppleLanguages will parse the AppleLanguages output. // // Output should be like: // // ( // // en, // ja, // fr, // de, // es, // it, // pt, // "pt-PT", // nl, // sv, // nb, // da, // fi, // ru, // pl, // "zh-Hans", // "zh-Hant", // ko, // ar, // cs, // hu, // tr // // ) func parseDefaultsSystemAppleLanguages(domain string) ([]string, error) { cmd := exec.Command("defaults", "read", domain, "AppleLanguages") var out bytes.Buffer cmd.Stdout = &out err := cmd.Run() if err != nil { return nil, &Error{"detect via user defaults system", err} } m := make([]string, 0) s := bufio.NewScanner(&out) for s.Scan() { text := s.Text() // Ignore "(" and ")" if !strings.HasPrefix(text, " ") { continue } // Trim all space, " and , text = strings.Trim(text, " \",") // Doing canonicalize if value, ok := oldAppleLocaleToCanonical[text]; ok { text = value } m = append(m, text) } if len(m) == 0 { return nil, &Error{"detect via user defaults system", ErrNotDetected} } return m, nil } // oldAppleLocaleToCanonical is borrowed from swift-corelibs-foundation's CFLocaleIdentifier.c // // Old Apple devices could return "English" instead of "en-US", this map will make them canonical // // refs: // - CFLocaleIdentifier.c: https://github.com/apple/swift-corelibs-foundation/blob/main/CoreFoundation/Locale.subproj/CFLocaleIdentifier.c var oldAppleLocaleToCanonical = map[string]string{ "Afrikaans": "af", // # __CFBundleLanguageNamesArray "Albanian": "sq", // # __CFBundleLanguageNamesArray "Amharic": "am", // # __CFBundleLanguageNamesArray "Arabic": "ar", // # __CFBundleLanguageNamesArray "Armenian": "hy", // # __CFBundleLanguageNamesArray "Assamese": "as", // # __CFBundleLanguageNamesArray "Aymara": "ay", // # __CFBundleLanguageNamesArray "Azerbaijani": "az", // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn") "Basque": "eu", // # __CFBundleLanguageNamesArray "Belarusian": "be", // # handle other names "Belorussian": "be", // # handle other names "Bengali": "bn", // # __CFBundleLanguageNamesArray "Brazilian Portugese": "pt-BR", // # from Installer.app Info.plist IFLanguages key, misspelled "Brazilian Portuguese": "pt-BR", // # correct spelling for above "Breton": "br", // # __CFBundleLanguageNamesArray "Bulgarian": "bg", // # __CFBundleLanguageNamesArray "Burmese": "my", // # __CFBundleLanguageNamesArray "Byelorussian": "be", // # __CFBundleLanguageNamesArray "Catalan": "ca", // # __CFBundleLanguageNamesArray "Chewa": "ny", // # handle other names "Chichewa": "ny", // # handle other names "Chinese": "zh", // -Hans,-Hant? # __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans") "Chinese, Simplified": "zh-Hans", // # from Installer.app Info.plist IFLanguages key "Chinese, Traditional": "zh-Hant", // # correct spelling for below "Chinese, Tradtional": "zh-Hant", // # from Installer.app Info.plist IFLanguages key, misspelled "Croatian": "hr", // # __CFBundleLanguageNamesArray "Czech": "cs", // # __CFBundleLanguageNamesArray "Danish": "da", // # __CFBundleLanguageNamesArray "Dutch": "nl", // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE") "Dzongkha": "dz", // # __CFBundleLanguageNamesArray "English": "en", // # __CFBundleLanguageNamesArray "Esperanto": "eo", // # __CFBundleLanguageNamesArray "Estonian": "et", // # __CFBundleLanguageNamesArray "Faroese": "fo", // # __CFBundleLanguageNamesArray "Farsi": "fa", // # __CFBundleLanguageNamesArray "Finnish": "fi", // # __CFBundleLanguageNamesArray "Flemish": "nl-BE", // # handle other names "French": "fr", // # __CFBundleLanguageNamesArray "Galician": "gl", // # __CFBundleLanguageNamesArray "Gallegan": "gl", // # handle other names "Georgian": "ka", // # __CFBundleLanguageNamesArray "German": "de", // # __CFBundleLanguageNamesArray "Greek": "el", // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc") "Greenlandic": "kl", // # __CFBundleLanguageNamesArray "Guarani": "gn", // # __CFBundleLanguageNamesArray "Gujarati": "gu", // # __CFBundleLanguageNamesArray "Hawaiian": "haw", // # handle new languages "Hebrew": "he", // # __CFBundleLanguageNamesArray "Hindi": "hi", // # __CFBundleLanguageNamesArray "Hungarian": "hu", // # __CFBundleLanguageNamesArray "Icelandic": "is", // # __CFBundleLanguageNamesArray "Indonesian": "id", // # __CFBundleLanguageNamesArray "Inuktitut": "iu", // # __CFBundleLanguageNamesArray "Irish": "ga", // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots") "Italian": "it", // # __CFBundleLanguageNamesArray "Japanese": "ja", // # __CFBundleLanguageNamesArray "Javanese": "jv", // # __CFBundleLanguageNamesArray "Kalaallisut": "kl", // # handle other names "Kannada": "kn", // # __CFBundleLanguageNamesArray "Kashmiri": "ks", // # __CFBundleLanguageNamesArray "Kazakh": "kk", // # __CFBundleLanguageNamesArray "Khmer": "km", // # __CFBundleLanguageNamesArray "Kinyarwanda": "rw", // # __CFBundleLanguageNamesArray "Kirghiz": "ky", // # __CFBundleLanguageNamesArray "Korean": "ko", // # __CFBundleLanguageNamesArray "Kurdish": "ku", // # __CFBundleLanguageNamesArray "Lao": "lo", // # __CFBundleLanguageNamesArray "Latin": "la", // # __CFBundleLanguageNamesArray "Latvian": "lv", // # __CFBundleLanguageNamesArray "Lithuanian": "lt", // # __CFBundleLanguageNamesArray "Macedonian": "mk", // # __CFBundleLanguageNamesArray "Malagasy": "mg", // # __CFBundleLanguageNamesArray "Malay": "ms", // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab") "Malayalam": "ml", // # __CFBundleLanguageNamesArray "Maltese": "mt", // # __CFBundleLanguageNamesArray "Manx": "gv", // # __CFBundleLanguageNamesArray "Marathi": "mr", // # __CFBundleLanguageNamesArray "Moldavian": "mo", // # __CFBundleLanguageNamesArray "Mongolian": "mn", // -Mong,-Cyrl? # __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl") "Nepali": "ne", // # __CFBundleLanguageNamesArray "Norwegian": "nb", // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no") "Nyanja": "ny", // # __CFBundleLanguageNamesArray "Nynorsk": "nn", // # handle other names (no entry in __CFBundleLanguageNamesArray) "Oriya": "or", // # __CFBundleLanguageNamesArray "Oromo": "om", // # __CFBundleLanguageNamesArray "Panjabi": "pa", // # handle other names "Pashto": "ps", // # __CFBundleLanguageNamesArray "Persian": "fa", // # handle other names "Polish": "pl", // # __CFBundleLanguageNamesArray "Portuguese": "pt", // # __CFBundleLanguageNamesArray "Portuguese, Brazilian": "pt-BR", // # handle other names "Punjabi": "pa", // # __CFBundleLanguageNamesArray "Pushto": "ps", // # handle other names "Quechua": "qu", // # __CFBundleLanguageNamesArray "Romanian": "ro", // # __CFBundleLanguageNamesArray "Ruanda": "rw", // # handle other names "Rundi": "rn", // # __CFBundleLanguageNamesArray "Russian": "ru", // # __CFBundleLanguageNamesArray "Sami": "se", // # __CFBundleLanguageNamesArray "Sanskrit": "sa", // # __CFBundleLanguageNamesArray "Scottish": "gd", // # __CFBundleLanguageNamesArray "Serbian": "sr", // # __CFBundleLanguageNamesArray "Simplified Chinese": "zh-Hans", // # handle other names "Sindhi": "sd", // # __CFBundleLanguageNamesArray "Sinhalese": "si", // # __CFBundleLanguageNamesArray "Slovak": "sk", // # __CFBundleLanguageNamesArray "Slovenian": "sl", // # __CFBundleLanguageNamesArray "Somali": "so", // # __CFBundleLanguageNamesArray "Spanish": "es", // # __CFBundleLanguageNamesArray "Sundanese": "su", // # __CFBundleLanguageNamesArray "Swahili": "sw", // # __CFBundleLanguageNamesArray "Swedish": "sv", // # __CFBundleLanguageNamesArray "Tagalog": "fil", // # __CFBundleLanguageNamesArray "Tajik": "tg", // # handle other names "Tajiki": "tg", // # __CFBundleLanguageNamesArray "Tamil": "ta", // # __CFBundleLanguageNamesArray "Tatar": "tt", // # __CFBundleLanguageNamesArray "Telugu": "te", // # __CFBundleLanguageNamesArray "Thai": "th", // # __CFBundleLanguageNamesArray "Tibetan": "bo", // # __CFBundleLanguageNamesArray "Tigrinya": "ti", // # __CFBundleLanguageNamesArray "Tongan": "to", // # __CFBundleLanguageNamesArray "Traditional Chinese": "zh-Hant", // # handle other names "Turkish": "tr", // # __CFBundleLanguageNamesArray "Turkmen": "tk", // # __CFBundleLanguageNamesArray "Uighur": "ug", // # __CFBundleLanguageNamesArray "Ukrainian": "uk", // # __CFBundleLanguageNamesArray "Urdu": "ur", // # __CFBundleLanguageNamesArray "Uzbek": "uz", // # __CFBundleLanguageNamesArray "Vietnamese": "vi", // # __CFBundleLanguageNamesArray "Welsh": "cy", // # __CFBundleLanguageNamesArray "Yiddish": "yi", // # __CFBundleLanguageNamesArray "ar_??": "ar", // # from old MapScriptInfoAndISOCodes "az.Ar": "az-Arab", // # from old LocaleRefGetPartString "az.Cy": "az-Cyrl", // # from old LocaleRefGetPartString "az.La": "az", // # from old LocaleRefGetPartString "be_??": "be_BY", // # from old MapScriptInfoAndISOCodes "bn_??": "bn", // # from old LocaleRefGetPartString "bo_??": "bo", // # from old MapScriptInfoAndISOCodes "br_??": "br", // # from old MapScriptInfoAndISOCodes "cy_??": "cy", // # from old MapScriptInfoAndISOCodes "de-96": "de-1996", // # from old MapScriptInfoAndISOCodes // <1.9> "de_96": "de-1996", // # from old MapScriptInfoAndISOCodes // <1.9> "de_??": "de-1996", // # from old MapScriptInfoAndISOCodes "el.El-P": "grc", // # from old LocaleRefGetPartString "en-ascii": "en_001", // # from earlier version of tables in this file! "en_??": "en_001", // # from old MapScriptInfoAndISOCodes "eo_??": "eo", // # from old MapScriptInfoAndISOCodes "es_??": "es_419", // # from old MapScriptInfoAndISOCodes "es_XL": "es_419", // # from earlier version of tables in this file! "fr_??": "fr_001", // # from old MapScriptInfoAndISOCodes "ga-dots": "ga-Latg", // # from earlier version of tables in this file! // <1.8> "ga-dots_IE": "ga-Latg_IE", // # from earlier version of tables in this file! // <1.8> "ga.Lg": "ga-Latg", // # from old LocaleRefGetPartString // <1.8> "ga.Lg_IE": "ga-Latg_IE", // # from old LocaleRefGetPartString // <1.8> "gd_??": "gd", // # from old MapScriptInfoAndISOCodes "gv_??": "gv", // # from old MapScriptInfoAndISOCodes "jv.La": "jv", // # logical extension // <1.9> "jw.La": "jv", // # from old LocaleRefGetPartString "kk.Cy": "kk", // # from old LocaleRefGetPartString "kl.La": "kl", // # from old LocaleRefGetPartString "kl.La_GL": "kl_GL", // # from old LocaleRefGetPartString // <1.9> "lp_??": "se", // # from old MapScriptInfoAndISOCodes "mk_??": "mk_MK", // # from old MapScriptInfoAndISOCodes "mn.Cy": "mn", // # from old LocaleRefGetPartString "mn.Mn": "mn-Mong", // # from old LocaleRefGetPartString "ms.Ar": "ms-Arab", // # from old LocaleRefGetPartString "ms.La": "ms", // # from old LocaleRefGetPartString "nl-be": "nl-BE", // # from old LocaleRefGetPartString "nl-be_BE": "nl_BE", // # from old LocaleRefGetPartString "no-NO": "nb-NO", // # not handled by localeStringPrefixToCanonical "no-NO_NO": "nb-NO_NO", // # not handled by localeStringPrefixToCanonical "pa_??": "pa", // # from old LocaleRefGetPartString "sa.Dv": "sa", // # from old LocaleRefGetPartString "sl_??": "sl_SI", // # from old MapScriptInfoAndISOCodes "sr_??": "sr_RS", // # from old MapScriptInfoAndISOCodes // <1.18> "su.La": "su", // # from old LocaleRefGetPartString "yi.He": "yi", // # from old LocaleRefGetPartString "zh-simp": "zh-Hans", // # from earlier version of tables in this file! "zh-trad": "zh-Hant", // # from earlier version of tables in this file! "zh.Ha-S": "zh-Hans", // # from old LocaleRefGetPartString "zh.Ha-S_CN": "zh_CN", // # from old LocaleRefGetPartString "zh.Ha-T": "zh-Hant", // # from old LocaleRefGetPartString "zh.Ha-T_TW": "zh_TW", // # from old LocaleRefGetPartString } ================================================ FILE: modules/locale/locale_js.go ================================================ //go:build ignore package locale var detectors = []detector{ detectViaEnvLanguage, detectViaEnvLc, } ================================================ FILE: modules/locale/locale_posix.go ================================================ //go:build aix || dragonfly || freebsd || hurd || illumos || linux || nacl || netbsd || openbsd || plan9 || solaris || zos package locale import ( "bufio" "os" "path" "strings" ) var detectors = []detector{ detectViaEnvLanguage, detectViaEnvLc, detectViaLocaleConf, } func detectViaLocaleConf() (_ []string, err error) { defer func() { if err != nil { err = &Error{"detect via locale conf", err} } }() fp := getLocaleConfPath() if fp == "" { return nil, ErrNotDetected } f, err := os.Open(fp) if err != nil { return nil, err } // Output should be like: // // LANG=en_US.UTF-8 // LC_CTYPE="en_US.UTF-8" // LC_NUMERIC="en_US.UTF-8" // LC_TIME="en_US.UTF-8" // LC_COLLATE="en_US.UTF-8" // LC_MONETARY="en_US.UTF-8" // LC_MESSAGES= // LC_PAPER="en_US.UTF-8" // LC_NAME="en_US.UTF-8" // LC_ADDRESS="en_US.UTF-8" // LC_TELEPHONE="en_US.UTF-8" // LC_MEASUREMENT="en_US.UTF-8" // LC_IDENTIFICATION="en_US.UTF-8" // LC_ALL= m := make(map[string]string) s := bufio.NewScanner(f) for s.Scan() { value := strings.Split(s.Text(), "=") // Ignore not set locale value. if len(value) != 2 || value[1] == "" { continue } m[value[0]] = strings.Trim(value[1], "\"") } for _, v := range envs { x, ok := m[v] if ok { return []string{parseEnvLc(x)}, nil } } return nil, ErrNotDetected } // getLocaleConfPath will try to get correct locale conf path. // // Following path could be returned: // - "$XDG_CONFIG_HOME/locale.conf" (follow XDG Base Directory specification) // - "$HOME/.config/locale.conf" (user level locale config) // - "/etc/locale.conf" (system level locale config) // - "" (empty means no valid path found, caller need to handle this.) // // ref: // - POSIX Locale: https://pubs.opengroup.org/onlinepubs/9699919799/ // - XDG Base Directory: https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html func getLocaleConfPath() string { // Try to loading from $XDG_CONFIG_HOME/locale.conf xdg, ok := os.LookupEnv("XDG_CONFIG_HOME") if ok { fp := path.Join(xdg, "locale.conf") _, err := os.Stat(fp) if err == nil { return fp } } // Try to loading from $HOME/.config/locale.conf home, ok := os.LookupEnv("HOME") if ok { fp := path.Join(home, ".config", "locale.conf") _, err := os.Stat(fp) if err == nil { return fp } } // Try to loading from /etc/locale.conf fp := "/etc/locale.conf" _, err := os.Stat(fp) if err == nil { return fp } return "" } ================================================ FILE: modules/locale/locale_shared.go ================================================ package locale import ( "os" "strings" ) // Unless we call LookupEnv more than 9 times, we should not use Environ. // // goos: linux // goarch: amd64 // pkg: github.com/Xuanwo/go-locale // BenchmarkLookupEnv // BenchmarkLookupEnv-8 37024654 32.4 ns/op // BenchmarkEnviron // BenchmarkEnviron-8 4275735 281 ns/op // PASS // envs is the env to be checked. // // LC_ALL will overwrite all LC_* options. // FIXME: LC_ALL=C should overwrite $LANGUAGE env // // LC_MESSAGES is the config for messages. // FIXME: LC_MESSAGES=C should overwrite $LANGUAGE env // // LANG is the default locale. var envs = []string{"LC_ALL", "LC_MESSAGES", "LANG"} // detectViaEnvLanguage checks env LANGUAGE // // Program use gettext will respect LANGUAGE env func detectViaEnvLanguage() ([]string, error) { s, ok := os.LookupEnv("LANGUAGE") if !ok || s == "" { return nil, &Error{"detect via env language", ErrNotDetected} } return parseEnvLanguage(s), nil } // detectViaEnvLc checks LC_* in order which decided by // unix convention // // ref: // - http://man7.org/linux/man-pages/man7/locale.7.html // - https://linux.die.net/man/3/gettext // - https://wiki.archlinux.org/index.php/Locale func detectViaEnvLc() ([]string, error) { for _, v := range envs { s, ok := os.LookupEnv(v) if ok && s != "" { return []string{parseEnvLc(s)}, nil } } return nil, &Error{"detect via env lc", ErrNotDetected} } // parseEnvLanguage will parse LANGUAGE env. // Input could be: "en_AU:en_GB:en" func parseEnvLanguage(s string) []string { return strings.Split(s, ":") } // parseEnvLc will parse LC_* env. // Input could be: "en_US.UTF-8" func parseEnvLc(s string) string { x := strings.Split(s, ".") // "C" means "ANSI-C" and "POSIX", if locale set to C, we can simple // set returned language to "en_US" if x[0] == "C" { return "en_US" } return x[0] } ================================================ FILE: modules/locale/locale_windows.go ================================================ //go:build windows package locale import ( "golang.org/x/sys/windows/registry" ) var detectors = []detector{ detectViaEnvLanguage, detectViaEnvLc, detectViaRegistry, } // detectViaRegistry will detect language via Windows Registry // // ref: https://renenyffenegger.ch/notes/Windows/registry/tree/HKEY_CURRENT_USER/Control-Panel/International/index func detectViaRegistry() (langs []string, err error) { defer func() { if err != nil { err = &Error{"detect via registry", err} } }() key, err := registry.OpenKey(registry.CURRENT_USER, `Control Panel\International`, registry.QUERY_VALUE) if err != nil { return nil, err } defer key.Close() // nolint lang, _, err := key.GetStringValue("LocaleName") if err != nil { return nil, err } return []string{lang}, nil } ================================================ FILE: modules/merkletrie/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2018 Sourced Technologies, S.L. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: modules/merkletrie/change.go ================================================ package merkletrie import ( "context" "errors" "fmt" "io" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) var ( ErrEmptyFileName = errors.New("empty filename in tree entry") ) // Action values represent the kind of things a Change can represent: // insertion, deletions or modifications of files. type Action int // The set of possible actions in a change. const ( _ Action = iota Insert Delete Modify ) func (a Action) Byte() byte { switch a { case Insert: return 'A' case Delete: return 'D' case Modify: return 'M' } return ' ' } // String returns the action as a human readable text. func (a Action) String() string { switch a { case Insert: return "Insert" case Delete: return "Delete" case Modify: return "Modify" default: panic(fmt.Sprintf("unsupported action: %d", a)) } } // A Change value represent how a noder has change between to merkletrie. type Change struct { // The noder before the change or nil if it was inserted. From noder.Path // The noder after the change or nil if it was deleted. To noder.Path } // Action is convenience method that returns what Action c represents. func (c *Change) Action() (Action, error) { if c.From == nil && c.To == nil { return Action(0), errors.New("malformed change: nil from and to") } if c.From == nil { return Insert, nil } if c.To == nil { return Delete, nil } return Modify, nil } // NewInsert returns a new Change representing the insertion of n. func NewInsert(n noder.Path) Change { return Change{To: n} } // NewDelete returns a new Change representing the deletion of n. func NewDelete(n noder.Path) Change { return Change{From: n} } // NewModify returns a new Change representing that a has been modified and // it is now b. func NewModify(a, b noder.Path) Change { return Change{ From: a, To: b, } } // String returns a single change in human readable form, using the // format: '<' + action + space + path + '>'. The contents of the file // before or after the change are not included in this format. // // Example: inserting a file at the path a/b/c.txt will return "". func (c Change) String() string { action, err := c.Action() if err != nil { panic(err) } var path string if action == Delete { path = c.From.String() } else { path = c.To.String() } return fmt.Sprintf("<%s %s>", action, path) } // Changes is a list of changes between to merkletries. type Changes []Change // NewChanges returns an empty list of changes. func NewChanges() Changes { return Changes{} } // Add adds the change c to the list of changes. func (l *Changes) Add(c Change) { *l = append(*l, c) } // AddRecursiveInsert adds the required changes to insert all the // file-like noders found in root, recursively. func (l *Changes) AddRecursiveInsert(ctx context.Context, root noder.Path) error { return l.addRecursive(ctx, root, NewInsert) } // AddRecursiveDelete adds the required changes to delete all the // file-like noders found in root, recursively. func (l *Changes) AddRecursiveDelete(ctx context.Context, root noder.Path) error { return l.addRecursive(ctx, root, NewDelete) } type noderToChangeFn func(noder.Path) Change // NewInsert or NewDelete func (l *Changes) addRecursive(ctx context.Context, root noder.Path, ctor noderToChangeFn) error { if root.String() == "" { return ErrEmptyFileName } if !root.IsDir() { if !root.Skip() { l.Add(ctor(root)) } return nil } i, err := NewIterFromPath(ctx, root) if err != nil { return err } var current noder.Path for { if current, err = i.Step(ctx); err != nil { if errors.Is(err, io.EOF) { break } return err } if current.IsDir() || current.Skip() { continue } l.Add(ctor(current)) } return nil } ================================================ FILE: modules/merkletrie/difftree.go ================================================ package merkletrie // The focus of this difftree implementation is to save time by // skipping whole directories if their hash is the same in both // trees. // // The diff algorithm implemented here is based on the doubleiter // type defined in this same package; we will iterate over both // trees at the same time, while comparing the current noders in // each iterator. Depending on how they differ we will output the // corresponding changes and move the iterators further over both // trees. // // The table bellow show all the possible comparison results, along // with what changes should we produce and how to advance the // iterators. // // The table is implemented by the switches in this function, // diffTwoNodes, diffTwoNodesSameName and diffTwoDirs. // // Many Bothans died to bring us this information, make sure you // understand the table before modifying this code. // # Cases // // When comparing noders in both trees you will find yourself in // one of 169 possible cases, but if we ignore moves, we can // simplify a lot the search space into the following table: // // - "-": nothing, no file or directory // - a<>: an empty file named "a". // - a<1>: a file named "a", with "1" as its contents. // - a<2>: a file named "a", with "2" as its contents. // - a(): an empty dir named "a". // - a(...): a dir named "a", with some files and/or dirs inside (possibly // empty). // - a(;;;): a dir named "a", with some other files and/or dirs inside // (possibly empty), which different from the ones in "a(...)". // // \ to - a<> a<1> a<2> a() a(...) a(;;;) // from \ // - 00 01 02 03 04 05 06 // a<> 10 11 12 13 14 15 16 // a<1> 20 21 22 23 24 25 26 // a<2> 30 31 32 33 34 35 36 // a() 40 41 42 43 44 45 46 // a(...) 50 51 52 53 54 55 56 // a(;;;) 60 61 62 63 64 65 66 // // Every (from, to) combination in the table is a special case, but // some of them can be merged into some more general cases, for // instance 11 and 22 can be merged into the general case: both // noders are equal. // // Here is a full list of all the cases that are similar and how to // merge them together into more general cases. Each general case // is labeled with an uppercase letter for further reference, and it // is followed by the pseudocode of the checks you have to perfrom // on both noders to see if you are in such a case, the actions to // perform (i.e. what changes to output) and how to advance the // iterators of each tree to continue the comparison process. // // ## A. Impossible: 00 // // ## B. Same thing on both sides: 11, 22, 33, 44, 55, 66 // - check: `SameName() && SameHash()` // - action: do nothing. // - advance: `FromNext(); ToNext()` // // ### C. To was created: 01, 02, 03, 04, 05, 06 // - check: `DifferentName() && ToBeforeFrom()` // - action: insertRecursively(to) // - advance: `ToNext()` // // ### D. From was deleted: 10, 20, 30, 40, 50, 60 // - check: `DifferentName() && FromBeforeTo()` // - action: `DeleteRecursively(from)` // - advance: `FromNext()` // // ### E. Empty file to file with contents: 12, 13 // - check: `SameName() && DifferentHash() && FromIsFile() && // ToIsFile() && FromIsEmpty()` // - action: `modifyFile(from, to)` // - advance: `FromNext()` or `FromStep()` // // ### E'. file with contents to empty file: 21, 31 // - check: `SameName() && DifferentHash() && FromIsFile() && // ToIsFile() && ToIsEmpty()` // - action: `modifyFile(from, to)` // - advance: `FromNext()` or `FromStep()` // // ### F. empty file to empty dir with the same name: 14 // - check: `SameName() && FromIsFile() && FromIsEmpty() && // ToIsDir() && ToIsEmpty()` // - action: `DeleteFile(from); InsertEmptyDir(to)` // - advance: `FromNext(); ToNext()` // // ### F'. empty dir to empty file of the same name: 41 // - check: `SameName() && FromIsDir() && FromIsEmpty && // ToIsFile() && ToIsEmpty()` // - action: `DeleteEmptyDir(from); InsertFile(to)` // - advance: `FromNext(); ToNext()` or step for any of them. // // ### G. empty file to non-empty dir of the same name: 15, 16 // - check: `SameName() && FromIsFile() && ToIsDir() && // FromIsEmpty() && ToIsNotEmpty()` // - action: `DeleteFile(from); InsertDirRecursively(to)` // - advance: `FromNext(); ToNext()` // // ### G'. non-empty dir to empty file of the same name: 51, 61 // - check: `SameName() && FromIsDir() && FromIsNotEmpty() && // ToIsFile() && FromIsEmpty()` // - action: `DeleteDirRecursively(from); InsertFile(to)` // - advance: `FromNext(); ToNext()` // // ### H. modify file contents: 23, 32 // - check: `SameName() && FromIsFile() && ToIsFile() && // FromIsNotEmpty() && ToIsNotEmpty()` // - action: `ModifyFile(from, to)` // - advance: `FromNext(); ToNext()` // // ### I. file with contents to empty dir: 24, 34 // - check: `SameName() && DifferentHash() && FromIsFile() && // FromIsNotEmpty() && ToIsDir() && ToIsEmpty()` // - action: `DeleteFile(from); InsertEmptyDir(to)` // - advance: `FromNext(); ToNext()` // // ### I'. empty dir to file with contents: 42, 43 // - check: `SameName() && DifferentHash() && FromIsDir() && // FromIsEmpty() && ToIsFile() && ToIsEmpty()` // - action: `DeleteDir(from); InsertFile(to)` // - advance: `FromNext(); ToNext()` // // ### J. file with contents to dir with contents: 25, 26, 35, 36 // - check: `SameName() && DifferentHash() && FromIsFile() && // FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()` // - action: `DeleteFile(from); InsertDirRecursively(to)` // - advance: `FromNext(); ToNext()` // // ### J'. dir with contents to file with contents: 52, 62, 53, 63 // - check: `SameName() && DifferentHash() && FromIsDir() && // FromIsNotEmpty() && ToIsFile() && ToIsNotEmpty()` // - action: `DeleteDirRecursively(from); InsertFile(to)` // - advance: `FromNext(); ToNext()` // // ### K. empty dir to dir with contents: 45, 46 // - check: `SameName() && DifferentHash() && FromIsDir() && // FromIsEmpty() && ToIsDir() && ToIsNotEmpty()` // - action: `InsertChildrenRecursively(to)` // - advance: `FromNext(); ToNext()` // // ### K'. dir with contents to empty dir: 54, 64 // - check: `SameName() && DifferentHash() && FromIsDir() && // FromIsEmpty() && ToIsDir() && ToIsNotEmpty()` // - action: `DeleteChildrenRecursively(from)` // - advance: `FromNext(); ToNext()` // // ### L. dir with contents to dir with different contents: 56, 65 // - check: `SameName() && DifferentHash() && FromIsDir() && // FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()` // - action: nothing // - advance: `FromStep(); ToStep()` // // // All these cases can be further simplified by a truth table // reduction process, in which we gather similar checks together to // make the final code easier to read and understand. // // The first 6 columns are the outputs of the checks to perform on // both noders. I have labeled them 1 to 6, this is what they mean: // // 1: SameName() // 2: SameHash() // 3: FromIsDir() // 4: ToIsDir() // 5: FromIsEmpty() // 6: ToIsEmpty() // // The from and to columns are a fsnoder example of the elements // that you will find on each tree under the specified comparison // results (columns 1 to 6). // // The type column identifies the case we are into, from the list above. // // The type' column identifies the new set of reduced cases, using // lowercase letters, and they are explained after the table. // // The last column is the set of actions and advances for each case. // // "---" means impossible except in case of hash collision. // // advance meaning: // - NN: from.Next(); to.Next() // - SS: from.Step(); to.Step() // // 1 2 3 4 5 6 | from | to |type|type'|action ; advance // ------------+--------+--------+----+------------------------------------ // 0 0 0 0 0 0 | | | | | if !SameName() { // . | | | | | if FromBeforeTo() { // . | | | D | d | delete(from); from.Next() // . | | | | | } else { // . | | | C | c | insert(to); to.Next() // . | | | | | } // 0 1 1 1 1 1 | | | | | } // 1 0 0 0 0 0 | a<1> | a<2> | H | e | modify(from, to); NN // 1 0 0 0 0 1 | a<1> | a<> | E' | e | modify(from, to); NN // 1 0 0 0 1 0 | a<> | a<1> | E | e | modify(from, to); NN // 1 0 0 0 1 1 | ---- | ---- | | e | // 1 0 0 1 0 0 | a<1> | a(...) | J | f | delete(from); insert(to); NN // 1 0 0 1 0 1 | a<1> | a() | I | f | delete(from); insert(to); NN // 1 0 0 1 1 0 | a<> | a(...) | G | f | delete(from); insert(to); NN // 1 0 0 1 1 1 | a<> | a() | F | f | delete(from); insert(to); NN // 1 0 1 0 0 0 | a(...) | a<1> | J' | f | delete(from); insert(to); NN // 1 0 1 0 0 1 | a(...) | a<> | G' | f | delete(from); insert(to); NN // 1 0 1 0 1 0 | a() | a<1> | I' | f | delete(from); insert(to); NN // 1 0 1 0 1 1 | a() | a<> | F' | f | delete(from); insert(to); NN // 1 0 1 1 0 0 | a(...) | a(;;;) | L | g | nothing; SS // 1 0 1 1 0 1 | a(...) | a() | K' | h | deleteChildren(from); NN // 1 0 1 1 1 0 | a() | a(...) | K | i | insertChildren(to); NN // 1 0 1 1 1 1 | ---- | ---- | | | // 1 1 0 0 0 0 | a<1> | a<1> | B | b | nothing; NN // 1 1 0 0 0 1 | ---- | ---- | | b | // 1 1 0 0 1 0 | ---- | ---- | | b | // 1 1 0 0 1 1 | a<> | a<> | B | b | nothing; NN // 1 1 0 1 0 0 | ---- | ---- | | b | // 1 1 0 1 0 1 | ---- | ---- | | b | // 1 1 0 1 1 0 | ---- | ---- | | b | // 1 1 0 1 1 1 | ---- | ---- | | b | // 1 1 1 0 0 0 | ---- | ---- | | b | // 1 1 1 0 0 1 | ---- | ---- | | b | // 1 1 1 0 1 0 | ---- | ---- | | b | // 1 1 1 0 1 1 | ---- | ---- | | b | // 1 1 1 1 0 0 | a(...) | a(...) | B | b | nothing; NN // 1 1 1 1 0 1 | ---- | ---- | | b | // 1 1 1 1 1 0 | ---- | ---- | | b | // 1 1 1 1 1 1 | a() | a() | B | b | nothing; NN // // c and d: // if !SameName() // d if FromBeforeTo() // c else // b: SameName) && sameHash() // e: SameName() && !sameHash() && BothAreFiles() // f: SameName() && !sameHash() && FileAndDir() // g: SameName() && !sameHash() && BothAreDirs() && NoneIsEmpty // i: SameName() && !sameHash() && BothAreDirs() && FromIsEmpty // h: else of i import ( "context" "errors" "fmt" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) var ( // ErrCanceled is returned whenever the operation is canceled. ErrCanceled = errors.New("operation canceled") ) // DiffTreeContext calculates the list of changes between two merkletries. It // uses the provided hashEqual callback to compare noders. // Error will be returned if context expires // Provided context must be non nil func DiffTreeContext(ctx context.Context, fromTree, toTree noder.Noder, hashEqual noder.Equal) (Changes, error) { ret := NewChanges() ii, err := newDoubleIter(ctx, fromTree, toTree, hashEqual) if err != nil { return nil, err } for { select { case <-ctx.Done(): return nil, ctx.Err() default: } from := ii.from.current to := ii.to.current switch r := ii.remaining(); r { case noMoreNoders: return ret, nil case onlyFromRemains: if !from.Skip() { if err = ret.AddRecursiveDelete(ctx, from); err != nil { return nil, err } } if err = ii.nextFrom(ctx); err != nil { return nil, err } case onlyToRemains: if !to.Skip() { if err = ret.AddRecursiveInsert(ctx, to); err != nil { return nil, err } } if err = ii.nextTo(ctx); err != nil { return nil, err } case bothHaveNodes: var err error switch { case from.Skip(): if from.Name() == to.Name() { err = ii.nextBoth(ctx) } else { err = ii.nextFrom(ctx) } case to.Skip(): if from.Name() == to.Name() { err = ii.nextBoth(ctx) } else { err = ii.nextTo(ctx) } default: err = diffNodes(ctx, &ret, ii) } if err != nil { return nil, err } default: panic(fmt.Sprintf("unknown remaining value: %d", r)) } } } func diffNodes(ctx context.Context, changes *Changes, ii *doubleIter) error { from := ii.from.current to := ii.to.current var err error // compare their full paths as strings switch from.Compare(to) { case -1: if err = changes.AddRecursiveDelete(ctx, from); err != nil { return err } if err = ii.nextFrom(ctx); err != nil { return err } case 1: if err = changes.AddRecursiveInsert(ctx, to); err != nil { return err } if err = ii.nextTo(ctx); err != nil { return err } default: if err := diffNodesSameName(ctx, changes, ii); err != nil { return err } } return nil } func diffNodesSameName(ctx context.Context, changes *Changes, ii *doubleIter) error { from := ii.from.current to := ii.to.current status, err := ii.compare(ctx) if err != nil { return err } switch { case status.sameHash: // do nothing if err = ii.nextBoth(ctx); err != nil { return err } case status.bothAreFiles: changes.Add(NewModify(from, to)) if err = ii.nextBoth(ctx); err != nil { return err } case status.fileAndDir: if err = changes.AddRecursiveDelete(ctx, from); err != nil { return err } if err = changes.AddRecursiveInsert(ctx, to); err != nil { return err } if err = ii.nextBoth(ctx); err != nil { return err } case status.bothAreDirs: if err = diffDirs(ctx, changes, ii); err != nil { return err } default: return errors.New("bad status from double iterator") } return nil } func diffDirs(ctx context.Context, changes *Changes, ii *doubleIter) error { from := ii.from.current to := ii.to.current status, err := ii.compare(ctx) if err != nil { return err } switch { case status.fromIsEmptyDir: if err = changes.AddRecursiveInsert(ctx, to); err != nil { return err } if err = ii.nextBoth(ctx); err != nil { return err } case status.toIsEmptyDir: if err = changes.AddRecursiveDelete(ctx, from); err != nil { return err } if err = ii.nextBoth(ctx); err != nil { return err } case !status.fromIsEmptyDir && !status.toIsEmptyDir: // do nothing if err = ii.stepBoth(ctx); err != nil { return err } default: return errors.New("both dirs are empty but has different hash") } return nil } ================================================ FILE: modules/merkletrie/doc.go ================================================ /* Package merkletrie provides support for n-ary trees that are at the same time Merkle trees and Radix trees (tries). Git trees are Radix n-ary trees in virtue of the names of their tree entries. At the same time, git trees are Merkle trees thanks to their hashes. This package defines Merkle tries as nodes that should have: - a hash: the Merkle part of the Merkle trie - a key: the Radix part of the Merkle trie The Merkle hash condition is not enforced by this package though. This means that the hash of a node doesn't have to take into account the hashes of their children, which is good for testing purposes. Nodes in the Merkle trie are abstracted by the Noder interface. The intended use is that git trees implements this interface, either directly or using a simple wrapper. This package provides an iterator for merkletrie that can skip whole directory-like noders and an efficient merkletrie comparison algorithm. When comparing git trees, the simple approach of alphabetically sorting their elements and comparing the resulting lists is too slow as it depends linearly on the number of files in the trees: When a directory has lots of files but none of them has been modified, this approach is very expensive. We can do better by pruning whole directories that have not change, just by looking at their hashes. This package provides the tools to do exactly that. */ package merkletrie ================================================ FILE: modules/merkletrie/doubleiter.go ================================================ package merkletrie import ( "context" "errors" "fmt" "io" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) // A doubleIter is a convenience type to keep track of the current // noders in two merkletrie that are going to be iterated in parallel. // It has methods for: // // - iterating over the merkletrie, both at the same time or // individually: nextFrom, nextTo, nextBoth, stepBoth // // - checking if there are noders left in one or both of them with the // remaining method and its associated returned type. // // - comparing the current noders of both merkletrie in several ways, // with the compare method and its associated returned type. type doubleIter struct { from struct { iter *Iter current noder.Path // nil if no more nodes } to struct { iter *Iter current noder.Path // nil if no more nodes } hashEqual noder.Equal } // NewdoubleIter returns a new doubleIter for the merkletrie "from" and // "to". The hashEqual callback function will be used by the doubleIter // to compare the hash of the noders in the merkletrie. The doubleIter // will be initialized to the first elements in each merkletrie if any. func newDoubleIter(ctx context.Context, from, to noder.Noder, hashEqual noder.Equal) ( *doubleIter, error) { var ii doubleIter var err error if ii.from.iter, err = NewIter(ctx, from); err != nil { return nil, fmt.Errorf("from: %w", err) } if ii.from.current, err = ii.from.iter.Next(ctx); turnEOFIntoNil(err) != nil { return nil, fmt.Errorf("from: %w", err) } if ii.to.iter, err = NewIter(ctx, to); err != nil { return nil, fmt.Errorf("to: %w", err) } if ii.to.current, err = ii.to.iter.Next(ctx); turnEOFIntoNil(err) != nil { return nil, fmt.Errorf("to: %w", err) } ii.hashEqual = hashEqual return &ii, nil } func turnEOFIntoNil(e error) error { if e != nil && !errors.Is(e, io.EOF) { return e } return nil } // NextBoth makes d advance to the next noder in both merkletries. If // any of them is a directory, it skips its contents. func (d *doubleIter) nextBoth(ctx context.Context) error { if err := d.nextFrom(ctx); err != nil { return err } if err := d.nextTo(ctx); err != nil { return err } return nil } // NextFrom makes d advance to the next noder in the "from" merkletrie, // skipping its contents if it is a directory. func (d *doubleIter) nextFrom(ctx context.Context) (err error) { d.from.current, err = d.from.iter.Next(ctx) return turnEOFIntoNil(err) } // NextTo makes d advance to the next noder in the "to" merkletrie, // skipping its contents if it is a directory. func (d *doubleIter) nextTo(ctx context.Context) (err error) { d.to.current, err = d.to.iter.Next(ctx) return turnEOFIntoNil(err) } // StepBoth makes d advance to the next noder in both merkletries, // getting deeper into directories if that is the case. func (d *doubleIter) stepBoth(ctx context.Context) (err error) { if d.from.current, err = d.from.iter.Step(ctx); turnEOFIntoNil(err) != nil { return err } if d.to.current, err = d.to.iter.Step(ctx); turnEOFIntoNil(err) != nil { return err } return nil } // Remaining returns if there are no more noders in the tree, if both // have noders or if one of them doesn't. func (d *doubleIter) remaining() remaining { if d.from.current == nil && d.to.current == nil { return noMoreNoders } if d.from.current == nil && d.to.current != nil { return onlyToRemains } if d.from.current != nil && d.to.current == nil { return onlyFromRemains } return bothHaveNodes } // Remaining values tells you whether both trees still have noders, or // only one of them or none of them. type remaining int const ( noMoreNoders remaining = iota onlyToRemains onlyFromRemains bothHaveNodes ) func (d *doubleIter) sameHash() bool { from := d.from.current.Last() to := d.to.current.Last() a, fromOK := from.(noder.Comparators) b, toOK := to.(noder.Comparators) if fromOK && toOK { if a.Mode() == b.Mode() && a.ModifiedAt().Equal(b.ModifiedAt()) { return true } } return d.hashEqual(d.from.current, d.to.current) } // Compare returns the comparison between the current elements in the // merkletries. func (d *doubleIter) compare(ctx context.Context) (s comparison, err error) { s.sameHash = d.sameHash() fromIsDir := d.from.current.IsDir() toIsDir := d.to.current.IsDir() s.bothAreDirs = fromIsDir && toIsDir s.bothAreFiles = !fromIsDir && !toIsDir s.fileAndDir = !s.bothAreDirs && !s.bothAreFiles fromNumChildren, err := d.from.current.NumChildren(ctx) if err != nil { return comparison{}, fmt.Errorf("from: %w", err) } toNumChildren, err := d.to.current.NumChildren(ctx) if err != nil { return comparison{}, fmt.Errorf("to: %w", err) } s.fromIsEmptyDir = fromIsDir && fromNumChildren == 0 s.toIsEmptyDir = toIsDir && toNumChildren == 0 return } // Answers to a lot of questions you can ask about how to noders are // equal or different. type comparison struct { // the following are only valid if both nodes have the same name // (i.e. nameComparison == 0) // Do both nodes have the same hash? sameHash bool // Are both nodes files? bothAreFiles bool // the following are only valid if any of the noders are dirs, // this is, if !bothAreFiles // Is one a file and the other a dir? fileAndDir bool // Are both nodes dirs? bothAreDirs bool // Is the from node an empty dir? fromIsEmptyDir bool // Is the to Node an empty dir? toIsEmptyDir bool } ================================================ FILE: modules/merkletrie/filesystem/node.go ================================================ package filesystem import ( "context" "os" "path" "path/filepath" "time" "github.com/antgroup/hugescm/modules/merkletrie/noder" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" "github.com/antgroup/hugescm/modules/streamio" ) var ignore = map[string]bool{ ".zeta": true, } // The Node represents a file or a directory in a billy.Filesystem. It // implements the interface noder.Noder of merkletrie package. // // This implementation implements a "standard" hash method being able to be // compared with any other noder.Noder implementation inside of go-git. type Node struct { root string path string hash []byte children []noder.Noder isDir bool mode os.FileMode size int64 modifiedAt time.Time m noder.Matcher } // NewRootNode returns the root node based on a given billy.Filesystem. // // In order to provide the submodule hash status, a map[string]plumbing.Hash // should be provided where the key is the path of the submodule and the commit // of the submodule HEAD func NewRootNode(root string, m noder.Matcher) noder.Noder { return &Node{root: root, isDir: true, m: m} } func (n Node) fsPath(p string) string { return filepath.Join(n.root, p) } // Hash the hash of a filesystem is the result of concatenating the computed // plumbing.Hash of the file as a Blob and its plumbing.FileMode; that way the // difftree algorithm will detect changes in the contents of files and also in // their mode. // // The hash of a directory is always a 36-bytes slice of zero values func (n *Node) Hash() []byte { if len(n.hash) == 0 { n.calculateHash() } return n.hash } func (n *Node) Mode() filemode.FileMode { m, _ := filemode.NewFromOS(n.mode) return m } // UnifyMode overrides the file mode with the given mode. // This is used on Windows to unify POSIX permission modes to eliminate // false-positive changes, since Windows doesn't use the POSIX permission model. func (n *Node) UnifyMode(mode filemode.FileMode) { n.mode, _ = mode.ToOSFileMode() } func (n *Node) ModifiedAt() time.Time { return n.modifiedAt } func (n *Node) Size() int64 { return n.size } func (n *Node) HashRaw() plumbing.Hash { hash := n.Hash() var oid plumbing.Hash copy(oid[:], hash) return oid } func (n *Node) Name() string { return path.Base(n.path) } func (n *Node) IsDir() bool { return n.isDir } func (n *Node) Skip() bool { return false } func (n *Node) Children(ctx context.Context) ([]noder.Noder, error) { if err := n.calculateChildren(); err != nil { return nil, err } return n.children, nil } func (n *Node) NumChildren(ctx context.Context) (int, error) { if err := n.calculateChildren(); err != nil { return -1, err } return len(n.children), nil } func (n *Node) calculateChildren() error { if !n.IsDir() { return nil } if len(n.children) != 0 { return nil } dirs, err := os.ReadDir(filepath.Join(n.root, n.path)) if err != nil { if os.IsNotExist(err) { return nil } return err } for _, d := range dirs { if _, ok := ignore[d.Name()]; ok { continue } fi, err := d.Info() if err != nil { return err } if fi.Mode()&os.ModeSocket != 0 { continue } c, err := n.newChildNode(fi) if err != nil { return err } if c != nil { n.children = append(n.children, c) } } return nil } func (n *Node) newChildNode(fi os.FileInfo) (*Node, error) { var m noder.Matcher var ok bool if fi.IsDir() && n.m != nil && n.m.Len() != 0 { if m, ok = n.m.Match(fi.Name()); !ok { return nil, nil } } node := &Node{ root: n.root, path: path.Join(n.path, fi.Name()), isDir: fi.IsDir(), size: fi.Size(), mode: fi.Mode(), modifiedAt: fi.ModTime(), m: m, } return node, nil } func (n *Node) calculateHash() { if n.isDir { n.hash = make([]byte, plumbing.HASH_DIGEST_SIZE+4) return } mode, err := filemode.NewFromOS(n.mode) if err != nil { n.hash = make([]byte, plumbing.HASH_DIGEST_SIZE+4) return } var hash plumbing.Hash if n.mode&os.ModeSymlink != 0 { hash = n.doCalculateHashForSymlink() } else { hash = n.doCalculateHashForRegular() } n.hash = append(hash[:], mode.Bytes()...) } func (n *Node) doCalculateHashForRegular() plumbing.Hash { f, err := os.Open(n.fsPath(n.path)) if err != nil { return plumbing.ZeroHash } defer f.Close() // nolint h := plumbing.NewHasher() if _, err := streamio.Copy(h, f); err != nil { return plumbing.ZeroHash } return h.Sum() } func (n *Node) doCalculateHashForSymlink() plumbing.Hash { target, err := os.Readlink(n.fsPath(n.path)) if err != nil { return plumbing.ZeroHash } h := plumbing.NewHasher() if _, err := h.Write([]byte(target)); err != nil { return plumbing.ZeroHash } return h.Sum() } func (n *Node) String() string { return n.path } func (n *Node) Type() string { return "fs" } ================================================ FILE: modules/merkletrie/filesystem/node_test.go ================================================ package filesystem import ( "context" "fmt" "os" "testing" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) func WalkNode(ctx context.Context, n noder.Noder) { nodes, err := n.Children(ctx) if err != nil { fmt.Fprintf(os.Stderr, "walk error: %s\n", err) return } for _, a := range nodes { if a.IsDir() { WalkNode(ctx, a) continue } fmt.Fprintf(os.Stderr, "%s\n", a.String()) } } func TestNode(t *testing.T) { n := NewRootNode("/tmp/fsnode", noder.NewSparseTreeMatcher([]string{"a", "a/a", "c"})) WalkNode(t.Context(), n) } func TestNode2(t *testing.T) { n := NewRootNode("/tmp/fsnode", noder.NewSparseTreeMatcher([]string{})) WalkNode(t.Context(), n) } func TestNode3(t *testing.T) { n := NewRootNode("/tmp/xh5", noder.NewSparseTreeMatcher([]string{"dir1", "dir3"})) WalkNode(t.Context(), n) } ================================================ FILE: modules/merkletrie/index/node.go ================================================ package index import ( "context" "path" "strings" "time" "github.com/antgroup/hugescm/modules/merkletrie/noder" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" "github.com/antgroup/hugescm/modules/plumbing/format/index" ) // The Node represents a index.Entry or a directory inferred from the path // of all entries. It implements the interface noder.Noder of merkletrie // package. // // This implementation implements a "standard" hash method being able to be // compared with any other noder.Noder implementation inside of go-git type Node struct { path string entry *index.Entry children []noder.Noder isDir bool skip bool fragments plumbing.Hash } type FragmentsGetter func(ctx context.Context, e *index.Entry) *index.Entry // NewRootNode returns the root node of a computed tree from a index.Index, func NewRootNode(ctx context.Context, idx *index.Index, fn FragmentsGetter) noder.Noder { const rootNode = "" m := map[string]*Node{rootNode: {isDir: true}} for _, e := range idx.Entries { parts := strings.Split(e.Name, "/") var fullpath string for _, part := range parts { parent := fullpath fullpath = path.Join(fullpath, part) if _, ok := m[fullpath]; ok { continue } n := &Node{path: fullpath} if fullpath == e.Name { if e.Mode&filemode.Fragments != 0 { n.fragments = e.Hash n.entry = fn(ctx, e) } else { n.entry = e } n.skip = e.SkipWorktree } else { n.isDir = true } m[n.path] = n m[parent].children = append(m[parent].children, n) } } return m[rootNode] } func (n *Node) String() string { return n.path } func (n *Node) Skip() bool { return n.skip } // Hash the hash of a filesystem is a 36-byte slice, is the result of // concatenating the computed plumbing.Hash of the file as a Blob and its // plumbing.FileMode; that way the difftree algorithm will detect changes in the // contents of files and also in their mode. // // If the node is computed and not based on a index.Entry the hash is equals // to a 36-bytes slices of zero values. func (n *Node) Hash() []byte { if n.entry == nil { return make([]byte, plumbing.HASH_DIGEST_SIZE+4) } return append(n.entry.Hash[:], n.entry.Mode.Bytes()...) } // HashRaw: Get the original Hash of Entry. If it is fragments, get the hash of fragments, otherwise get the hash of blob func (n *Node) HashRaw() plumbing.Hash { if n.entry == nil { return plumbing.ZeroHash } if !n.fragments.IsZero() { return n.fragments } return n.entry.Hash } func (n *Node) Mode() filemode.FileMode { if n.entry == nil { return filemode.Empty } return n.entry.Mode // origin mode. not fragments mode } func (n *Node) TrueMode() filemode.FileMode { if n.entry == nil { return filemode.Empty } if !n.fragments.IsZero() { return n.entry.Mode | filemode.Fragments } return n.entry.Mode } func (n *Node) ModifiedAt() time.Time { if n.entry == nil { return time.Time{} } return n.entry.ModifiedAt } func (n *Node) IsFragments() bool { return !n.fragments.IsZero() } func (n *Node) Size() int64 { if n.entry == nil { return 0 } return int64(n.entry.Size) } func (n *Node) Name() string { return path.Base(n.path) } func (n *Node) IsDir() bool { return n.isDir } func (n *Node) Children(ctx context.Context) ([]noder.Noder, error) { return n.children, nil } func (n *Node) NumChildren(ctx context.Context) (int, error) { return len(n.children), nil } ================================================ FILE: modules/merkletrie/internal/frame/frame.go ================================================ package frame import ( "bytes" "context" "fmt" "sort" "strings" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) // A Frame is a collection of siblings in a trie, sorted alphabetically // by name. type Frame struct { // siblings, sorted in reverse alphabetical order by name stack []noder.Noder } type byName []noder.Noder func (a byName) Len() int { return len(a) } func (a byName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byName) Less(i, j int) bool { return strings.Compare(a[i].Name(), a[j].Name()) < 0 } // New returns a frame with the children of the provided node. func New(ctx context.Context, n noder.Noder) (*Frame, error) { children, err := n.Children(ctx) if err != nil { return nil, err } sort.Sort(sort.Reverse(byName(children))) return &Frame{ stack: children, }, nil } // String returns the quoted names of the noders in the frame sorted in // alphabetical order by name, surrounded by square brackets and // separated by comas. // // Examples: // // [] // ["a", "b"] func (f *Frame) String() string { var buf bytes.Buffer _ = buf.WriteByte('[') sep := "" for i := f.Len() - 1; i >= 0; i-- { _, _ = buf.WriteString(sep) sep = ", " _, _ = fmt.Fprintf(&buf, "%q", f.stack[i].Name()) } _ = buf.WriteByte(']') return buf.String() } // First returns, but dont extract, the noder with the alphabetically // smaller name in the frame and true if the frame was not empty. // Otherwise it returns nil and false. func (f *Frame) First() (noder.Noder, bool) { if f.Len() == 0 { return nil, false } top := f.Len() - 1 return f.stack[top], true } // Drop extracts the noder with the alphabetically smaller name in the // frame or does nothing if the frame was empty. func (f *Frame) Drop() { if f.Len() == 0 { return } top := f.Len() - 1 f.stack[top] = nil f.stack = f.stack[:top] } // Len returns the number of noders in the frame. func (f *Frame) Len() int { return len(f.stack) } ================================================ FILE: modules/merkletrie/internal/fsnoder/dir.go ================================================ package fsnoder import ( "bytes" "context" "errors" "hash/fnv" "sort" "strings" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) // Dir values implement directory-like noders. type dir struct { name string // relative children []noder.Noder // sorted by name hash []byte // memoized } type byName []noder.Noder func (a byName) Len() int { return len(a) } func (a byName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byName) Less(i, j int) bool { return strings.Compare(a[i].Name(), a[j].Name()) < 0 } // copies the children slice, so nobody can modify the order of its // elements from the outside. func newDir(name string, children []noder.Noder) (*dir, error) { cloned := make([]noder.Noder, len(children)) _ = copy(cloned, children) sort.Sort(byName(cloned)) if hasChildrenWithNoName(cloned) { return nil, errors.New("non-root inner nodes cannot have empty names") } if hasDuplicatedNames(cloned) { return nil, errors.New("children cannot have duplicated names") } return &dir{ name: name, children: cloned, }, nil } func hasChildrenWithNoName(children []noder.Noder) bool { for _, c := range children { if c.Name() == "" { return true } } return false } func hasDuplicatedNames(children []noder.Noder) bool { if len(children) < 2 { return false } for i := 1; i < len(children); i++ { if children[i].Name() == children[i-1].Name() { return true } } return false } func (d *dir) Hash() []byte { if d.hash == nil { d.calculateHash() } return d.hash } // hash is calculated as the hash of "dir " plus the concatenation, for // each child, of its name, a space and its hash. Children are sorted // alphabetically before calculating the hash, so the result is unique. func (d *dir) calculateHash() { h := fnv.New64a() _, _ = h.Write([]byte("dir ")) for _, c := range d.children { _, _ = h.Write([]byte(c.Name())) _, _ = h.Write([]byte(" ")) _, _ = h.Write(c.Hash()) } d.hash = h.Sum([]byte{}) } func (d *dir) Name() string { return d.name } func (d *dir) IsDir() bool { return true } // returns a copy so nobody can alter the order of its elements from the // outside. func (d *dir) Children(ctx context.Context) ([]noder.Noder, error) { clon := make([]noder.Noder, len(d.children)) _ = copy(clon, d.children) return clon, nil } func (d *dir) NumChildren(ctx context.Context) (int, error) { return len(d.children), nil } func (d *dir) Skip() bool { return false } const ( dirStartMark = '(' dirEndMark = ')' dirElementSep = ' ' ) // The string generated by this method is unique for each tree, as the // children of each node are sorted alphabetically by name when // generating the string. func (d *dir) String() string { var buf bytes.Buffer buf.WriteString(d.name) buf.WriteRune(dirStartMark) for i, c := range d.children { if i != 0 { buf.WriteRune(dirElementSep) } buf.WriteString(c.String()) } buf.WriteRune(dirEndMark) return buf.String() } ================================================ FILE: modules/merkletrie/internal/fsnoder/doc.go ================================================ /* Package fsnoder allows to create merkletrie noders that resemble file systems, from human readable string descriptions. Its intended use is generating noders in tests in a readable way. For example: root, _ = New("(a<1> b<2>, B(c<3> d()))") will create a noder as follows: root - "root" is an unnamed dir containing "a", "b" and "B". / | \ - "a" is a file containing the string "1". / | \ - "b" is a file containing the string "2". a b B - "B" is a directory containing "c" and "d". / \ - "c" is a file containing the string "3". c d - "D" is an empty directory. Files are expressed as: - one or more letters and dots for the name of the file - a single number, between angle brackets, for the contents of the file. - examples: a<1>, foo.go<2>. Directories are expressed as: - one or more letters for the name of the directory. - its elements between parents, separated with spaces, in any order. - (optionally) the root directory can be unnamed, by skipping its name. Examples: - D(a<1> b<2>) : two files, "a" and "b", having "1" and "2" as their respective contents, inside a directory called "D". - A() : An empty directory called "A". - A(b<>) : An directory called "A", with an empty file inside called "b": - (b(c<1> d(e<2>)) f<>) : an unamed directory containing: ├── b --> directory │   ├── c --> file containing "1" │   └── d --> directory │   └── e --> file containing "2" └── f --> empty file */ package fsnoder ================================================ FILE: modules/merkletrie/internal/fsnoder/file.go ================================================ package fsnoder import ( "bytes" "context" "errors" "hash/fnv" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) // file values represent file-like noders in a merkle trie. type file struct { name string // relative contents string hash []byte // memoized } // newFile returns a noder representing a file with the given contents. func newFile(name, contents string) (*file, error) { if name == "" { return nil, errors.New("files cannot have empty names") } return &file{ name: name, contents: contents, }, nil } // The hash of a file is just its contents. // Empty files will have the fnv64 basis offset as its hash. func (f *file) Hash() []byte { if f.hash == nil { h := fnv.New64a() h.Write([]byte(f.contents)) // it nevers returns an error. f.hash = h.Sum(nil) } return f.hash } func (f *file) Name() string { return f.name } func (f *file) IsDir() bool { return false } func (f *file) Children(ctx context.Context) ([]noder.Noder, error) { return noder.NoChildren, nil } func (f *file) NumChildren(ctx context.Context) (int, error) { return 0, nil } func (f *file) Skip() bool { return false } const ( fileStartMark = '<' fileEndMark = '>' ) // String returns a string formatted as: name. func (f *file) String() string { var buf bytes.Buffer buf.WriteString(f.name) buf.WriteRune(fileStartMark) buf.WriteString(f.contents) buf.WriteRune(fileEndMark) return buf.String() } ================================================ FILE: modules/merkletrie/internal/fsnoder/new.go ================================================ package fsnoder import ( "bytes" "errors" "fmt" "io" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) // New function creates a full merkle trie from the string description of // a filesystem tree. See examples of the string format in the package // description. func New(s string) (noder.Noder, error) { return decodeDir([]byte(s), root) } const ( root = true nonRoot = false ) // Expected data: a fsnoder description, for example: A(foo bar qux ...). // When isRoot is true, unnamed dirs are supported, for example: (foo // bar qux ...) func decodeDir(data []byte, isRoot bool) (*dir, error) { data = bytes.TrimSpace(data) if len(data) == 0 { return nil, io.EOF } // get the name of the dir and remove it from the data. In case the // there is no name and isRoot is true, just use "" as the name. var name string switch end := bytes.IndexRune(data, dirStartMark); end { case -1: return nil, fmt.Errorf("%c not found", dirStartMark) case 0: if isRoot { name = "" } else { return nil, fmt.Errorf("inner unnamed dirs not allowed: %s", data) } default: name = string(data[0:end]) data = data[end:] } // check data ends with the dirEndMark if data[len(data)-1] != dirEndMark { return nil, fmt.Errorf("malformed data: last %q not found", dirEndMark) } data = data[1 : len(data)-1] // remove initial '(' and last ')' children, err := decodeChildren(data) if err != nil { return nil, err } return newDir(name, children) } func isNumber(b rune) bool { return '0' <= b && b <= '9' } func isLetter(b rune) bool { return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') } func decodeChildren(data []byte) ([]noder.Noder, error) { data = bytes.TrimSpace(data) if len(data) == 0 { return nil, nil } chunks := split(data) ret := make([]noder.Noder, len(chunks)) var err error for i, c := range chunks { ret[i], err = decodeChild(c) if err != nil { return nil, fmt.Errorf("malformed element %d (%s): %w", i, c, err) } } return ret, nil } // returns the description of the elements of a dir. It is just looking // for spaces if they are not part of inner dirs. func split(data []byte) [][]byte { chunks := [][]byte{} start := 0 dirDepth := 0 for i, b := range data { switch b { case dirStartMark: dirDepth++ case dirEndMark: dirDepth-- case dirElementSep: if dirDepth == 0 { chunks = append(chunks, data[start:i+1]) start = i + 1 } } } chunks = append(chunks, data[start:]) return chunks } // A child can be a file or a dir. func decodeChild(data []byte) (noder.Noder, error) { clean := bytes.TrimSpace(data) if len(data) < 3 { return nil, fmt.Errorf("element too short: %s", clean) } fileNameEnd := bytes.IndexRune(data, fileStartMark) dirNameEnd := bytes.IndexRune(data, dirStartMark) switch { case fileNameEnd == -1 && dirNameEnd == -1: return nil, errors.New("malformed child, no file or dir start mark found") case fileNameEnd == -1: return decodeDir(clean, nonRoot) case dirNameEnd == -1: return decodeFile(clean) case dirNameEnd < fileNameEnd: return decodeDir(clean, nonRoot) case dirNameEnd > fileNameEnd: return decodeFile(clean) } return nil, errors.New("unreachable") } func decodeFile(data []byte) (noder.Noder, error) { nameEnd := bytes.IndexRune(data, fileStartMark) if nameEnd == -1 { return nil, fmt.Errorf("malformed file, no %c found", fileStartMark) } contentStart := nameEnd + 1 contentEnd := bytes.IndexRune(data, fileEndMark) if contentEnd == -1 { return nil, fmt.Errorf("malformed file, no %c found", fileEndMark) } switch { case nameEnd > contentEnd: return nil, fmt.Errorf("malformed file, found %c before %c", fileEndMark, fileStartMark) case contentStart == contentEnd: name := string(data[:nameEnd]) if !validFileName(name) { return nil, errors.New("invalid file name") } return newFile(name, "") default: name := string(data[:nameEnd]) if !validFileName(name) { return nil, errors.New("invalid file name") } contents := string(data[contentStart:contentEnd]) if !validFileContents(contents) { return nil, errors.New("invalid file contents") } return newFile(name, contents) } } func validFileName(s string) bool { for _, c := range s { if !isLetter(c) && c != '.' { return false } } return true } func validFileContents(s string) bool { for _, c := range s { if !isNumber(c) { return false } } return true } // HashEqual returns if a and b have the same hash. func HashEqual(a, b noder.Hasher) bool { return bytes.Equal(a.Hash(), b.Hash()) } ================================================ FILE: modules/merkletrie/iter.go ================================================ package merkletrie import ( "context" "fmt" "io" "github.com/antgroup/hugescm/modules/merkletrie/internal/frame" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) // Iter is an iterator for merkletries (only the trie part of the // merkletrie is relevant here, it does not use the Hasher interface). // // The iteration is performed in depth-first pre-order. Entries at each // depth are traversed in (case-sensitive) alphabetical order. // // This is the kind of traversal you will expect when listing ordinary // files and directories recursively, for example: // // Trie Traversal order // ---- --------------- // . // / | \ c // / | \ d/ // d c z ===> d/a // / \ d/b // b a z // // This iterator is somewhat especial as you can chose to skip whole // "directories" when iterating: // // - The Step method will iterate normally. // // - the Next method will not descend deeper into the tree. // // For example, if the iterator is at `d/`, the Step method will return // `d/a` while the Next would have returned `z` instead (skipping `d/` // and its descendants). The name of the these two methods are based on // the well known "next" and "step" operations, quite common in // debuggers, like gdb. // // The paths returned by the iterator will be relative, if the iterator // was created from a single node, or absolute, if the iterator was // created from the path to the node (the path will be prefixed to all // returned paths). type Iter struct { // Tells if the iteration has started. hasStarted bool // The top of this stack has the current node and its siblings. The // rest of the stack keeps the ancestors of the current node and // their corresponding siblings. The current element is always the // top element of the top frame. // // When "step"ping into a node, its children are pushed as a new // frame. // // When "next"ing pass a node, the current element is dropped by // popping the top frame. frameStack []*frame.Frame // The base path used to turn the relative paths used internally by // the iterator into absolute paths used by external applications. // For relative iterator this will be nil. base noder.Path } // NewIter returns a new relative iterator using the provider noder as // its unnamed root. When iterating, all returned paths will be // relative to node. func NewIter(ctx context.Context, n noder.Noder) (*Iter, error) { return newIter(ctx, n, nil) } // NewIterFromPath returns a new absolute iterator from the noder at the // end of the path p. When iterating, all returned paths will be // absolute, using the root of the path p as their root. func NewIterFromPath(ctx context.Context, p noder.Path) (*Iter, error) { return newIter(ctx, p, p) // Path implements Noder } func newIter(ctx context.Context, root noder.Noder, base noder.Path) (*Iter, error) { ret := &Iter{ base: base, } if root == nil { return ret, nil } frame, err := frame.New(ctx, root) if err != nil { return nil, err } ret.push(frame) return ret, nil } func (iter *Iter) top() (*frame.Frame, bool) { if len(iter.frameStack) == 0 { return nil, false } top := len(iter.frameStack) - 1 return iter.frameStack[top], true } func (iter *Iter) push(f *frame.Frame) { iter.frameStack = append(iter.frameStack, f) } const ( doDescend = true dontDescend = false ) // Next returns the path of the next node without descending deeper into // the trie and nil. If there are no more entries in the trie it // returns nil and io.EOF. In case of error, it will return nil and the // error. func (iter *Iter) Next(ctx context.Context) (noder.Path, error) { return iter.advance(ctx, dontDescend) } // Step returns the path to the next node in the trie, descending deeper // into it if needed, and nil. If there are no more nodes in the trie, // it returns nil and io.EOF. In case of error, it will return nil and // the error. func (iter *Iter) Step(ctx context.Context) (noder.Path, error) { return iter.advance(ctx, doDescend) } // Advances the iterator in the desired direction: descend or // dontDescend. // // Returns the new current element and a nil error on success. If there // are no more elements in the trie below the base, it returns nil, and // io.EOF. Returns nil and an error in case of errors. func (iter *Iter) advance(ctx context.Context, wantDescend bool) (noder.Path, error) { current, err := iter.current() if err != nil { return nil, err } // The first time we just return the current node. if !iter.hasStarted { iter.hasStarted = true return current, nil } // Advances means getting a next current node, either its first child or // its next sibling, depending if we must descend or not. numChildren, err := current.NumChildren(ctx) if err != nil { return nil, err } mustDescend := numChildren != 0 && wantDescend if mustDescend { // descend: add a new frame with the current's children. frame, err := frame.New(ctx, current) if err != nil { return nil, err } iter.push(frame) } else { // don't descend: just drop the current node iter.drop() } return iter.current() } // Returns the path to the current node, adding the base if there was // one, and a nil error. If there were no noders left, it returns nil // and io.EOF. If an error occurred, it returns nil and the error. func (iter *Iter) current() (noder.Path, error) { if topFrame, ok := iter.top(); !ok { return nil, io.EOF } else if _, ok := topFrame.First(); !ok { return nil, io.EOF } ret := make(noder.Path, 0, len(iter.base)+len(iter.frameStack)) // concat the base... ret = append(ret, iter.base...) // ... and the current node and all its ancestors for i, f := range iter.frameStack { t, ok := f.First() if !ok { panic(fmt.Sprintf("frame %d is empty", i)) } ret = append(ret, t) } return ret, nil } // removes the current node if any, and all the frames that become empty as a // consequence of this action. func (iter *Iter) drop() { frame, ok := iter.top() if !ok { return } frame.Drop() // if the frame is empty, remove it and its parent, recursively if frame.Len() == 0 { top := len(iter.frameStack) - 1 iter.frameStack[top] = nil iter.frameStack = iter.frameStack[:top] iter.drop() } } ================================================ FILE: modules/merkletrie/noder/noder.go ================================================ // Package noder provide an interface for defining nodes in a // merkletrie, their hashes and their paths (a noders and its // ancestors). // // The hasher interface is easy to implement naively by elements that // already have a hash, like git blobs and trees. More sophisticated // implementations can implement the Equal function in exotic ways // though: for instance, comparing the modification time of directories // in a filesystem. package noder import ( "context" "fmt" "time" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" ) // Hasher interface is implemented by types that can tell you // their hash. type Hasher interface { Hash() []byte } type OriginHasher interface { HashRaw() plumbing.Hash } type Comparators interface { Mode() filemode.FileMode ModifiedAt() time.Time } // Equal functions take two hashers and return if they are equal. // // These functions are expected to be faster than reflect.Equal or // reflect.DeepEqual because they can compare just the hash of the // objects, instead of their contents, so they are expected to be O(1). type Equal func(a, b Hasher) bool // The Noder interface is implemented by the elements of a Merkle Trie. // // There are two types of elements in a Merkle Trie: // // - file-like nodes: they cannot have children. // // - directory-like nodes: they can have 0 or more children and their // hash is calculated by combining their children hashes. type Noder interface { Hasher fmt.Stringer // for testing purposes // Name returns the name of an element (relative, not its full // path). Name() string // IsDir returns true if the element is a directory-like node or // false if it is a file-like node. IsDir() bool // Children returns the children of the element. Note that empty // directory-like noders and file-like noders will both return // NoChildren. Children(ctx context.Context) ([]Noder, error) // NumChildren returns the number of children this element has. // // This method is an optimization: the number of children is easily // calculated as the length of the value returned by the Children // method (above); yet, some implementations will be able to // implement NumChildren in O(1) while Children is usually more // complex. NumChildren(ctx context.Context) (int, error) Skip() bool } // NoChildren represents the children of a noder without children. var NoChildren = []Noder{} ================================================ FILE: modules/merkletrie/noder/path.go ================================================ package noder import ( "bytes" "context" "strings" ) // Path values represent a noder and its ancestors. The root goes first // and the actual final noder the path is referring to will be the last. // // A path implements the Noder interface, redirecting all the interface // calls to its final noder. // // Paths build from an empty Noder slice are not valid paths and should // not be used. type Path []Noder func (p Path) Skip() bool { if len(p) > 0 { return p.Last().Skip() } return false } // String returns the full path of the final noder as a string, using // "/" as the separator. func (p Path) String() string { var buf bytes.Buffer sep := "" for _, e := range p { _, _ = buf.WriteString(sep) sep = "/" _, _ = buf.WriteString(e.Name()) } return buf.String() } // Last returns the final noder in the path. func (p Path) Last() Noder { return p[len(p)-1] } // Hash returns the hash of the final noder of the path. func (p Path) Hash() []byte { return p.Last().Hash() } // Name returns the name of the final noder of the path. func (p Path) Name() string { return p.Last().Name() } // IsDir returns if the final noder of the path is a directory-like // noder. func (p Path) IsDir() bool { return p.Last().IsDir() } // Children returns the children of the final noder in the path. func (p Path) Children(ctx context.Context) ([]Noder, error) { return p.Last().Children(ctx) } // NumChildren returns the number of children the final noder of the // path has. func (p Path) NumChildren(ctx context.Context) (int, error) { return p.Last().NumChildren(ctx) } // Compare returns -1, 0 or 1 if the path p is smaller, equal or bigger // than other, in "directory order"; for example: // // "a" < "b" // "a/b/c/d/z" < "b" // "a/b/a" > "a/b" func (p Path) Compare(other Path) int { i := 0 for { switch { case len(other) == len(p) && i == len(p): return 0 case i == len(other): return 1 case i == len(p): return -1 default: // We do *not* normalize Unicode here. CGit doesn't. // https://github.com/src-d/go-git/issues/1057 cmp := strings.Compare(p[i].Name(), other[i].Name()) if cmp != 0 { return cmp } } i++ } } ================================================ FILE: modules/merkletrie/noder/sparse.go ================================================ package noder import ( "path" "strings" "github.com/antgroup/hugescm/modules/strengthen" ) type Matcher interface { Len() int Match(name string) (Matcher, bool) } type sparseTreeMatcher struct { entries map[string]*sparseTreeMatcher } func (m *sparseTreeMatcher) Len() int { return len(m.entries) } func (m *sparseTreeMatcher) Match(name string) (Matcher, bool) { sm, ok := m.entries[name] return sm, ok } func (m *sparseTreeMatcher) insert(p string) { dv := strengthen.StrSplitSkipEmpty(p, '/', 10) current := m for _, d := range dv { e, ok := current.entries[d] if !ok { e = &sparseTreeMatcher{entries: make(map[string]*sparseTreeMatcher)} current.entries[d] = e } current = e } } func NewSparseTreeMatcher(dirs []string) Matcher { root := &sparseTreeMatcher{entries: make(map[string]*sparseTreeMatcher)} for _, d := range dirs { root.insert(d) } return root } type SparseMatcher interface { Match(string) bool } type sparseMatcher struct { sparseEntries []string } const ( dot = "." ) // isSparseMatch: sparse match dir // eg: // // sparseDir: foo/bar // parent: foo/bar/abc --> match // parent: foo/abc --> not match // parent: foo --> match func isSparseMatch(sparseDir, parent string) bool { parent += "/" return strings.HasPrefix(parent, sparseDir) || strings.HasPrefix(sparseDir, parent) } func (m *sparseMatcher) Match(name string) bool { if len(m.sparseEntries) == 0 { return true } parent := path.Dir(name) if parent == dot { return true } for _, sparseDir := range m.sparseEntries { if isSparseMatch(sparseDir, parent) { return true } } return false } func NewSparseMatcher(dirs []string) SparseMatcher { entries := make([]string, 0, len(dirs)) for _, d := range dirs { p := path.Clean(d) if p == dot { continue } entries = append(entries, p+"/") } return &sparseMatcher{sparseEntries: entries} } ================================================ FILE: modules/merkletrie/noder/sparse_test.go ================================================ package noder import ( "fmt" "os" "path" "testing" ) func TestNewSparseTreeMatcher(t *testing.T) { tt := NewSparseTreeMatcher([]string{"dir3", "dir4/abc", "abcd/efgh/mnopq"}) fmt.Fprintf(os.Stderr, "%d\n", tt.Len()) } func TestPathDir(t *testing.T) { dirs := []string{ "a.txt", "abc/abc.txt", } for _, d := range dirs { fmt.Fprintf(os.Stderr, "%s\n", path.Dir(d)) } } func TestSparseMatcher(t *testing.T) { ss := []string{".aci.yml", ".dailyCheck.aci.yml", ".dailyTest.aci.yml", ".gitignore", ".ignore_pr.yml", "sigma/appops/OWNERS", "sigma/appops/intelligent_engine/abc.txt", "sigma/appops/intelligent_engine/business_intelligence-recommendation_engine/tapeargo/OWNERS", "sigma/appops/intelligent_engine/business_intelligence-recommendation_engine/tapeargo/README.md", "sigma/appops/intelligent_engine/business_intelligence-recommendation_engine/tapeargo/base/base.k", "sigma/appops/jackson/business_intelligence-recommendation_engine/tapeargo/OWNERS", "sigma/appops/jackson/business_intelligence-recommendation_engine/tapeargo/README.md", "sigma/appops/jackson/business_intelligence-recommendation_engine/tapeargo/base/base.k", "docs/dev.md", } m := NewSparseMatcher([]string{"sigma/appops/intelligent_engine"}) for _, s := range ss { fmt.Fprintf(os.Stderr, "Matched: %v %s\n", m.Match(s), s) } } ================================================ FILE: modules/mime/LICENSE ================================================ MIT License Copyright (c) 2018-2020 Gabriel Vasile Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/mime/README.md ================================================ # MIME Port from [https://github.com/gabriel-vasile/mimetype](https://github.com/gabriel-vasile/mimetype) 主要改进:浏览器安全。 ================================================ FILE: modules/mime/VERSION ================================================ https://github.com/gabriel-vasile/mimetype 59c8d109cb663c6ebe9f46ee1f97a1a825eeb5dd # misc: add SECURITY.md file ================================================ FILE: modules/mime/internal/charset/charset.go ================================================ package charset import ( "bytes" "strings" "unicode/utf8" "github.com/antgroup/hugescm/modules/chardet" "github.com/antgroup/hugescm/modules/mime/internal/markup" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) const ( F = 0 /* character never appears in text */ T = 1 /* character appears in plain ASCII text */ I = 2 /* character appears in ISO-8859 text */ X = 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ ) var ( boms = []struct { bom []byte enc string }{ {[]byte{0xEF, 0xBB, 0xBF}, "utf-8"}, {[]byte{0x00, 0x00, 0xFE, 0xFF}, "utf-32be"}, {[]byte{0xFF, 0xFE, 0x00, 0x00}, "utf-32le"}, {[]byte{0xFE, 0xFF}, "utf-16be"}, {[]byte{0xFF, 0xFE}, "utf-16le"}, } // https://github.com/file/file/blob/fa93fb9f7d21935f1c7644c47d2975d31f12b812/src/encoding.c#L241 textChars = [256]byte{ /* BEL BS HT LF VT FF CR */ F, F, F, F, F, F, F, T, T, T, T, T, T, T, F, F, /* 0x0X */ /* ESC */ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ /* NEL */ X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xfX */ } ) // FromBOM returns the charset declared in the BOM of content. func FromBOM(content []byte) string { for _, b := range boms { if bytes.HasPrefix(content, b.bom) { return b.enc } } return "" } var ( defaultDetector = chardet.NewTextDetector() ) // FromPlain returns the charset of a plain text. It relies on BOM presence // and it falls back on checking each byte in content. func FromPlain(content []byte) string { if len(content) == 0 { return "" } if cset := FromBOM(content); cset != "" { return cset } origContent := content // Try to detect UTF-8. // First eliminate any partial rune at the end. for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- { b := content[i] if b < 0x80 { break } if utf8.RuneStart(b) { content = content[:i] break } } hasHighBit := false for _, c := range content { if c >= 0x80 { hasHighBit = true break } } if hasHighBit && utf8.Valid(content) { return "utf-8" } // ASCII is a subset of UTF8. Follow W3C recommendation and replace with UTF8. if ascii(origContent) { return "utf-8" } // Fallback use chardet if r, err := defaultDetector.DetectBest(origContent); err == nil { return r.Charset } return latin(origContent) } func latin(content []byte) string { hasControlBytes := false for _, b := range content { t := textChars[b] if t != T && t != I { return "" } if b >= 0x80 && b <= 0x9F { hasControlBytes = true } } // Code range 0x80 to 0x9F is reserved for control characters in ISO-8859-1 // (so-called C1 Controls). Windows 1252, however, has printable punctuation // characters in this range. if hasControlBytes { return "windows-1252" } return "iso-8859-1" } func ascii(content []byte) bool { for _, b := range content { if textChars[b] != T { return false } } return true } // FromXML returns the charset of an XML document. It relies on the XML // header and falls back on the plain // text content. func FromXML(content []byte) string { if cset := fromXML(content); cset != "" { return cset } return FromPlain(content) } func fromXML(s scan.Bytes) string { xml := []byte(" and falls back on the // plain text content. func FromHTML(content []byte) string { if cset := FromBOM(content); cset != "" { return cset } if cset := fromHTML(content); cset != "" { return cset } return FromPlain(content) } func fromHTML(s scan.Bytes) string { const ( dontKnow = iota doNeedPragma doNotNeedPragma ) meta := []byte(" ", "", }, { "`, "", }, { ``, "", }, { ``, "iso-8859-15", }, { ``, "щ", }, { ``, "щ", }} func TestFromHTML(t *testing.T) { for _, tc := range fromHTMLTestCases { t.Run(tc.in, func(t *testing.T) { got := fromHTML([]byte(tc.in)) if got != tc.out { t.Errorf("got: %s, want: %s", got, tc.out) } }) } } func FuzzFromHTML(f *testing.F) { for _, tc := range fromHTMLTestCases { f.Add([]byte(tc.in)) } f.Fuzz(func(t *testing.T, d []byte) { fromHTML(d) }) } var fromXMLTestCases = []struct { in string out string }{{ "", "", }, { " not `, "c", }, { ``, "c", }, { ` `, "c", }} func TestFromXML(t *testing.T) { for _, tc := range fromXMLTestCases { t.Run(tc.in, func(t *testing.T) { got := fromXML([]byte(tc.in)) if got != tc.out { t.Errorf("got: %s, want: %s", got, tc.out) } }) } } func FuzzFromXML(f *testing.F) { for _, s := range fromXMLTestCases { f.Add([]byte(s.in)) } f.Fuzz(func(t *testing.T, d []byte) { if charset := FromXML(d); charset == "" { t.Skip() } }) } func TestFromPlain(t *testing.T) { tcases := []struct { raw []byte charset string }{ {[]byte{0xe6, 0xf8, 0xe5, 0x85, 0x85}, "windows-1252"}, {[]byte{0xe6, 0xf8, 0xe5}, "iso-8859-1"}, {[]byte("æøå"), "utf-8"}, {[]byte{}, ""}, } for _, tc := range tcases { if cs := FromPlain(tc.raw); cs != tc.charset { t.Errorf("in: %v; expected: %s; got: %s", tc.raw, tc.charset, cs) } } } func FuzzFromPlain(f *testing.F) { samples := [][]byte{ {0xe6, 0xf8, 0xe5, 0x85, 0x85}, {0xe6, 0xf8, 0xe5}, []byte("æøå"), } for _, s := range samples { f.Add(s) } f.Fuzz(func(t *testing.T, d []byte) { if charset := FromPlain(d); charset == "" { t.Skip() } }) } const xmlDoc = ` Tove Jani Reminder Don't forget me this weekend! ` const htmlDoc = ` ` func BenchmarkFromHTML(b *testing.B) { b.ReportAllocs() doc := []byte(htmlDoc) for b.Loop() { FromHTML(doc) } } func BenchmarkFromXML(b *testing.B) { b.ReportAllocs() doc := []byte(xmlDoc) for b.Loop() { FromXML(doc) } } func BenchmarkFromPlain(b *testing.B) { b.ReportAllocs() doc := []byte(xmlDoc) for b.Loop() { FromPlain(doc) } } ================================================ FILE: modules/mime/internal/csv/parser.go ================================================ package csv import ( "bytes" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // Parser is a CSV reader that only counts fields. // It avoids allocating/copying memory and to verify behaviour, it is tested // and fuzzed against encoding/csv parser. type Parser struct { comma byte comment byte s scan.Bytes } func NewParser(comma, comment byte, s scan.Bytes) *Parser { return &Parser{ comma: comma, comment: comment, s: s, } } func (r *Parser) readLine() (line []byte, cutShort bool) { line = r.s.ReadSlice('\n') n := len(line) if n > 0 && line[n-1] == '\r' { return line[:n-1], false // drop \r at end of line } // This line is problematic. The logic from CountFields comes from // encoding/csv.Reader which relies on mutating the input bytes. // https://github.com/golang/go/blob/b3251514531123d7fd007682389bce7428d159a0/src/encoding/csv/reader.go#L275-L279 // To avoid mutating the input, we return cutShort. #680 if n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' { return line[:n-2], true } return line, false } // CountFields reads one CSV line and counts how many records that line contained. // hasMore reports whether there are more lines in the input. // collectIndexes makes CountFields return a list of indexes where CSV fields // start in the line. These indexes are used to test the correctness against the // encoding/csv parser. func (r *Parser) CountFields(collectIndexes bool) (fields int, fieldPos []int, hasMore bool) { finished := false var line scan.Bytes var cutShort bool for { line, cutShort = r.readLine() if finished { return 0, nil, false } finished = len(r.s) == 0 && len(line) == 0 if len(line) == lengthNL(line) { line = nil continue // Skip empty lines. } if len(line) > 0 && line[0] == r.comment { line = nil continue } break } indexes := []int{} originalLine := line parseField: for { if len(line) == 0 || line[0] != '"' { // non-quoted string field fields++ if collectIndexes { indexes = append(indexes, len(originalLine)-len(line)) } i := bytes.IndexByte(line, r.comma) if i >= 0 { line.Advance(i + 1) // 1 to get over ending comma continue parseField } break parseField } else { // Quoted string field. if collectIndexes { indexes = append(indexes, len(originalLine)-len(line)) } line.Advance(1) // get over starting quote for { i := bytes.IndexByte(line, '"') if i >= 0 { line.Advance(i + 1) // 1 for ending quote switch rn := line.Peek(); { case rn == '"': line.Advance(1) case rn == r.comma: line.Advance(1) fields++ continue parseField case lengthNL(line) == len(line): fields++ break parseField } } else if len(line) > 0 || cutShort { line, cutShort = r.readLine() originalLine = line } else { fields++ break parseField } } } } return fields, indexes, fields != 0 } // lengthNL reports the number of bytes for the trailing \n. func lengthNL(b []byte) int { if len(b) > 0 && b[len(b)-1] == '\n' { return 1 } return 0 } ================================================ FILE: modules/mime/internal/csv/parser_test.go ================================================ package csv import ( "encoding/csv" "fmt" "io" "reflect" "strings" "testing" "unicode" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) type line struct { fields int // indexes[i] says at which index in the line the i-th field starts at. indexes []int hasMore bool } var testcases = []struct { name string csv string comma byte comment byte }{{ "empty", "", ',', '#', }, { "simple", `foo,bar,baz 1,2,3 "1","a",b`, ',', '#', }, { "crlf line endings", "foo,bar,baz\r\n1,2,3\r\n", ',', '#', }, { "leading and trailing space", `1, abc ,3`, ',', '#', }, { "empty quote", `1,"",3`, ',', '#', }, { "quotes with comma", `1,",",3`, ',', '#', }, { "quotes with quote", `1,""",3`, ',', '#', }, { "fewer fields", `foo,bar,baz 1,2`, ',', '#', }, { "more fields", `1,2,3,4`, ',', '#', }, { "forgot quote", `1,"Forgot,3`, ',', '#', }, { "unescaped quote", `1,"abc"def",3`, ',', '#', }, { "unescaped quote", `1,"abc"def",3`, ',', '#', }, { "unescaped quote2", `1,abc"quote"def,3`, ',', '#', }, { "escaped quote", `1,abc""def,3`, ',', '#', }, { "new line", `1,abc def,3`, ',', '#', }, { "new line quotes", `1,"abc def",3`, ',', '#', }, { "quoted field at end", `1,"abc"`, ',', '#', }, { "not ended quoted field at end", `1,"abc`, ',', '#', }, { "empty field", `1,,3`, ',', '#', }, { "unicode fields", `💁,👌,🎍,😍`, ',', '#', }, { "comment", `#comment`, ',', '#', }, { "line with \\r at the end", "123\r\n456\r", ',', '#', }, { `from fuzz \"\"\r\n0`, "\"\"\r\n0", ',', '\x11', }} // Test our parser against the one from encoding/csv. func TestParser(t *testing.T) { for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { expected, recs, _ := stdlibLines(tc.csv, tc.comma, tc.comment) got := ourLines(tc.csv, tc.comma, tc.comment) if !reflect.DeepEqual(expected, got) { t.Errorf(`%s expected: %v got: %v records: %v`, tc.csv, expected, got, recs) } }) } } func ourLines(data string, comma, comment byte) []line { p := NewParser(comma, comment, scan.Bytes(data)) lines := []line{} for { fields, indexes, hasMore := p.CountFields(true) if !hasMore { break } lines = append(lines, line{fields, indexes, hasMore}) } return lines } // stdlibLines returns the []line records obtained using the stdlib CSV parser. func stdlibLines(data string, comma, comment byte) ([]line, [][]string, error) { if comma > unicode.MaxASCII || comment > unicode.MaxASCII { return nil, nil, fmt.Errorf("comma or comment not ASCII") } if strings.IndexByte(data, 0) != -1 { return nil, nil, fmt.Errorf("CSV contains null byte 0x00") } r := csv.NewReader(strings.NewReader(data)) r.Comma = rune(comma) r.ReuseRecord = true r.FieldsPerRecord = -1 // we don't care about lines having same number of fields r.LazyQuotes = true r.Comment = rune(comment) var err error lines := []line{} // To ease debugging, we keep records to print in tests. records := [][]string{} for { l, err := r.Read() if err == io.EOF { break } if err != nil { return nil, nil, err } indexes := []int{} for i := range l { _, c := r.FieldPos(i) // FieldPos starts counting from 1, but our parser counts from 0. // Adjust -1 so tests match. indexes = append(indexes, c-1) } lines = append(lines, line{len(l), indexes, err != io.EOF}) records = append(records, l) } return lines, records, err } var sample = ` 1,2,3 "a", "b", "c" a,b,c` + "\r\n1,2,3\r\na,b,c\r" func BenchmarkCSVStdlibDecoder(b *testing.B) { b.ReportAllocs() // Reuse a single reader to prevent allocs inside the benchmark function. r := strings.NewReader(sample) for b.Loop() { _, err := r.Seek(0, 0) if err != nil { b.Fatalf("reader cannot seek: %s", err) } d := csv.NewReader(r) d.ReuseRecord = true d.FieldsPerRecord = -1 // we don't care about lines having same number of fields d.LazyQuotes = true for { _, err := d.Read() if err == io.EOF { break } else if err != nil { b.Fatalf("error parsing CSV: %s", err) } } } } func BenchmarkCSVOurParser(b *testing.B) { b.ReportAllocs() // Reuse a single reader to prevent allocs inside the benchmark function. r := scan.Bytes(sample) p := NewParser(',', '#', r) for b.Loop() { p.s = r for { _, _, hasMore := p.CountFields(false) if !hasMore { break } } } } func FuzzParser(f *testing.F) { for _, p := range testcases { f.Add(p.csv, byte(','), byte('#')) } f.Fuzz(func(t *testing.T, data string, comma, comment byte) { expected, _, err := stdlibLines(data, comma, comment) // The sddlib CSV parser can accept UTF8 runes for comma and comment. // Our parser does not need that functionality, so it returns different // results for UTF8 inputs. Skip fuzzing when the generated data is UTF8. if err != nil { t.Skipf("not testable: %v", err) } got := ourLines(data, comma, comment) if !reflect.DeepEqual(got, expected) { t.Logf("input: %v, comma: %c, comment: %c", data, comma, comment) t.Errorf(` expected: %v, got: %v`, expected, got) } }) } ================================================ FILE: modules/mime/internal/json/parser.go ================================================ package json import ( "bytes" "sync" ) const ( QueryNone = "json" QueryGeo = "geo" QueryHAR = "har" QueryGLTF = "gltf" QueryCDX = "cdx" maxRecursion = 4096 ) var queries = map[string][]query{ QueryNone: nil, QueryGeo: {{ SearchPath: [][]byte{[]byte("type")}, SearchVals: [][]byte{ []byte(`"Feature"`), []byte(`"FeatureCollection"`), []byte(`"Point"`), []byte(`"LineString"`), []byte(`"Polygon"`), []byte(`"MultiPoint"`), []byte(`"MultiLineString"`), []byte(`"MultiPolygon"`), []byte(`"GeometryCollection"`), }, }}, QueryHAR: {{ SearchPath: [][]byte{[]byte("log"), []byte("version")}, }, { SearchPath: [][]byte{[]byte("log"), []byte("creator")}, }, { SearchPath: [][]byte{[]byte("log"), []byte("entries")}, }}, QueryGLTF: {{ SearchPath: [][]byte{[]byte("asset"), []byte("version")}, SearchVals: [][]byte{[]byte(`"1.0"`), []byte(`"2.0"`)}, }}, QueryCDX: {{ SearchPath: [][]byte{[]byte("bomFormat")}, SearchVals: [][]byte{[]byte(`"CycloneDX"`)}, }}, } var parserPool = sync.Pool{ New: func() any { return &parserState{maxRecursion: maxRecursion} }, } // parserState holds the state of JSON parsing. The number of inspected bytes, // the current path inside the JSON object, etc. type parserState struct { // ib represents the number of inspected bytes. // Because mimetype limits itself to only reading the header of the file, // it means sometimes the input JSON can be truncated. In that case, we want // to still detect it as JSON, even if it's invalid/truncated. // When ib == len(input) it means the JSON was valid (at least the header). ib int maxRecursion int // currPath keeps a track of the JSON keys parsed up. // It works only for JSON objects. JSON arrays are ignored // mainly because the functionality is not needed. currPath [][]byte // firstToken stores the first JSON token encountered in input. firstToken int // querySatisfied is true if both path and value of any queries passed to // consumeAny are satisfied. querySatisfied bool } // query holds information about a combination of {"key": "val"} that we're trying // to search for inside the JSON. type query struct { // SearchPath represents the whole path to look for inside the JSON. // ex: [][]byte{[]byte("foo"), []byte("bar")} matches {"foo": {"bar": "baz"}} SearchPath [][]byte // SearchVals represents values to look for when the SearchPath is found. // Each SearchVal element is tried until one of them matches (logical OR.) SearchVals [][]byte } func eq(path1, path2 [][]byte) bool { if len(path1) != len(path2) { return false } for i := range path1 { if !bytes.Equal(path1[i], path2[i]) { return false } } return true } // Parse will take out a parser from the pool depending on queryType and tries // to parse raw bytes as JSON. func Parse(queryType string, raw []byte) (parsed, inspected, firstToken int, querySatisfied bool) { p := parserPool.Get().(*parserState) defer func() { // Avoid hanging on to too much memory in extreme input cases. if len(p.currPath) > 128 { p.currPath = nil } parserPool.Put(p) }() p.reset() qs := queries[queryType] got := p.consumeAny(raw, qs, 0) return got, p.ib, p.firstToken, p.querySatisfied } func (p *parserState) reset() { p.ib = 0 p.currPath = p.currPath[0:0] p.firstToken = TokInvalid p.querySatisfied = false } func (p *parserState) consumeSpace(b []byte) (n int) { for len(b) > 0 && isSpace(b[0]) { b = b[1:] n++ p.ib++ } return n } func (p *parserState) consumeConst(b, cnst []byte) int { lb := len(b) for i, c := range cnst { if lb > i && b[i] == c { p.ib++ } else { return 0 } } return len(cnst) } func (p *parserState) consumeString(b []byte) (n int) { var c byte for len(b[n:]) > 0 { c, n = b[n], n+1 p.ib++ switch c { case '\\': if len(b[n:]) == 0 { return 0 } switch b[n] { case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': n++ p.ib++ continue case 'u': n++ p.ib++ for j := 0; j < 4 && len(b[n:]) > 0; j++ { if !isXDigit(b[n]) { return 0 } n++ p.ib++ } continue default: return 0 } case '"': return n default: continue } } return 0 } func (p *parserState) consumeNumber(b []byte) (n int) { got := false var i int if len(b) == 0 { goto out } if b[0] == '-' { b, i = b[1:], i+1 p.ib++ } for len(b) > 0 { if !isDigit(b[0]) { break } got = true b, i = b[1:], i+1 p.ib++ } if len(b) == 0 { goto out } if b[0] == '.' { b, i = b[1:], i+1 p.ib++ } for len(b) > 0 { if !isDigit(b[0]) { break } got = true b, i = b[1:], i+1 p.ib++ } if len(b) == 0 { goto out } if got && (b[0] == 'e' || b[0] == 'E') { b, i = b[1:], i+1 p.ib++ got = false if len(b) == 0 { goto out } if b[0] == '+' || b[0] == '-' { b, i = b[1:], i+1 p.ib++ } for len(b) > 0 { if !isDigit(b[0]) { break } got = true b, i = b[1:], i+1 p.ib++ } } out: if got { return i } return 0 } // openArray is used instead of an inline []byte{'['} to avoid mem alllocs. var openArray = []byte{'['} func (p *parserState) consumeArray(b []byte, qs []query, lvl int) (n int) { p.appendPath(openArray, qs) if len(b) == 0 { return 0 } for n < len(b) { n += p.consumeSpace(b[n:]) if len(b[n:]) == 0 { return 0 } if b[n] == ']' { p.ib++ p.popLastPath(qs) return n + 1 } innerParsed := p.consumeAny(b[n:], qs, lvl) if innerParsed == 0 { return 0 } n += innerParsed if len(b[n:]) == 0 { return 0 } switch b[n] { case ',': n += 1 p.ib++ continue case ']': p.ib++ return n + 1 default: return 0 } } return 0 } func queryPathMatch(qs []query, path [][]byte) int { for i := range qs { if eq(qs[i].SearchPath, path) { return i } } return -1 } // appendPath will append a path fragment if queries is not empty. // If we don't need query functionality (just checking if a JSON is valid), // then we can skip keeping track of the path we're currently in. func (p *parserState) appendPath(path []byte, qs []query) { if len(qs) != 0 { p.currPath = append(p.currPath, path) } } func (p *parserState) popLastPath(qs []query) { if len(qs) != 0 { p.currPath = p.currPath[:len(p.currPath)-1] } } func (p *parserState) consumeObject(b []byte, qs []query, lvl int) (n int) { for n < len(b) { n += p.consumeSpace(b[n:]) if len(b[n:]) == 0 { return 0 } if b[n] == '}' { p.ib++ return n + 1 } if b[n] != '"' { return 0 } else { n += 1 p.ib++ } // queryMatched stores the index of the query satisfying the current path. queryMatched := -1 if keyLen := p.consumeString(b[n:]); keyLen == 0 { return 0 } else { p.appendPath(b[n:n+keyLen-1], qs) if !p.querySatisfied { queryMatched = queryPathMatch(qs, p.currPath) } n += keyLen } n += p.consumeSpace(b[n:]) if len(b[n:]) == 0 { return 0 } if b[n] != ':' { return 0 } else { n += 1 p.ib++ } n += p.consumeSpace(b[n:]) if len(b[n:]) == 0 { return 0 } if valLen := p.consumeAny(b[n:], qs, lvl); valLen == 0 { return 0 } else { if queryMatched != -1 { q := qs[queryMatched] if len(q.SearchVals) == 0 { p.querySatisfied = true } for _, val := range q.SearchVals { if bytes.Equal(val, bytes.TrimSpace(b[n:n+valLen])) { p.querySatisfied = true } } } n += valLen } if len(b[n:]) == 0 { return 0 } switch b[n] { case ',': p.popLastPath(qs) n++ p.ib++ continue case '}': p.popLastPath(qs) p.ib++ return n + 1 default: return 0 } } return 0 } func (p *parserState) consumeAny(b []byte, qs []query, lvl int) (n int) { // Avoid too much recursion. if p.maxRecursion != 0 && lvl > p.maxRecursion { return 0 } if len(qs) == 0 { p.querySatisfied = true } n += p.consumeSpace(b) if len(b[n:]) == 0 { return 0 } var t, rv int switch b[n] { case '"': n++ p.ib++ rv = p.consumeString(b[n:]) t = TokString case '[': n++ p.ib++ rv = p.consumeArray(b[n:], qs, lvl+1) t = TokArray case '{': n++ p.ib++ rv = p.consumeObject(b[n:], qs, lvl+1) t = TokObject case 't': rv = p.consumeConst(b[n:], []byte("true")) t = TokTrue case 'f': rv = p.consumeConst(b[n:], []byte("false")) t = TokFalse case 'n': rv = p.consumeConst(b[n:], []byte("null")) t = TokNull default: rv = p.consumeNumber(b[n:]) t = TokNumber } if lvl == 0 { p.firstToken = t } if rv <= 0 { return n } n += rv n += p.consumeSpace(b[n:]) return n } func isSpace(c byte) bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n' } func isDigit(c byte) bool { return '0' <= c && c <= '9' } func isXDigit(c byte) bool { if isDigit(c) { return true } return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') } const ( TokInvalid = 0 TokNull = 1 << iota TokTrue TokFalse TokNumber TokString TokArray TokObject TokComma ) ================================================ FILE: modules/mime/internal/json/parser_test.go ================================================ package json import ( "bytes" "encoding/json" "strings" "testing" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // These samples come from https://github.com/nst/JSONTestSuite. var positives = []struct { json string stdlib bool }{ {`[[] ]`, true}, {`[]`, true}, {`[""]`, true}, {`["a"]`, true}, {`[false]`, true}, {`[null, 1, "1", {}]`, true}, {`[null]`, true}, {`[1 ]`, true}, {` [1]`, true}, {`[1,null,null,null,2]`, true}, {`[2] `, true}, {`[0e+1]`, true}, {`[0e1]`, true}, {`[ 4]`, true}, {`[-0.000000000000000000000000000000000000000000000000000000000000000000000000000001] `, true}, {`[20e1]`, true}, {`[123e65]`, true}, {`[-0]`, true}, {`[-123]`, true}, {`[-1]`, true}, {`[-0]`, true}, {`[1E22]`, true}, {`[1E-2]`, true}, {`[1E+2]`, true}, {`[123e45]`, true}, {`[123.456e78]`, true}, {`[1e-2]`, true}, {`[1e+2]`, true}, {`[123]`, true}, {`[123.456789]`, true}, {`{"asd":"sdf"}`, true}, {`{"a":"b","a":"b"}`, true}, {`{"a":"b","a":"c"}`, true}, {`{}`, true}, {`{"":0}`, true}, {`{"foo\u0000bar": 42}`, true}, {`{ "min": -1.0e+28, "max": 1.0e+28 }`, true}, {`{"asd":"sdf", "dfg":"fgh"}`, true}, {`{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}`, true}, {`{"a":[]}`, true}, {`{"title":"\u041f\u043e\u043b\u0442\u043e\u0440\u0430 \u0417\u0435\u043c\u043b\u0435\u043a\u043e\u043f\u0430" }`, true}, {`{ "a": "b" }`, true}, {`["\u0060\u012a\u12AB"]`, true}, {`["\uD801\udc37"]`, true}, {`["\ud83d\ude39\ud83d\udc8d"]`, true}, {`["\"\\\/\b\f\n\r\t"]`, true}, {`["\\u0000"]`, true}, {`["\""]`, true}, {`["a/*b*/c/*d//e"]`, true}, {`["\\a"]`, true}, {`["\\n"]`, true}, {`["\u0012"]`, true}, {`["\uFFFF"]`, true}, {`["asd"]`, true}, {`[ "asd"]`, true}, {`["\uDBFF\uDFFF"]`, true}, {`["new\u00A0line"]`, true}, {`["􏿿"]`, true}, {`["￿"]`, true}, {`["\u0000"]`, true}, {`["\u002c"]`, true}, {`["π"]`, true}, {`["𛿿"]`, true}, {`["asd "]`, true}, {`" "`, true}, {`["\uD834\uDd1e"]`, true}, {`["\u0821"]`, true}, {`["\u0123"]`, true}, {`["
"]`, true}, {`["
"]`, true}, {`["new\u000Aline"]`, true}, {`["\u0061\u30af\u30EA\u30b9"]`, true}, {`[""]`, true}, {`["⍂㈴⍂"]`, true}, {`["\u005C"]`, true}, {`["\u0022"]`, true}, {`["\uA66D"]`, true}, {`["\uDBFF\uDFFE"]`, true}, {`["\uD83F\uDFFE"]`, true}, {`["\u200B"]`, true}, {`["\u2064"]`, true}, {`["\uFDD0"]`, true}, {`["\uFFFE"]`, true}, {`["€𝄞"]`, true}, {`["aa"]`, true}, {`false`, true}, {`42`, true}, {`-0.1`, true}, {`null`, true}, {`"asd"`, true}, {`true`, true}, {`""`, true}, {`["a"] `, true}, {`[true]`, true}, {` [] `, true}, // Bug: following samples are invalid JSONs but they are parsed successfully. {` `, false}, {`["",]`, false}, {`[1,]`, false}, {`[-01]`, false}, {`[-2.]`, false}, {`[.2e-3]`, false}, {`[0.e1]`, false}, {`[2.e+3]`, false}, {`[2.e-3]`, false}, {`[2.e3]`, false}, {`[-012]`, false}, {`[-.123]`, false}, {`[1.]`, false}, {`[.123]`, false}, {`[012]`, false}, {`{"�":"0",}`, false}, {`{"id":0,}`, false}, {`"`, false}, {`["new line"]`, false}, {`[" "]`, false}, {`[`, false}, {`[[`, false}, {`{`, false}, } var negatives = []struct { name string json string expectParse int expectInspect int }{ {"array_1_true_without_comma", `[1 true]`, 1, 3}, {"array_a_invalid_utf8", `[a�]`, 1, 1}, {"array_colon_instead_of_comma", `["": 1]`, 1, 3}, {"array_comma_after_close", `[""],`, 4, 4}, {"array_comma_and_number", `[,1]`, 1, 1}, {"array_double_comma", `[1,,2]`, 1, 3}, {"array_double_extra_comma", `["x",,]`, 1, 5}, {"array_extra_close", `["x"]]`, 5, 5}, {"array_incomplete_invalid_value", `[x`, 1, 1}, {"array_incomplete", `["x"`, 1, 4}, {"array_inner_array_no_comma", `[3[4]]`, 1, 2}, {"array_invalid_utf8", `[�]`, 1, 1}, {"array_items_separated_by_semicolon", `[1:2]`, 1, 2}, {"array_just_comma", `[,]`, 1, 1}, {"array_just_minus", `[-]`, 1, 2}, {"array_missing_value", `[ , ""]`, 1, 4}, {"array_newlines_unclosed", `["a", 4 ,1,`, 1, 11}, {"array_number_and_several_commas", `[1,,]`, 1, 3}, {"array_spaces_vertical_tab_formfeed", "\x5b\x22\x0b\x61\x22\x5c\x66\x5d", 1, 5}, {"array_star_inside", `[*]`, 1, 1}, {"array_unclosed", `[""`, 1, 3}, {"array_unclosed_trailing_comma", `[1,`, 1, 3}, {"array_unclosed_with_new_lines", "\x5b\x31\x2c\x0a\x31\x0a\x2c\x31", 1, 8}, {"array_unclosed_with_object_inside", `[{}`, 1, 3}, {"incomplete_false", `[fals]`, 1, 5}, {"incomplete_null", `[nul]`, 1, 4}, {"incomplete_true", `[tru]`, 1, 4}, {"multidigit_number_then_00", "\x31\x32\x33\x00", 3, 3}, {"number_0.1.2", `[0.1.2]`, 1, 4}, {"number_0.3e+", `[0.3e+]`, 1, 6}, {"number_0.3e", `[0.3e]`, 1, 5}, {"number_0_capital_E+", `[0E+]`, 1, 4}, {"number_0_capital_E", `[0E]`, 1, 3}, {"number_0e+", `[0e+]`, 1, 4}, {"number_0e", `[0e]`, 1, 3}, {"number_1_000", `[1 000.0]`, 1, 3}, {"number_1.0e+", `[1.0e+]`, 1, 6}, {"number_1.0e-", `[1.0e-]`, 1, 6}, {"number_1.0e", `[1.0e]`, 1, 5}, {"number_-1.0.", `[-1.0.]`, 1, 5}, {"number_1eE2", `[1eE2]`, 1, 3}, {"number_+1", `[+1]`, 1, 1}, {"number_.-1", `[.-1]`, 1, 2}, {"number_9.e+", `[9.e+]`, 1, 5}, {"number_expression", `[1+2]`, 1, 2}, {"number_hex_1_digit", `[0x1]`, 1, 2}, {"number_hex_2_digits", `[0x42]`, 1, 2}, {"number_infinity", `[Infinity]`, 1, 1}, {"number_+Inf", `[+Inf]`, 1, 1}, {"number_Inf", `[Inf]`, 1, 1}, {"number_invalid+-", `[0e+-1]`, 1, 4}, {"number_invalid-negative-real", `[-123.123foo]`, 1, 9}, {"number_invalid-utf-8-in-bigger-int", `[123�]`, 1, 4}, {"number_invalid-utf-8-in-exponent", `[1e1�]`, 1, 4}, {"number_invalid-utf-8-in-int", "\x5b\x30\xe5\x5d\x0a", 1, 2}, {"number_++", `[++1234]`, 1, 1}, {"number_minus_infinity", `[-Infinity]`, 1, 2}, {"number_minus_sign_with_trailing_garbage", `[-foo]`, 1, 2}, {"number_minus_space_1", `[- 1]`, 1, 2}, {"number_-NaN", `[-NaN]`, 1, 2}, {"number_NaN", `[NaN]`, 1, 1}, {"number_neg_with_garbage_at_end", `[-1x]`, 1, 3}, {"number_real_garbage_after_e", `[1ea]`, 1, 3}, {"number_real_with_invalid_utf8_after_e", `[1e�]`, 1, 3}, {"number_U+FF11_fullwidth_digit_one", `[1]`, 1, 1}, {"number_with_alpha_char", `[1.8011670033376514H-308]`, 1, 19}, {"number_with_alpha", `[1.2a-3]`, 1, 4}, {"object_bad_value", `["x", truth]`, 1, 9}, {"object_bracket_key", "\x7b\x5b\x3a\x20\x22\x78\x22\x7d\x0a", 1, 1}, {"object_comma_instead_of_colon", `{"x", null}`, 1, 4}, {"object_double_colon", `{"x"::"b"}`, 1, 5}, {"object_emoji", `{🇨🇭}`, 1, 1}, {"object_garbage_at_end", `{"a":"a" 123}`, 1, 9}, {"object_key_with_single_quotes", `{key: 'value'}`, 1, 1}, {"object_missing_colon", `{"a" b}`, 1, 5}, {"object_missing_key", `{:"b"}`, 1, 1}, {"object_missing_semicolon", `{"a" "b"}`, 1, 5}, {"object_missing_value", `{"a":`, 1, 5}, {"object_no-colon", `{"a"`, 1, 4}, {"object_non_string_key_but_huge_number_instead", `{9999E9999:1}`, 1, 1}, {"object_non_string_key", `{1:1}`, 1, 1}, {"object_repeated_null_null", `{null:null,null:null}`, 1, 1}, {"object_several_trailing_commas", `{"id":0,,,,,}`, 1, 8}, {"object_single_quote", `{'a':0}`, 1, 1}, {"object_trailing_comment", `{"a":"b"}/**/`, 9, 9}, {"object_trailing_comment_open", `{"a":"b"}/**//`, 9, 9}, {"object_trailing_comment_slash_open_incomplete", `{"a":"b"}/`, 9, 9}, {"object_trailing_comment_slash_open", `{"a":"b"}//`, 9, 9}, {"object_two_commas_in_a_row", `{"a":"b",,"c":"d"}`, 1, 9}, {"object_unquoted_key", `{a: "b"}`, 1, 1}, {"object_unterminated-value", `{"a":"a`, 1, 7}, {"object_with_single_string", `{ "foo" : "bar", "a" }`, 1, 21}, {"object_with_trailing_garbage", `{"a":"b"}#`, 9, 9}, {"single_space", ` `, 0, 1}, {"string_1_surrogate_then_escape", `["\uD800\"]`, 1, 11}, {"string_1_surrogate_then_escape_u1", `["\uD800\u1"]`, 1, 11}, {"string_1_surrogate_then_escape_u1x", `["\uD800\u1x"]`, 1, 11}, {"string_1_surrogate_then_escape_u", `["\uD800\u"]`, 1, 10}, {"string_accentuated_char_no_quotes", `[é]`, 1, 1}, {"string_backslash_00", "\x5b\x22\x5c\x00\x22\x5d", 1, 3}, {"string_escaped_backslash_bad", `["\\\"]`, 1, 7}, {"string_escaped_ctrl_char_tab", "\x5b\x22\x5c\x09\x22\x5d", 1, 3}, {"string_escaped_emoji", `["\🌀"]`, 1, 3}, {"string_escape_x", `["\x00"]`, 1, 3}, {"string_incomplete_escaped_character", `["\u00A"]`, 1, 7}, {"string_incomplete_escape", `["\"]`, 1, 5}, {"string_incomplete_surrogate_escape_invalid", `["\uD800\uD800\x"]`, 1, 15}, {"string_incomplete_surrogate", `["\uD834\uDd"]`, 1, 12}, {"string_invalid_backslash_esc", `["\a"]`, 1, 3}, {"string_invalid_unicode_escape", `["\uqqqq"]`, 1, 4}, {"string_invalid_utf8_after_escape", `["\�"]`, 1, 3}, {"string_invalid-utf-8-in-escape", `["\u�"]`, 1, 4}, {"string_leading_uescaped_thinspace", `[\u0020"asd"]`, 1, 1}, {"string_no_quotes_with_bad_escape", `[\n]`, 1, 1}, {"string_single_quote", `['single quote']`, 1, 1}, {"string_single_string_no_double_quotes", `abc`, 0, 0}, {"string_start_escape_unclosed", `["\`, 1, 3}, {"string_unicode_CapitalU", `"\UA66D"`, 1, 2}, {"string_with_trailing_garbage", `""x`, 2, 2}, {"structure_angle_bracket_.", `<.>`, 0, 0}, {"structure_angle_bracket_null", `[]`, 1, 1}, {"structure_array_trailing_garbage", `[1]x`, 3, 3}, {"structure_array_with_extra_array_close", `[1]]`, 3, 3}, {"structure_array_with_unclosed_string", `["asd]`, 1, 6}, {"structure_ascii-unicode-identifier", `aå`, 0, 0}, {"structure_capitalized_True", `[True]`, 1, 1}, {"structure_close_unopened_array", `1]`, 1, 1}, {"structure_comma_instead_of_closing_brace", `{"x": true,`, 1, 11}, {"structure_double_array", `[][]`, 2, 2}, {"structure_end_array", `]`, 0, 0}, {"structure_incomplete_UTF8_BOM", `�{}`, 0, 0}, {"structure_lone-invalid-utf-8", `�`, 0, 0}, {"structure_null-byte-outside-string", "\x5b\x00\x5d", 1, 1}, {"structure_number_with_trailing_garbage", `2@`, 1, 1}, {"structure_object_followed_by_closing_object", `{}}`, 2, 2}, {"structure_object_unclosed_no_value", `{"":`, 1, 4}, {"structure_object_with_comment", `{"a":/*comment*/"b"}`, 1, 5}, {"structure_object_with_trailing_garbage", `{"a": true} "x"`, 12, 12}, {"structure_open_array_apostrophe", `['`, 1, 1}, {"structure_open_array_comma", `[,`, 1, 1}, {"structure_open_array_open_object", `[{`, 1, 2}, {"structure_open_array_open_string", `["a`, 1, 3}, {"structure_open_array_string", `["a"`, 1, 4}, {"structure_open_object_close_array", `{]`, 1, 1}, {"structure_open_object_comma", `{,`, 1, 1}, {"structure_open_object_open_array", `{[`, 1, 1}, {"structure_open_object_open_string", `{"a`, 1, 3}, {"structure_open_object_string_with_apostrophes", `{'a'`, 1, 1}, {"structure_open_open", `["\{["\{["\{["\{`, 1, 3}, {"structure_single_eacute", `�`, 0, 0}, {"structure_single_star", `*`, 0, 0}, {"structure_trailing_#", `{"a":"b"}#{}`, 9, 9}, {"structure_U+2060_word_joined", "\x5b\xe2\x81\xa0\x5d", 1, 1}, {"structure_uescaped_LF_before_string", `[\u000A""]`, 1, 1}, {"structure_unclosed_array", `[1`, 1, 2}, {"structure_unclosed_array_partial_null", `[ false, nul`, 1, 12}, {"structure_unclosed_array_unfinished_false", `[ true, fals`, 1, 12}, {"structure_unclosed_array_unfinished_true", `[ false, tru`, 1, 12}, {"structure_unclosed_object", `{"asd":"asd"`, 1, 12}, {"structure_unicode-identifier", `å`, 0, 0}, {"structure_UTF8_BOM_no_data", "\xef\xbb\xbf", 0, 0}, {"structure_whitespace_formfeed", "\x5b\x0c\x5d", 1, 1}, {"structure_whitespace_U+2060_word_joiner", "\x5b\xe2\x81\xa0\x5d", 1, 1}, } func TestConsumeString(t *testing.T) { tCases := []struct { name string data string expected int }{ {"ascii string", `foo"`, 4}, {"utf-8 string one char", `ß"`, 3}, {"utf-8 string multiple chars", `ßßßß"`, 9}, {"empty string", ``, 0}, {"non-ending ascii string", `a`, 0}, {"non-ending utf-8 string", `ß`, 0}, {"escaped ascii string", "\\b a\"", 5}, {"escaped utf-8 string", "\\b ß\"", 6}, } for _, tt := range tCases { t.Run(tt.name, func(t *testing.T) { p := &parserState{} got := p.consumeString([]byte(tt.data)) if got != tt.expected { t.Errorf("expected: %v, got: %v", tt.expected, got) } }) } } func TestConsumeNumber(t *testing.T) { tCases := []struct { data string expected int }{ {`123`, 3}, {`123.1`, 5}, {`123.`, 4}, {`.123`, 4}, {`.`, 0}, {`..`, 0}, {`e`, 0}, {`1e1`, 3}, {`1.1e1`, 5}, {`.1e1`, 4}, {"", 0}, {`"NaN"`, 0}, {`"Infinity"`, 0}, {`"-Infinity"`, 0}, {".0", 2}, {"0", 1}, {"-0", 2}, {"+0", 0}, {"1", 1}, {"-1", 2}, {"00", 2}, {"-00", 3}, {"01", 2}, {"-01", 3}, {"0i", 1}, {"-0i", 2}, {"0f", 1}, {"-0f", 2}, {"9876543210", 10}, {"-9876543210", 11}, {"9876543210x", 10}, {"-9876543210x", 11}, {" 9876543210", 0}, {"- 9876543210", 0}, {strings.Repeat("9876543210", 1000), 10000}, {"-" + strings.Repeat("9876543210", 1000), 1 + 10000}, {"0.", 2}, {"-0.", 3}, {"0e", 0}, {"-0e", 0}, {"0E", 0}, {"-0E", 0}, {"0.0", 3}, {"-0.0", 4}, {"0e0", 3}, {"-0e0", 4}, {"0E0", 3}, {"-0E0", 4}, {"0.0123456789", 12}, {"-0.0123456789", 13}, {"1.f", 2}, {"-1.f", 3}, {"1.e", 0}, {"-1.e", 0}, {"1e0", 3}, {"-1e0", 4}, {"1E0", 3}, {"-1E0", 4}, {"1Ex", 0}, {"-1Ex", 0}, {"1e-0", 4}, {"-1e-0", 5}, {"1e+0", 4}, {"-1e+0", 5}, {"1E-0", 4}, {"-1E-0", 5}, {"1E+0", 4}, {"-1E+0", 5}, {"1E+00500", 8}, {"-1E+00500", 9}, {"1E+00500x", 8}, {"-1E+00500x", 9}, {"9876543210.0123456789e+01234589x", 31}, {"-9876543210.0123456789e+01234589x", 32}, {"1_000_000", 1}, {"0x12ef", 1}, {"0x1p-2", 1}, } p := &parserState{} for _, tt := range tCases { tname := tt.data if len(tname) > 10 { tname = tname[:10] + "..." } t.Run(tname, func(t *testing.T) { got := p.consumeNumber([]byte(tt.data)) if got != tt.expected { t.Errorf("expected: %v, got: %v", tt.expected, got) } }) } } func TestConsumeArray(t *testing.T) { tCases := []struct { name string data string expected int }{ {"empty array", `]`, 1}, {"empty array spaces", ` ]`, 2}, {"one int array", `1]`, 2}, {"one int array spaces", ` 1 ]`, 4}, {"two ints array", `1,2]`, 4}, {"two ints array spaces", ` 1 , 2 ]`, 8}, {"everything array", `[], {}, true, false, null, 1, "abc"]`, 36}, {"everything array v2", `[1,2,3], {"a":"b"}, true, false, null, 1, "abc"]`, 48}, {"escaped \"", `"\""]`, 5}, {"hex", `"\uA66D"]`, 9}, {"unfinished string", `"\uFFF`, 0}, } p := &parserState{} for _, tt := range tCases { t.Run(tt.name, func(t *testing.T) { got := p.consumeArray([]byte(tt.data), nil, 1) if got != tt.expected { t.Errorf("expected: %v, got: %v", tt.expected, got) } }) } } func TestQueryObject(t *testing.T) { tCases := []struct { name string json string query query expectedFind bool }{{ name: "empty path", json: `{"foo": {"bar": "baz"}}`, query: query{ SearchPath: [][]byte{[]byte("")}, }, expectedFind: false, }, { name: "path not matching after", json: `{"foo": {"bar": "baz"}}`, query: query{ SearchPath: [][]byte{[]byte("fool")}, }, expectedFind: false, }, { name: "path not matching before", json: `{"foo": {"bar": "baz"}}`, query: query{ SearchPath: [][]byte{[]byte("afoo")}, }, expectedFind: false, }, { name: "empty segment followed by valid segment", json: `{"foo": {"bar": "baz"}}`, query: query{ SearchPath: [][]byte{[]byte(""), []byte("foo")}, }, expectedFind: false, }, { name: "inversed segments", json: `{"foo": {"bar": "baz"}}`, query: query{ SearchPath: [][]byte{[]byte("bar"), []byte("foo")}, }, expectedFind: false, }, { name: "foo is value, not path", json: `{"foo": {"bar": "foo"}}`, query: query{ SearchPath: [][]byte{[]byte("bar"), []byte("foo")}, }, expectedFind: false, }, { name: "not matching because it's array", json: `[{"foo": {"bar": "baz"}}]`, query: query{ SearchPath: [][]byte{[]byte("foo"), []byte("bar")}, }, expectedFind: false, }, { name: "match without value", json: `{"foo": {"bar": "baz"}}`, query: query{ SearchPath: [][]byte{[]byte("foo"), []byte("bar")}, }, expectedFind: true, }, { name: "match with value", json: `{"foo": {"bar": "baz"}}`, query: query{ SearchPath: [][]byte{[]byte("foo"), []byte("bar")}, SearchVals: [][]byte{[]byte(`"baz"`)}, }, expectedFind: true, }, { name: "no match because path is offset with one foo", json: `{"foo": {"foo": {"bar": "baz"}}}`, query: query{ SearchPath: [][]byte{[]byte("foo"), []byte("bar")}, SearchVals: [][]byte{[]byte(`"baz"`)}, }, expectedFind: false, }} for _, tt := range tCases { t.Run(tt.name, func(t *testing.T) { p := &parserState{} p.consumeAny([]byte(tt.json), []query{tt.query}, 0) if tt.expectedFind != p.querySatisfied { t.Errorf("expectedFind: %v, got: %v", tt.expectedFind, p.querySatisfied) } }) } } func TestConsumeObject(t *testing.T) { tCases := []struct { name string data string expected int }{ {"empty object", `}`, 1}, {"object", `"a":"b"}`, 8}, {"panic found with fuzz", "\"\":0", 0}, } p := &parserState{} for _, tt := range tCases { t.Run(tt.name, func(t *testing.T) { got := p.consumeObject([]byte(tt.data), nil, 1) if got != tt.expected { t.Errorf("expected: %v, got: %v", tt.expected, got) } }) } } func TestConsumeConst(t *testing.T) { tCases := []struct { b string cnst string expect int inspect int }{ {"", "", 0, 0}, {"", "true", 0, 0}, {"true", "", 0, 0}, {"t", "true", 0, 1}, {"tr", "true", 0, 2}, {"tru", "true", 0, 3}, {"true", "true", 4, 4}, {"truex", "true", 4, 4}, } for _, tt := range tCases { p := &parserState{} t.Run(tt.b+" -- "+tt.cnst, func(t *testing.T) { got := p.consumeConst([]byte(tt.b), []byte(tt.cnst)) if got != tt.expect { t.Errorf("expected: %v, got %v", tt.expect, got) } if p.ib != tt.inspect { t.Errorf("expected to inspect: %v, got %v", tt.inspect, p.ib) } }) } } // Truncate inputs at each possible index and test if decoder parses // the truncated part successfully. func testTruncating(t *testing.T, jsonString string) { t.Helper() p := &parserState{} for i := 1; i <= len(jsonString); i++ { b := scan.Bytes(jsonString[:i]) b.TrimRWS() p.reset() _ = p.consumeAny(b, nil, 0) if p.ib != len(b) { t.Errorf("truncated positives should be fully parsed %v \n"+ "got: %d want: %d", string(b), p.ib, len(b)) } } } func TestPositives(t *testing.T) { for _, tt := range positives { testTruncating(t, tt.json) } } func TestPositivesCompacted(t *testing.T) { for _, tt := range positives { if !tt.stdlib { continue } buf := &bytes.Buffer{} if err := json.Compact(buf, []byte(tt.json)); err != nil { t.Errorf("Compact should always be successful: %s %s", tt.json, err) } testTruncating(t, buf.String()) } } func TestPositivesIndented(t *testing.T) { indents := [][2]string{ {"", " "}, {" ", " "}, {" ", "\t"}, {"\t", "\t"}, {"\t", " \t"}, {"", "\r\n"}, {"", " \r\n"}, } for _, tt := range positives { if !tt.stdlib { continue } for _, indent := range indents { buf := &bytes.Buffer{} if err := json.Indent(buf, []byte(tt.json), indent[0], indent[1]); err != nil { t.Errorf("Indent should always be successful: %s %s", tt.json, err) } testTruncating(t, buf.String()) } } } func TestNegatives(t *testing.T) { p := &parserState{} for _, tt := range negatives { t.Run(tt.name, func(t *testing.T) { p.reset() got := p.consumeAny([]byte(tt.json), nil, 0) if got != tt.expectParse { t.Errorf("unexpected parsed length got: %d want:%d", got, tt.expectParse) } if p.ib != tt.expectInspect { t.Errorf("unexpected inspected length got: %d want:%d\nin:%s", p.ib, tt.expectInspect, tt.json) } }) } } func TestMaxRecursion(t *testing.T) { tCases := []struct { maxRecursion int input string expectParsed int expectInspected int }{ {0, `[]`, 2, 2}, {0, `[[[]]]`, 6, 6}, {0, strings.Repeat("[", 10000) + strings.Repeat("]", 10000), 20000, 20000}, {3, `[[[[[]]]]]`, 1, 4}, // max recursion is 3 so we need to inspect 4 opening brackets } for _, tt := range tCases { tname := tt.input if len(tname) > 10 { tname = tname[:10] + "..." } t.Run(tname, func(t *testing.T) { p := &parserState{ maxRecursion: tt.maxRecursion, } got := p.consumeAny([]byte(tt.input), nil, 0) if got != tt.expectParsed { t.Errorf("parsed: got: %d expected: %d", got, tt.expectParsed) } if p.ib != tt.expectInspected { t.Errorf("inspected: got: %d expected: %d", p.ib, tt.expectInspected) } }) } } func TestStack(t *testing.T) { tCases := []struct { name string data string expected string }{ {"empty", ` `, ""}, {"a string", `"abc"`, ""}, {"an int", `123`, ""}, {"true", `true`, ""}, {"false", `false`, ""}, // Input must be an incomplete JSON because the stack is popped otherwise. {"arr", `[`, "["}, // Put a § between each segment of the stack. {"arrr", `[[`, "[§["}, {"arrrr", `[[[`, "[§[§["}, {"arrr popped once", `[[[]`, "[§["}, {"obj", `{`, ""}, {"obj key", `{"abc":1`, "abc"}, {"obj key twice", `{"abc":{"def":1`, "abc§def"}, {"obj key twice but popped", `{"abc":{"def":1}`, "abc"}, {"obj key twice and arr", `{"abc":{"def":[`, "abc§def§["}, {"hacky", `{"abc":{"def[":`, "abc§def["}, } join := func(bs [][]byte) string { ret := make([]string, 0, len(bs)) for _, b := range bs { ret = append(ret, string(b)) } return strings.Join(ret, "§") } for _, tt := range tCases { t.Run(tt.name, func(t *testing.T) { p := &parserState{} p.consumeAny([]byte(tt.data), []query{{}}, 0) if got := join(p.currPath); got != tt.expected { t.Errorf("expected: %s, got: %s", tt.expected, got) } }) } } func TestCurrPathBounded(t *testing.T) { // currPath is bounded to 128. count := 129 // input has to be an incomplete json, so that currPath does not get popped. input := []byte(strings.Repeat("[", count)) for range 100 { Parse(QueryGeo, input) // It's not guaranteed that p is the same parser object used by the // Parse call above. Reason: go runs tests packages concurrently. If // another package calls Parse in tests, that can interfere with parserPool. // Running the test several times in loop mitigates that. p := parserPool.Get().(*parserState) if len(p.currPath) > 128 { t.Errorf("expected currPath be purged if >128") } } } var sample = []byte(`{"type":"Feature","fruit":[{},{"dummy":"data","another field":[false,null]},true,false],"size":"Large","color":"Red"}`) func BenchmarkParse(b *testing.B) { b.ReportAllocs() for b.Loop() { _, _, _, query := Parse(QueryGeo, sample) if !query { b.Error("query should be satisfied") } } } func BenchmarkJSONStdlibDecoder(b *testing.B) { b.ReportAllocs() for b.Loop() { d := json.NewDecoder(bytes.NewReader(sample)) for { _, err := d.Token() if err != nil { break } } } } func BenchmarkJSONOurParser(b *testing.B) { b.ReportAllocs() for b.Loop() { p := &parserState{} p.consumeAny(sample, nil, 0) } } func FuzzJson(f *testing.F) { for _, p := range positives { f.Add([]byte(p.json), true) } p := &parserState{} f.Fuzz(func(t *testing.T, data []byte, reset bool) { if reset { p.reset() } p.consumeString(data) p.consumeNumber(data) p.consumeArray(data, nil, 1) p.consumeObject(data, nil, 1) p.consumeAny(data, nil, 1) }) } ================================================ FILE: modules/mime/internal/magic/archive.go ================================================ package magic import ( "bytes" "encoding/binary" ) // SevenZ matches a 7z archive. func SevenZ(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C}) } // Gzip matches gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer. func Gzip(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x1f, 0x8b}) } // Fits matches an Flexible Image Transport System file. func Fits(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{ 0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, 0x20, 0x3D, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54, }) } // Xar matches an eXtensible ARchive format file. func Xar(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x78, 0x61, 0x72, 0x21}) } // Bz2 matches a bzip2 file. func Bz2(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x42, 0x5A, 0x68}) } // Ar matches an ar (Unix) archive file. func Ar(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E}) } // Deb matches a Debian package file. func Deb(raw []byte, _ uint32) bool { return offset(raw, []byte{ 0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D, 0x62, 0x69, 0x6E, 0x61, 0x72, 0x79, }, 8) } // Warc matches a Web ARChive file. func Warc(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("WARC/1.0")) || bytes.HasPrefix(raw, []byte("WARC/1.1")) } // Cab matches a Microsoft Cabinet archive file. func Cab(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("MSCF\x00\x00\x00\x00")) } // Xz matches an xz compressed stream based on https://tukaani.org/xz/xz-file-format.txt. func Xz(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}) } // Lzip matches an Lzip compressed file. func Lzip(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x4c, 0x5a, 0x49, 0x50}) } // RPM matches an RPM or Delta RPM package file. func RPM(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0xed, 0xab, 0xee, 0xdb}) || bytes.HasPrefix(raw, []byte("drpm")) } // RAR matches a RAR archive file. func RAR(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("Rar!\x1A\x07\x00")) || bytes.HasPrefix(raw, []byte("Rar!\x1A\x07\x01\x00")) } // InstallShieldCab matches an InstallShield Cabinet archive file. func InstallShieldCab(raw []byte, _ uint32) bool { return len(raw) > 7 && bytes.Equal(raw[0:4], []byte("ISc(")) && raw[6] == 0 && (raw[7] == 1 || raw[7] == 2 || raw[7] == 4) } // Zstd matches a Zstandard archive file. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md func Zstd(raw []byte, limit uint32) bool { if len(raw) < 4 { return false } sig := binary.LittleEndian.Uint32(raw) // Check for Zstandard frames and skippable frames. return (sig >= 0xFD2FB522 && sig <= 0xFD2FB528) || (sig >= 0x184D2A50 && sig <= 0x184D2A5F) } // CRX matches a Chrome extension file: a zip archive prepended by a package header. func CRX(raw []byte, limit uint32) bool { const minHeaderLen = 16 if len(raw) < minHeaderLen || !bytes.HasPrefix(raw, []byte("Cr24")) { return false } pubkeyLen := int64(binary.LittleEndian.Uint32(raw[8:12])) sigLen := int64(binary.LittleEndian.Uint32(raw[12:16])) zipOffset := minHeaderLen + pubkeyLen + sigLen if zipOffset < 0 || int64(len(raw)) < zipOffset { return false } return Zip(raw[zipOffset:], limit) } // Cpio matches a cpio archive file. func Cpio(raw []byte, _ uint32) bool { if len(raw) < 6 { return false } return binary.LittleEndian.Uint16(raw) == 070707 || // binary cpio bytes.HasPrefix(raw, []byte("070707")) || // portable ASCII cpios bytes.HasPrefix(raw, []byte("070701")) || bytes.HasPrefix(raw, []byte("070702")) } // Tar matches a (t)ape (ar)chive file. // Tar files are divided into 512 bytes records. First record contains a 257 // bytes header padded with NUL. func Tar(raw []byte, _ uint32) bool { const sizeRecord = 512 // The structure of a tar header: // type TarHeader struct { // Name [100]byte // Mode [8]byte // Uid [8]byte // Gid [8]byte // Size [12]byte // Mtime [12]byte // Chksum [8]byte // Linkflag byte // Linkname [100]byte // Magic [8]byte // Uname [32]byte // Gname [32]byte // Devmajor [8]byte // Devminor [8]byte // } if len(raw) < sizeRecord { return false } raw = raw[:sizeRecord] // First 100 bytes of the header represent the file name. // Check if file looks like Gentoo GLEP binary package. if bytes.Contains(raw[:100], []byte("/gpkg-1\x00")) { return false } // Get the checksum recorded into the file. recsum := tarParseOctal(raw[148:156]) if recsum == -1 { return false } sum1, sum2 := tarChksum(raw) return recsum == sum1 || recsum == sum2 } // tarParseOctal converts octal string to decimal int. func tarParseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need to skip leading NULs. // Fields may also be padded with spaces or NULs. // So we remove leading and trailing NULs and spaces to be sure. b = bytes.Trim(b, " \x00") if len(b) == 0 { return -1 } ret := int64(0) for _, b := range b { if b == 0 { break } if b < '0' || b > '7' { return -1 } ret = (ret << 3) | int64(b-'0') } return ret } // tarChksum computes the checksum for the header block b. // The actual checksum is written to same b block after it has been calculated. // Before calculation the bytes from b reserved for checksum have placeholder // value of ASCII space 0x20. // POSIX specifies a sum of the unsigned byte values, but the Sun tar used // signed byte values. We compute and return both. func tarChksum(b []byte) (unsigned, signed int64) { for i, c := range b { if 148 <= i && i < 156 { c = ' ' // Treat the checksum field itself as all spaces. } unsigned += int64(c) signed += int64(int8(c)) } return unsigned, signed } // Zlib matches zlib compressed files. func Zlib(raw []byte, _ uint32) bool { // https://www.ietf.org/rfc/rfc6713.txt // This check has one fault: ASCII code can satisfy it; for ex: []byte("x ") zlib := len(raw) > 1 && raw[0] == 'x' && binary.BigEndian.Uint16(raw)%31 == 0 // Check that the file is not a regular text to avoid false positives. return zlib && !Text(raw, 0) } ================================================ FILE: modules/mime/internal/magic/archive_test.go ================================================ package magic import "testing" func TestTarParseOctal(t *testing.T) { tests := []struct { in string want int64 }{ {"0000000\x00", 0}, {" \x0000000\x00", 0}, {" \x0000003\x00", 3}, {"00000000227\x00", 0227}, {"032033\x00 ", 032033}, {"320330\x00 ", 0320330}, {"0000660\x00 ", 0660}, {"\x00 0000660\x00 ", 0660}, {"0123456789abcdef", -1}, {"0123456789\x00abcdef", -1}, {"01234567\x0089abcdef", 01234567}, {"0123\x7e\x5f\x264123", -1}, } for _, tt := range tests { got := tarParseOctal([]byte(tt.in)) if got != tt.want { t.Errorf("parseOctal(%q): got %d, want %d", tt.in, got, tt.want) } } } ================================================ FILE: modules/mime/internal/magic/audio.go ================================================ package magic import ( "bytes" "encoding/binary" ) // Flac matches a Free Lossless Audio Codec file. func Flac(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("\x66\x4C\x61\x43\x00\x00\x00\x22")) } // Midi matches a Musical Instrument Digital Interface file. func Midi(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("\x4D\x54\x68\x64")) } // Ape matches a Monkey's Audio file. func Ape(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("\x4D\x41\x43\x20\x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3")) } // MusePack matches a Musepack file. func MusePack(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("MPCK")) } // Au matches a Sun Microsystems au file. func Au(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("\x2E\x73\x6E\x64")) } // Amr matches an Adaptive Multi-Rate file. func Amr(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("\x23\x21\x41\x4D\x52")) } // Voc matches a Creative Voice file. func Voc(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("Creative Voice File")) } // M3U matches a Playlist file. func M3U(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("#EXTM3U\n")) || bytes.HasPrefix(raw, []byte("#EXTM3U\r\n")) } // AAC matches an Advanced Audio Coding file. func AAC(raw []byte, _ uint32) bool { return len(raw) > 1 && ((raw[0] == 0xFF && raw[1] == 0xF1) || (raw[0] == 0xFF && raw[1] == 0xF9)) } // Mp3 matches an mp3 file. func Mp3(raw []byte, limit uint32) bool { if len(raw) < 3 { return false } if bytes.HasPrefix(raw, []byte("ID3")) { // MP3s with an ID3v2 tag will start with "ID3" // ID3v1 tags, however appear at the end of the file. return true } // Match MP3 files without tags switch binary.BigEndian.Uint16(raw[:2]) & 0xFFFE { case 0xFFFA: // MPEG ADTS, layer III, v1 return true case 0xFFF2: // MPEG ADTS, layer III, v2 return true case 0xFFE2: // MPEG ADTS, layer III, v2.5 return true } return false } // Wav matches a Waveform Audio File Format file. func Wav(raw []byte, limit uint32) bool { return len(raw) > 12 && bytes.Equal(raw[:4], []byte("RIFF")) && bytes.Equal(raw[8:12], []byte{0x57, 0x41, 0x56, 0x45}) } // Aiff matches Audio Interchange File Format file. func Aiff(raw []byte, limit uint32) bool { return len(raw) > 12 && bytes.Equal(raw[:4], []byte{0x46, 0x4F, 0x52, 0x4D}) && bytes.Equal(raw[8:12], []byte{0x41, 0x49, 0x46, 0x46}) } // Qcp matches a Qualcomm Pure Voice file. func Qcp(raw []byte, limit uint32) bool { return len(raw) > 12 && bytes.Equal(raw[:4], []byte("RIFF")) && bytes.Equal(raw[8:12], []byte("QLCM")) } ================================================ FILE: modules/mime/internal/magic/binary.go ================================================ package magic import ( "bytes" "debug/macho" "encoding/binary" "slices" ) // Lnk matches Microsoft lnk binary format. func Lnk(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00}) } // Wasm matches a web assembly File Format file. func Wasm(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x00, 0x61, 0x73, 0x6D}) } // Exe matches a Windows/DOS executable file. func Exe(raw []byte, _ uint32) bool { return len(raw) > 1 && raw[0] == 0x4D && raw[1] == 0x5A } // Elf matches an Executable and Linkable Format file. func Elf(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x7F, 0x45, 0x4C, 0x46}) } // Nes matches a Nintendo Entertainment system ROM file. func Nes(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x4E, 0x45, 0x53, 0x1A}) } // SWF matches an Adobe Flash swf file. func SWF(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("CWS")) || bytes.HasPrefix(raw, []byte("FWS")) || bytes.HasPrefix(raw, []byte("ZWS")) } // Torrent has bencoded text in the beginning. func Torrent(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("d8:announce")) } // PAR1 matches a parquet file. func Par1(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x50, 0x41, 0x52, 0x31}) } // CBOR matches a Concise Binary Object Representation https://cbor.io/ func CBOR(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0xD9, 0xD9, 0xF7}) } // Java bytecode and Mach-O binaries share the same magic number. // More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe func classOrMachOFat(in []byte) bool { // There should be at least 8 bytes for both of them because the only way to // quickly distinguish them is by comparing byte at position 7 if len(in) < 8 { return false } return binary.BigEndian.Uint32(in) == macho.MagicFat } // Class matches a java class file. func Class(raw []byte, limit uint32) bool { return classOrMachOFat(raw) && raw[7] > 30 } // MachO matches Mach-O binaries format. func MachO(raw []byte, limit uint32) bool { if classOrMachOFat(raw) && raw[7] < 0x14 { return true } if len(raw) < 4 { return false } be := binary.BigEndian.Uint32(raw) le := binary.LittleEndian.Uint32(raw) return be == macho.Magic32 || le == macho.Magic32 || be == macho.Magic64 || le == macho.Magic64 } // Dbf matches a dBase file. // https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm func Dbf(raw []byte, limit uint32) bool { if len(raw) < 68 { return false } // 3rd and 4th bytes contain the last update month and day of month. if raw[2] == 0 || raw[2] > 12 || raw[3] == 0 || raw[3] > 31 { return false } // 12, 13, 30, 31 are reserved bytes and always filled with 0x00. if raw[12] != 0x00 || raw[13] != 0x00 || raw[30] != 0x00 || raw[31] != 0x00 { return false } // Production MDX flag; // 0x01 if a production .MDX file exists for this table; // 0x00 if no .MDX file exists. if raw[28] > 0x01 { return false } // dbf type is dictated by the first byte. dbfTypes := []byte{ 0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82, 0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB, } return slices.Contains(dbfTypes, raw[0]) } // ElfObj matches an object file. func ElfObj(raw []byte, limit uint32) bool { return len(raw) > 17 && ((raw[16] == 0x01 && raw[17] == 0x00) || (raw[16] == 0x00 && raw[17] == 0x01)) } // ElfExe matches an executable file. func ElfExe(raw []byte, limit uint32) bool { return len(raw) > 17 && ((raw[16] == 0x02 && raw[17] == 0x00) || (raw[16] == 0x00 && raw[17] == 0x02)) } // ElfLib matches a shared library file. func ElfLib(raw []byte, limit uint32) bool { return len(raw) > 17 && ((raw[16] == 0x03 && raw[17] == 0x00) || (raw[16] == 0x00 && raw[17] == 0x03)) } // ElfDump matches a core dump file. func ElfDump(raw []byte, limit uint32) bool { return len(raw) > 17 && ((raw[16] == 0x04 && raw[17] == 0x00) || (raw[16] == 0x00 && raw[17] == 0x04)) } // Dcm matches a DICOM medical format file. func Dcm(raw []byte, limit uint32) bool { return len(raw) > 131 && bytes.Equal(raw[128:132], []byte{0x44, 0x49, 0x43, 0x4D}) } // Marc matches a MARC21 (MAchine-Readable Cataloging) file. func Marc(raw []byte, limit uint32) bool { // File is at least 24 bytes ("leader" field size). if len(raw) < 24 { return false } // Fixed bytes at offset 20. if !bytes.Equal(raw[20:24], []byte("4500")) { return false } // First 5 bytes are ASCII digits. for i := range 5 { if raw[i] < '0' || raw[i] > '9' { return false } } // Field terminator is present in first 2048 bytes. return bytes.Contains(raw[:min(2048, len(raw))], []byte{0x1E}) } // GLB matches a glTF model format file. // GLB is the binary file format representation of 3D models saved in // the GL transmission Format (glTF). // GLB uses little endian and its header structure is as follows: // // <-- 12-byte header --> // | magic | version | length | // | (uint32) | (uint32) | (uint32) | // | \x67\x6C\x54\x46 | \x01\x00\x00\x00 | ... | // | g l T F | 1 | ... | // // Visit [glTF specification] and [IANA glTF entry] for more details. // // [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html // [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf-binary func GLB(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("\x67\x6C\x54\x46\x02\x00\x00\x00")) || bytes.HasPrefix(raw, []byte("\x67\x6C\x54\x46\x01\x00\x00\x00")) } // TzIf matches a Time Zone Information Format (TZif) file. // See more: https://tools.ietf.org/id/draft-murchison-tzdist-tzif-00.html#rfc.section.3 // Its header structure is shown below: // // +---------------+---+ // | magic (4) | <-+-- version (1) // +---------------+---+---------------------------------------+ // | [unused - reserved for future use] (15) | // +---------------+---------------+---------------+-----------+ // | isutccnt (4) | isstdcnt (4) | leapcnt (4) | // +---------------+---------------+---------------+ // | timecnt (4) | typecnt (4) | charcnt (4) | func TzIf(raw []byte, limit uint32) bool { // File is at least 44 bytes (header size). if len(raw) < 44 { return false } if !bytes.HasPrefix(raw, []byte("TZif")) { return false } // Field "typecnt" MUST not be zero. if binary.BigEndian.Uint32(raw[36:40]) == 0 { return false } // Version has to be NUL (0x00), '2' (0x32) or '3' (0x33). return raw[4] == 0x00 || raw[4] == 0x32 || raw[4] == 0x33 } // Pyc matches a Python compiled file. // The signatures are sourced from libmagic v5.47 func Pyc(raw []byte, limit uint32) bool { if len(raw) < 8 { return false } // python 1.0 through 3.7 signatures, magic/Magdir/python:13:190 pycMagic := []uint32{ 0x02099900, 0x03099900, 0x892e0d0a, 0x04170d0a, 0x994e0d0a, 0xfcc40d0a, 0xfdc40d0a, 0x87c60d0a, 0x88c60d0a, 0x2aeb0d0a, 0x2beb0d0a, 0x2ded0d0a, 0x2eed0d0a, 0x3bf20d0a, 0x3cf20d0a, 0x45f20d0a, 0x59f20d0a, 0x63f20d0a, 0x6df20d0a, 0x6ef20d0a, 0x77f20d0a, 0x81f20d0a, 0x8bf20d0a, 0x8cf20d0a, 0x95f20d0a, 0x9ff20d0a, 0xa9f20d0a, 0xb3f20d0a, 0xb4f20d0a, 0xc7f20d0a, 0xd1f20d0a, 0xd2f20d0a, 0xdbf20d0a, 0xe5f20d0a, 0xeff20d0a, 0xf9f20d0a, 0x03f30d0a, 0x04f30d0a, 0x0af30d0a, 0xb80b0d0a, 0xc20b0d0a, 0xcc0b0d0a, 0xd60b0d0a, 0xe00b0d0a, 0xea0b0d0a, 0xf40b0d0a, 0xf50b0d0a, 0xff0b0d0a, 0x090c0d0a, 0x130c0d0a, 0x1d0c0d0a, 0x1f0c0d0a, 0x270c0d0a, 0x3b0c0d0a, 0x450c0d0a, 0x4f0c0d0a, 0x580c0d0a, 0x620c0d0a, 0x6c0c0d0a, 0x760c0d0a, 0x800c0d0a, 0x8a0c0d0a, 0x940c0d0a, 0x9e0c0d0a, 0xb20c0d0a, 0xbc0c0d0a, 0xc60c0d0a, 0xd00c0d0a, 0xda0c0d0a, 0xe40c0d0a, 0xee0c0d0a, 0xf80c0d0a, 0x020d0d0a, 0x0c0d0d0a, 0x160d0d0a, 0x170d0d0a, 0x200d0d0a, 0x210d0d0a, 0x2a0d0d0a, 0x2b0d0d0a, 0x2c0d0d0a, 0x2d0d0d0a, 0x2f0d0d0a, 0x300d0d0a, 0x310d0d0a, 0x320d0d0a, 0x330d0d0a, 0x3e0d0d0a, 0x3f0d0d0a, } n := binary.BigEndian.Uint32(raw) if slices.Contains(pycMagic, n) { return true } if raw[2] == 0x0d && raw[3] == 0x0a { // Only two bits of flag field are currently used. if l := binary.LittleEndian.Uint32(raw[4:]); l > 3 { return false } if raw[1] == 0x0d || raw[1] == 0x0e { return true } // PyPy magic numbers, magic/Magdir/python:233 n := binary.LittleEndian.Uint16(raw) return n == 240 || n == 256 || n == 336 || n == 384 || n == 416 } return false } ================================================ FILE: modules/mime/internal/magic/database.go ================================================ package magic import "bytes" // Sqlite matches an SQLite database file. func Sqlite(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{ 0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00, }) } // MsAccessAce matches Microsoft Access dababase file. func MsAccessAce(raw []byte, _ uint32) bool { return offset(raw, []byte("Standard ACE DB"), 4) } // MsAccessMdb matches legacy Microsoft Access database file (JET, 2003 and earlier). func MsAccessMdb(raw []byte, _ uint32) bool { return offset(raw, []byte("Standard Jet DB"), 4) } ================================================ FILE: modules/mime/internal/magic/document.go ================================================ package magic import ( "bytes" "encoding/binary" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // Pdf matches a Portable Document Format file. // https://github.com/file/file/blob/11010cc805546a3e35597e67e1129a481aed40e8/magic/Magdir/pdf func Pdf(raw []byte, _ uint32) bool { // usual pdf signature return bytes.HasPrefix(raw, []byte("%PDF-")) || // new-line prefixed signature bytes.HasPrefix(raw, []byte("\012%PDF-")) || // UTF-8 BOM prefixed signature bytes.HasPrefix(raw, []byte("\xef\xbb\xbf%PDF-")) } // Fdf matches a Forms Data Format file. func Fdf(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("%FDF")) } // Mobi matches a Mobi file. func Mobi(raw []byte, _ uint32) bool { return offset(raw, []byte("BOOKMOBI"), 60) } // Lit matches a Microsoft Lit file. func Lit(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("ITOLITLS")) } // PDF matches a Portable Document Format file. // The %PDF- header should be the first thing inside the file but many // implementations don't follow the rule. The PDF spec at Appendix H says the // signature can be prepended by anything. // https://bugs.astron.com/view.php?id=446 func PDF(raw []byte, _ uint32) bool { raw = raw[:min(len(raw), 1024)] return bytes.Contains(raw, []byte("%PDF-")) } // DjVu matches a DjVu file. func DjVu(raw []byte, _ uint32) bool { if len(raw) < 12 { return false } if !bytes.HasPrefix(raw, []byte{0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}) { return false } return bytes.HasPrefix(raw[12:], []byte("DJVM")) || bytes.HasPrefix(raw[12:], []byte("DJVU")) || bytes.HasPrefix(raw[12:], []byte("DJVI")) || bytes.HasPrefix(raw[12:], []byte("THUM")) } // P7s matches an .p7s signature File (PEM, Base64). func P7s(raw []byte, _ uint32) bool { // Check for PEM Encoding. if bytes.HasPrefix(raw, []byte("-----BEGIN PKCS7")) { return true } // Check if DER Encoding is long enough. if len(raw) < 20 { return false } // Magic Bytes for the signedData ASN.1 encoding. startHeader := [][]byte{{0x30, 0x80}, {0x30, 0x81}, {0x30, 0x82}, {0x30, 0x83}, {0x30, 0x84}} signedDataMatch := []byte{0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07} // Check if Header is correct. There are multiple valid headers. for i, match := range startHeader { // If first bytes match, then check for ASN.1 Object Type. if bytes.HasPrefix(raw, match) { if bytes.HasPrefix(raw[i+2:], signedDataMatch) { return true } } } return false } // Lotus123 matches a Lotus 1-2-3 spreadsheet document. func Lotus123(raw []byte, _ uint32) bool { if len(raw) <= 20 { return false } version := binary.BigEndian.Uint32(raw) if version == 0x00000200 { return raw[6] != 0 && raw[7] == 0 } return version == 0x00001a00 && raw[20] > 0 && raw[20] < 32 } // CHM matches a Microsoft Compiled HTML Help file. func CHM(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("ITSF\003\000\000\000\x60\000\000\000")) } // Inf matches an OS/2 .inf file. func Inf(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("HSP\x01\x9b\x00")) } // Hlp matches an OS/2 .hlp file. func Hlp(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("HSP\x10\x9b\x00")) } // FrameMaker matches an Adobe FrameMaker file. func FrameMaker(raw []byte, _ uint32) bool { b := scan.Bytes(raw) if !bytes.HasPrefix(b, []byte(" 48 && bytes.HasPrefix(raw, []byte("OTTO\x00")) && bytes.Contains(raw[12:48], []byte("CFF ")) } // Ttf matches a TrueType font file. func Ttf(raw []byte, limit uint32) bool { if !bytes.HasPrefix(raw, []byte{0x00, 0x01, 0x00, 0x00}) { return false } return hasSFNTTable(raw) } func hasSFNTTable(raw []byte) bool { // 49 possible tables as explained below if len(raw) < 16 || binary.BigEndian.Uint16(raw[4:]) >= 49 { return false } // libmagic says there are 47 table names in specification, but it seems // they reached 49 in the meantime. // https://github.com/file/file/blob/5184ca2471c0e801c156ee120a90e669fe27b31d/magic/Magdir/fonts#L279 // At the same time, the TrueType docs seem misleading: // 1. https://developer.apple.com/fonts/TrueType-Reference-Manual/index.html // 2. https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6.html // Page 1. has 48 tables. Page 2. has 49 tables. The diff is the gcid table. // Take a permissive approach. possibleTables := []uint32{ 0x61636e74, // "acnt" 0x616e6b72, // "ankr" 0x61766172, // "avar" 0x62646174, // "bdat" 0x62686564, // "bhed" 0x626c6f63, // "bloc" 0x62736c6e, // "bsln" 0x636d6170, // "cmap" 0x63766172, // "cvar" 0x63767420, // "cvt " 0x45425343, // "EBSC" 0x66647363, // "fdsc" 0x66656174, // "feat" 0x666d7478, // "fmtx" 0x666f6e64, // "fond" 0x6670676d, // "fpgm" 0x66766172, // "fvar" 0x67617370, // "gasp" 0x67636964, // "gcid" 0x676c7966, // "glyf" 0x67766172, // "gvar" 0x68646d78, // "hdmx" 0x68656164, // "head" 0x68686561, // "hhea" 0x686d7478, // "hmtx" 0x6876676c, // "hvgl" 0x6876706d, // "hvpm" 0x6a757374, // "just" 0x6b65726e, // "kern" 0x6b657278, // "kerx" 0x6c636172, // "lcar" 0x6c6f6361, // "loca" 0x6c746167, // "ltag" 0x6d617870, // "maxp" 0x6d657461, // "meta" 0x6d6f7274, // "mort" 0x6d6f7278, // "morx" 0x6e616d65, // "name" 0x6f706264, // "opbd" 0x4f532f32, // "OS/2" } ourTable := binary.BigEndian.Uint32(raw[12:16]) return slices.Contains(possibleTables, ourTable) } // Eot matches an Embedded OpenType font file. func Eot(raw []byte, limit uint32) bool { return len(raw) > 35 && bytes.Equal(raw[34:36], []byte{0x4C, 0x50}) && (bytes.Equal(raw[8:11], []byte{0x02, 0x00, 0x01}) || bytes.Equal(raw[8:11], []byte{0x01, 0x00, 0x00}) || bytes.Equal(raw[8:11], []byte{0x02, 0x00, 0x02})) } // Ttc matches a TrueType Collection font file. func Ttc(raw []byte, limit uint32) bool { return len(raw) > 7 && bytes.HasPrefix(raw, []byte("ttcf")) && (bytes.Equal(raw[4:8], []byte{0x00, 0x01, 0x00, 0x00}) || bytes.Equal(raw[4:8], []byte{0x00, 0x02, 0x00, 0x00})) } ================================================ FILE: modules/mime/internal/magic/ftyp.go ================================================ package magic import ( "bytes" ) // AVIF matches an AV1 Image File Format still or animated. // Wikipedia page seems outdated listing image/avif-sequence for animations. // https://github.com/AOMediaCodec/av1-avif/issues/59 func AVIF(raw []byte, _ uint32) bool { return ftyp(raw, []byte("avif"), []byte("avis")) } // ThreeGP matches a 3GPP file. func ThreeGP(raw []byte, _ uint32) bool { return ftyp(raw, []byte("3gp1"), []byte("3gp2"), []byte("3gp3"), []byte("3gp4"), []byte("3gp5"), []byte("3gp6"), []byte("3gp7"), []byte("3gs7"), []byte("3ge6"), []byte("3ge7"), []byte("3gg6"), ) } // ThreeG2 matches a 3GPP2 file. func ThreeG2(raw []byte, _ uint32) bool { return ftyp(raw, []byte("3g24"), []byte("3g25"), []byte("3g26"), []byte("3g2a"), []byte("3g2b"), []byte("3g2c"), []byte("KDDI"), ) } // AMp4 matches an audio MP4 file. func AMp4(raw []byte, _ uint32) bool { return ftyp(raw, // audio for Adobe Flash Player 9+ []byte("F4A "), []byte("F4B "), // Apple iTunes AAC-LC (.M4A) Audio []byte("M4B "), []byte("M4P "), // MPEG-4 (.MP4) for SonyPSP []byte("MSNV"), // Nero Digital AAC Audio []byte("NDAS"), ) } // Mqv matches a Sony / Mobile QuickTime file. func Mqv(raw []byte, _ uint32) bool { return ftyp(raw, []byte("mqt ")) } // M4a matches an audio M4A file. func M4a(raw []byte, _ uint32) bool { return ftyp(raw, []byte("M4A ")) } // M4v matches an Appl4 M4V video file. func M4v(raw []byte, _ uint32) bool { return ftyp(raw, []byte("M4V "), []byte("M4VH"), []byte("M4VP")) } // Heic matches a High Efficiency Image Coding (HEIC) file. func Heic(raw []byte, _ uint32) bool { return ftyp(raw, []byte("heic"), []byte("heix")) } // HeicSequence matches a High Efficiency Image Coding (HEIC) file sequence. func HeicSequence(raw []byte, _ uint32) bool { return ftyp(raw, []byte("hevc"), []byte("hevx")) } // Heif matches a High Efficiency Image File Format (HEIF) file. func Heif(raw []byte, _ uint32) bool { return ftyp(raw, []byte("mif1"), []byte("heim"), []byte("heis"), []byte("avic")) } // HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence. func HeifSequence(raw []byte, _ uint32) bool { return ftyp(raw, []byte("msf1"), []byte("hevm"), []byte("hevs"), []byte("avcs")) } // Mj2 matches a Motion JPEG 2000 file: https://en.wikipedia.org/wiki/Motion_JPEG_2000. func Mj2(raw []byte, _ uint32) bool { return ftyp(raw, []byte("mj2s"), []byte("mjp2"), []byte("MFSM"), []byte("MGSV")) } // Dvb matches a Digital Video Broadcasting file: https://dvb.org. // https://cconcolato.github.io/mp4ra/filetype.html // https://github.com/file/file/blob/512840337ead1076519332d24fefcaa8fac36e06/magic/Magdir/animation#L135-L154 func Dvb(raw []byte, _ uint32) bool { return ftyp(raw, []byte("dby1"), []byte("dsms"), []byte("dts1"), []byte("dts2"), []byte("dts3"), []byte("dxo "), []byte("dmb1"), []byte("dmpf"), []byte("drc1"), []byte("dv1a"), []byte("dv1b"), []byte("dv2a"), []byte("dv2b"), []byte("dv3a"), []byte("dv3b"), []byte("dvr1"), []byte("dvt1"), []byte("emsg")) } // TODO: add support for remaining video formats at ftyps.com. // QuickTime matches a QuickTime File Format file. // https://www.loc.gov/preservation/digital/formats/fdd/fdd000052.shtml // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap1/qtff1.html#//apple_ref/doc/uid/TP40000939-CH203-38190 // https://github.com/apache/tika/blob/0f5570691133c75ac4472c3340354a6c4080b104/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml#L7758-L7777 func QuickTime(raw []byte, _ uint32) bool { if len(raw) < 12 { return false } // First 4 bytes represent the size of the atom as unsigned int. // Next 4 bytes are the type of the atom. // For `ftyp` atoms check if first byte in size is 0, otherwise, a text file // which happens to contain 'ftypqt ' at index 4 will trigger a false positive. if bytes.Equal(raw[4:12], []byte("ftypqt ")) || bytes.Equal(raw[4:12], []byte("ftypmoov")) { return raw[0] == 0x00 } basicAtomTypes := [][]byte{ []byte("moov\x00"), []byte("mdat\x00"), []byte("free\x00"), []byte("skip\x00"), []byte("pnot\x00"), } for _, a := range basicAtomTypes { if bytes.Equal(raw[4:9], a) { return true } } return bytes.Equal(raw[:8], []byte("\x00\x00\x00\x08wide")) } // Mp4 detects an .mp4 file. Mp4 detections only does a basic ftyp check. // Mp4 has many registered and unregistered code points so it's hard to keep track // of all. Detection will default on video/mp4 for all ftyp files. // ISO_IEC_14496-12 is the specification for the iso container. func Mp4(raw []byte, _ uint32) bool { if len(raw) < 12 { return false } // ftyps are made out of boxes. The first 4 bytes of the box represent // its size in big-endian uint32. First box is the ftyp box and it is small // in size. Check most significant byte is 0 to filter out false positive // text files that happen to contain the string "ftyp" at index 4. if raw[0] != 0 { return false } return bytes.Equal(raw[4:8], []byte("ftyp")) } ================================================ FILE: modules/mime/internal/magic/geo.go ================================================ package magic import ( "bytes" "encoding/binary" "slices" ) // Shp matches a shape format file. // https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf func Shp(raw []byte, limit uint32) bool { if len(raw) < 112 { return false } if binary.BigEndian.Uint32(raw[0:4]) != 9994 || binary.BigEndian.Uint32(raw[4:8]) != 0 || binary.BigEndian.Uint32(raw[8:12]) != 0 || binary.BigEndian.Uint32(raw[12:16]) != 0 || binary.BigEndian.Uint32(raw[16:20]) != 0 || binary.BigEndian.Uint32(raw[20:24]) != 0 || binary.LittleEndian.Uint32(raw[28:32]) != 1000 { return false } shapeTypes := []int{ 0, // Null shape 1, // Point 3, // Polyline 5, // Polygon 8, // MultiPoint 11, // PointZ 13, // PolylineZ 15, // PolygonZ 18, // MultiPointZ 21, // PointM 23, // PolylineM 25, // PolygonM 28, // MultiPointM 31, // MultiPatch } return slices.Contains(shapeTypes, int(binary.LittleEndian.Uint32(raw[108:112]))) } // Shx matches a shape index format file. // https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf func Shx(raw []byte, limit uint32) bool { return bytes.HasPrefix(raw, []byte{0x00, 0x00, 0x27, 0x0A}) } ================================================ FILE: modules/mime/internal/magic/image.go ================================================ package magic import ( "bytes" "encoding/binary" "slices" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // Png matches a Portable Network Graphics file. // https://www.w3.org/TR/PNG/ func Png(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}) } // Apng matches an Animated Portable Network Graphics file. // https://wiki.mozilla.org/APNG_Specification func Apng(raw []byte, _ uint32) bool { b := scan.Bytes(raw) b.Advance(8) // the first 8 bytes matched by regular png // PNG chunks are composed of: // 4 bytes: length in big endian // 4 bytes: chunk type // length bytes: chunk data // 4 bytes: CRC // // Limit to 32, so we don't waste time on huge inputs. // acTL chunk must come before any IDAT chunks. // https://www.w3.org/TR/png-3/#structure for i := 0; i < 32 && len(b) > 0; i++ { sz, _ := b.Uint32be() if bytes.HasPrefix(b, []byte("acTL")) { return true } if bytes.HasPrefix(b, []byte("IDAT")) { return false } if !b.Advance(int(sz + 8)) { return false } } return false } // Jpg matches a Joint Photographic Experts Group file. func Jpg(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0xFF, 0xD8, 0xFF}) } // Jp2 matches a JPEG 2000 Image file (ISO 15444-1). func Jp2(raw []byte, _ uint32) bool { return jpeg2k(raw, []byte{0x6a, 0x70, 0x32, 0x20}) } // Jpx matches a JPEG 2000 Image file (ISO 15444-2). func Jpx(raw []byte, _ uint32) bool { return jpeg2k(raw, []byte{0x6a, 0x70, 0x78, 0x20}) } // Jpm matches a JPEG 2000 Image file (ISO 15444-6). func Jpm(raw []byte, _ uint32) bool { return jpeg2k(raw, []byte{0x6a, 0x70, 0x6D, 0x20}) } // Gif matches a Graphics Interchange Format file. func Gif(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("GIF87a")) || bytes.HasPrefix(raw, []byte("GIF89a")) } // Bmp matches a bitmap image file. func Bmp(raw []byte, _ uint32) bool { if len(raw) < 18 { return false } if raw[0] != 'B' || raw[1] != 'M' { return false } bmpFormat := binary.LittleEndian.Uint32(raw[14:]) // sourced from libmagic Magdir/images possibleFormats := []uint32{ 48, // PC bitmap, OS/2 2.x format (DIB header size=48) 24, // PC bitmap, OS/2 2.x format (DIB header size=24) 16, // PC bitmap, OS/2 2.x format (DIB header size=16) 64, // PC bitmap, OS/2 2.x format 52, // PC bitmap, Adobe Photoshop 56, // PC bitmap, Adobe Photoshop with alpha channel mask 40, // PC bitmap, Windows 3.x format 124, // PC bitmap, Windows 98/2000 and newer format 108, // PC bitmap, Windows 95/NT4 and newer format } return slices.Contains(possibleFormats, bmpFormat) } // Ps matches a PostScript file. func Ps(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("%!PS-Adobe-")) } // Psd matches a Photoshop Document file. func Psd(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("8BPS")) } // Ico matches an ICO file. func Ico(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x00, 0x00, 0x01, 0x00}) || bytes.HasPrefix(raw, []byte{0x00, 0x00, 0x02, 0x00}) } // Icns matches an ICNS (Apple Icon Image format) file. func Icns(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("icns")) } // Tiff matches a Tagged Image File Format file. func Tiff(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x49, 0x49, 0x2A, 0x00}) || bytes.HasPrefix(raw, []byte{0x4D, 0x4D, 0x00, 0x2A}) } // Bpg matches a Better Portable Graphics file. func Bpg(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x42, 0x50, 0x47, 0xFB}) } // Xcf matches GIMP image data. func Xcf(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("gimp xcf")) } // Pat matches GIMP pattern data. func Pat(raw []byte, _ uint32) bool { return offset(raw, []byte("GPAT"), 20) } // Gbr matches GIMP brush data. func Gbr(raw []byte, _ uint32) bool { return offset(raw, []byte("GIMP"), 20) } // Hdr matches Radiance HDR image. // https://web.archive.org/web/20060913152809/http://local.wasp.uwa.edu.au/~pbourke/dataformats/pic/ func Hdr(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("#?RADIANCE\n")) } // Xpm matches X PixMap image data. func Xpm(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x2F, 0x2A, 0x20, 0x58, 0x50, 0x4D, 0x20, 0x2A, 0x2F}) } // Jxs matches a JPEG XS coded image file (ISO/IEC 21122-3). func Jxs(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x00, 0x00, 0x00, 0x0C, 0x4A, 0x58, 0x53, 0x20, 0x0D, 0x0A, 0x87, 0x0A}) } // Jxr matches Microsoft HD JXR photo file. func Jxr(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x49, 0x49, 0xBC, 0x01}) } func jpeg2k(raw []byte, sig []byte) bool { if len(raw) < 24 { return false } if !bytes.Equal(raw[4:8], []byte{0x6A, 0x50, 0x20, 0x20}) && !bytes.Equal(raw[4:8], []byte{0x6A, 0x50, 0x32, 0x20}) { return false } return bytes.Equal(raw[20:24], sig) } // Webp matches a WebP file. func Webp(raw []byte, _ uint32) bool { return len(raw) > 12 && bytes.Equal(raw[0:4], []byte("RIFF")) && bytes.Equal(raw[8:12], []byte{0x57, 0x45, 0x42, 0x50}) } // Dwg matches a CAD drawing file. func Dwg(raw []byte, _ uint32) bool { if len(raw) < 6 || raw[0] != 0x41 || raw[1] != 0x43 { return false } dwgVersions := [][]byte{ {0x31, 0x2E, 0x34, 0x30}, {0x31, 0x2E, 0x35, 0x30}, {0x32, 0x2E, 0x31, 0x30}, {0x31, 0x30, 0x30, 0x32}, {0x31, 0x30, 0x30, 0x33}, {0x31, 0x30, 0x30, 0x34}, {0x31, 0x30, 0x30, 0x36}, {0x31, 0x30, 0x30, 0x39}, {0x31, 0x30, 0x31, 0x32}, {0x31, 0x30, 0x31, 0x34}, {0x31, 0x30, 0x31, 0x35}, {0x31, 0x30, 0x31, 0x38}, {0x31, 0x30, 0x32, 0x31}, {0x31, 0x30, 0x32, 0x34}, {0x31, 0x30, 0x33, 0x32}, } for _, d := range dwgVersions { if bytes.Equal(raw[2:6], d) { return true } } return false } // Jxl matches JPEG XL image file. func Jxl(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0xFF, 0x0A}) || bytes.HasPrefix(raw, []byte("\x00\x00\x00\x0cJXL\x20\x0d\x0a\x87\x0a")) } // DXF matches Drawing Exchange Format AutoCAD file. // There does not seem to be a clear specification and the files in the wild // differ wildly. // https://images.autodesk.com/adsk/files/autocad_2012_pdf_dxf-reference_enu.pdf // // I collected these signatures by downloading a few dozen files from // http://cd.textfiles.com/amigaenv/DXF/OBJEKTE/ and // https://sembiance.com/fileFormatSamples/poly/dxf/ and then // xxd -l 16 {} | sort | uniq. // These signatures are only for the ASCII version of DXF. There is a binary version too. func DXF(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte(" 0\x0ASECTION\x0A")) || bytes.HasPrefix(raw, []byte(" 0\x0D\x0ASECTION\x0D\x0A")) || bytes.HasPrefix(raw, []byte("0\x0ASECTION\x0A")) || bytes.HasPrefix(raw, []byte("0\x0D\x0ASECTION\x0D\x0A")) } ================================================ FILE: modules/mime/internal/magic/magic.go ================================================ // Package magic holds the matching functions used to find MIME types. package magic import ( "bytes" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) type ( // Detector receiveѕ the raw data of a file and returns whether the data // meets any conditions. The limit parameter is an upper limit to the number // of bytes received and is used to tell if the byte slice represents the // whole file or is just the header of a file: len(raw) < limit or len(raw)>limit. Detector func(raw []byte, limit uint32) bool xmlSig struct { // the local name of the root tag localName []byte // the namespace of the XML document xmlns []byte } ) // offset returns true if the provided signature can be // found at offset in the raw input. func offset(raw []byte, sig []byte, offset int) bool { return len(raw) > offset && bytes.HasPrefix(raw[offset:], sig) } // ciPrefix is like prefix but the check is case insensitive. func ciPrefix(raw []byte, sigs ...[]byte) bool { for _, s := range sigs { if ciCheck(s, raw) { return true } } return false } func ciCheck(sig, raw []byte) bool { if len(raw) < len(sig)+1 { return false } // perform case insensitive check for i, b := range sig { db := raw[i] if 'A' <= b && b <= 'Z' { db &= 0xDF } if b != db { return false } } return true } // xml returns true if any of the provided XML signatures matches the raw input. func xml(b scan.Bytes, sigs ...xmlSig) bool { b.TrimLWS() if len(b) == 0 { return false } for _, s := range sigs { if xmlCheck(s, b) { return true } } return false } func xmlCheck(sig xmlSig, raw []byte) bool { raw = raw[:min(len(raw), 512)] if len(sig.localName) == 0 { return bytes.Index(raw, sig.xmlns) > 0 } if len(sig.xmlns) == 0 { return bytes.Index(raw, sig.localName) > 0 } localNameIndex := bytes.Index(raw, sig.localName) return localNameIndex != -1 && localNameIndex < bytes.Index(raw, sig.xmlns) } // markup returns true is any of the HTML signatures matches the raw input. func markup(b scan.Bytes, sigs ...[]byte) bool { if bytes.HasPrefix(b, []byte{0xEF, 0xBB, 0xBF}) { // We skip the UTF-8 BOM if present to ensure we correctly // process any leading whitespace. The presence of the BOM // is taken into account during charset detection in charset.go. b.Advance(3) } b.TrimLWS() if len(b) == 0 { return false } for _, s := range sigs { if markupCheck(s, b) { return true } } return false } func markupCheck(sig, raw []byte) bool { if len(raw) < len(sig)+1 { return false } // perform case insensitive check for i, b := range sig { db := raw[i] if 'A' <= b && b <= 'Z' { db &= 0xDF } if b != db { return false } } // Next byte must be space or right angle bracket. if db := raw[len(sig)]; !scan.ByteIsWS(db) && db != '>' { return false } return true } // ftyp returns true if any of the FTYP signatures matches the raw input. func ftyp(raw []byte, sigs ...[]byte) bool { if len(raw) < 12 { return false } for _, s := range sigs { if bytes.Equal(raw[8:12], s) { return true } } return false } type shebangSig struct { sig []byte flag scan.Flags } // A valid shebang starts with the "#!" characters, // followed by any number of spaces, // followed by the path to the interpreter, // and, optionally, followed by the arguments for the interpreter. // // Ex: // // #! /usr/bin/env php // // /usr/bin/env is the interpreter, php is the first and only argument. func shebang(b scan.Bytes, sigs ...shebangSig) bool { line := b.Line() if len(line) < 2 || line[0] != '#' || line[1] != '!' { return false } line = line[2:] line.TrimLWS() for _, s := range sigs { if line.Match(s.sig, s.flag) != -1 { return true } } return false } ================================================ FILE: modules/mime/internal/magic/magic_test.go ================================================ package magic import ( "testing" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) func TestShebangCheck(t *testing.T) { tests := []struct { name string sig []byte input string flags scan.Flags expected bool }{ // Valid shebangs { name: "valid bash shebang", sig: []byte("/bin/bash"), input: "#!/bin/bash", flags: scan.CompactWS, expected: true, }, { name: "valid bash shebang with spaces", sig: []byte("/bin/bash"), input: "#! /bin/bash", flags: scan.CompactWS, expected: true, }, { name: "valid bash shebang with multiple spaces", // #762 sig: []byte("/bin/env bash"), input: "#! /bin/env bash", flags: scan.CompactWS | scan.FullWord, expected: true, }, { name: "valid bash shebang with tabs", sig: []byte("/bin/bash"), input: "#!\t/bin/bash", flags: scan.CompactWS, expected: true, }, { name: "valid bash shebang with mixed whitespace", sig: []byte("/bin/bash"), input: "#! \t /bin/bash", flags: scan.CompactWS, expected: true, }, { name: "valid bash shebang with trailing whitespace", sig: []byte("/bin/bash"), input: "#! /bin/bash \t ", flags: scan.CompactWS, expected: true, }, { name: "valid bash shebang with arguments", sig: []byte("/bin/bash"), input: "#!/bin/bash -exu", flags: scan.CompactWS, expected: true, }, { name: "valid env/python shebang", sig: []byte("/usr/bin/env python"), input: "#!/usr/bin/env python", flags: scan.CompactWS, expected: true, }, { name: "valid env/python shebang with spaces", sig: []byte("/usr/bin/env python"), input: "#! /usr/bin/env python", flags: scan.CompactWS, expected: true, }, { name: "valid env -S/python shebang with arguments", sig: []byte("/usr/bin/env -S python"), input: "#!/usr/bin/env -S python -u", flags: scan.CompactWS, expected: true, }, { name: "valid env/python shebang with arguments", sig: []byte("/usr/bin/env python"), input: "#!/usr/bin/env python -u", flags: scan.CompactWS, expected: true, }, { name: "valid env/python shebang with arguments and trailing ws", sig: []byte("/usr/bin/env python"), input: "#!/usr/bin/env python -u \n", flags: scan.CompactWS, expected: true, }, // Invalid shebangs { name: "missing shebang prefix", sig: []byte("/bin/bash"), input: "/bin/bash", flags: scan.CompactWS, expected: false, }, { name: "wrong shebang prefix", sig: []byte("/bin/bash"), input: "##!/bin/bash", flags: scan.CompactWS, expected: false, }, { name: "wrong shebang prefix 2", sig: []byte("/bin/bash"), input: "!#/bin/bash", flags: scan.CompactWS, expected: false, }, { name: "wrong interpreter path", sig: []byte("/bin/bash"), input: "#!/bin/sh", flags: scan.CompactWS, expected: false, }, { name: "partial interpreter path", sig: []byte("/bin/bash"), input: "#!/bin/bas", flags: scan.CompactWS, expected: false, }, { name: "extra characters after interpreter", sig: []byte("/bin/bash"), input: "#!/bin/bashx", flags: scan.CompactWS, expected: true, }, { name: "extra characters after interpreter but FullWord", sig: []byte("/bin/bash"), input: "#!/bin/bashx", flags: scan.CompactWS | scan.FullWord, expected: false, }, { name: "extra characters after env interpreter", sig: []byte("/usr/bin/env bash"), input: "#!/usr/bin/env bash123", flags: scan.CompactWS, expected: true, }, { name: "extra characters after env interpreter but FullWord", sig: []byte("/usr/bin/env bash"), input: "#!/usr/bin/env bash123", flags: scan.CompactWS | scan.FullWord, expected: false, }, // Edge cases { name: "empty input", sig: []byte("/bin/bash"), input: "", flags: scan.CompactWS, expected: false, }, { name: "too short input", sig: []byte("/bin/bash"), input: "#!", flags: scan.CompactWS, expected: false, }, { name: "just shebang prefix", sig: []byte("/bin/bash"), input: "#!", flags: scan.CompactWS, expected: false, }, { name: "shebang with only spaces", sig: []byte("/bin/bash"), input: "#! ", flags: scan.CompactWS, expected: false, }, { name: "shebang with only tabs", sig: []byte("/bin/bash"), input: "#!\t\t", flags: scan.CompactWS, expected: false, }, { name: "empty signature", sig: []byte(""), input: "#!", flags: scan.CompactWS, expected: true, }, { name: "empty signature with spaces", sig: []byte(""), input: "#! ", flags: scan.CompactWS, expected: true, }, { name: "signature longer than input", sig: []byte("/very/long/path/to/interpreter"), input: "#!/bin/bash", flags: scan.CompactWS, expected: false, }, { name: "case sensitivity test", sig: []byte("/bin/bash"), input: "#!/BIN/BASH", flags: scan.CompactWS, expected: false, }, { name: "case sensitivity test 2", sig: []byte("/BIN/BASH"), input: "#!/bin/bash", flags: scan.CompactWS, expected: false, }, { name: "case sensitivity test 2", sig: []byte("/BIN/BASH"), input: "#!/bin/bash", flags: scan.CompactWS, expected: false, }, { name: "shebang split in multiple lines", sig: []byte("/bin/env bash"), input: "#!/bin/env\nbash", flags: scan.CompactWS | scan.FullWord, expected: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := shebang([]byte(tt.input), shebangSig{tt.sig, tt.flags}) if result != tt.expected { t.Errorf("shebang(%q, %q) = %v, want %v", tt.sig, tt.input, result, tt.expected) } }) } } ================================================ FILE: modules/mime/internal/magic/meteo.go ================================================ package magic import "bytes" // GRIB matches a GRIdded Binary meteorological file. // https://www.nco.ncep.noaa.gov/pmb/docs/on388/ // https://www.nco.ncep.noaa.gov/pmb/docs/grib2/grib2_doc/ func GRIB(raw []byte, _ uint32) bool { return len(raw) > 7 && bytes.HasPrefix(raw, []byte("GRIB")) && (raw[7] == 1 || raw[7] == 2) } // BUFR matches meteorological data format for storing point or time series data. // https://confluence.ecmwf.int/download/attachments/31064617/ecCodes_BUFR_in_a_nutshell.pdf?version=1&modificationDate=1457000352419&api=v2 func BUFR(raw []byte, _ uint32) bool { return len(raw) > 7 && bytes.HasPrefix(raw, []byte("BUFR")) && (raw[7] == 0x03 || raw[7] == 0x04) } ================================================ FILE: modules/mime/internal/magic/ms_office.go ================================================ package magic import ( "bytes" "encoding/binary" ) // Xlsx matches a Microsoft Excel 2007 file. func Xlsx(raw []byte, limit uint32) bool { return msoxml(raw, zipEntries{{ name: []byte("xl/"), dir: true, }}, 100) } // Docx matches a Microsoft Word 2007 file. func Docx(raw []byte, limit uint32) bool { return msoxml(raw, zipEntries{{ name: []byte("word/"), dir: true, }}, 100) } // Pptx matches a Microsoft PowerPoint 2007 file. func Pptx(raw []byte, limit uint32) bool { return msoxml(raw, zipEntries{{ name: []byte("ppt/"), dir: true, }}, 100) } // Visio matches a Microsoft Visio 2013+ file. func Visio(raw []byte, limit uint32) bool { return msoxml(raw, zipEntries{{ name: []byte("visio/"), dir: true, }}, 100) } // Ole matches an Open Linking and Embedding file. // // https://en.wikipedia.org/wiki/Object_Linking_and_Embedding func Ole(raw []byte, limit uint32) bool { return bytes.HasPrefix(raw, []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}) } // Doc matches a Microsoft Word 97-2003 file. // See: https://github.com/decalage2/oletools/blob/412ee36ae45e70f42123e835871bac956d958461/oletools/common/clsid.py func Doc(raw []byte, _ uint32) bool { clsids := [][]byte{ // Microsoft Word 97-2003 Document (Word.Document.8) {0x06, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}, // Microsoft Word 6.0-7.0 Document (Word.Document.6) {0x00, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}, // Microsoft Word Picture (Word.Picture.8) {0x07, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}, } for _, clsid := range clsids { if matchOleClsid(raw, clsid) { return true } } return false } // Ppt matches a Microsoft PowerPoint 97-2003 file or a PowerPoint 95 presentation. func Ppt(raw []byte, limit uint32) bool { // Root CLSID test is the safest way to detect identify OLE, however, the format // often places the root CLSID at the end of the file. if matchOleClsid(raw, []byte{ 0x10, 0x8d, 0x81, 0x64, 0x9b, 0x4f, 0xcf, 0x11, 0x86, 0xea, 0x00, 0xaa, 0x00, 0xb9, 0x29, 0xe8, }) || matchOleClsid(raw, []byte{ 0x70, 0xae, 0x7b, 0xea, 0x3b, 0xfb, 0xcd, 0x11, 0xa9, 0x03, 0x00, 0xaa, 0x00, 0x51, 0x0e, 0xa3, }) { return true } lin := len(raw) if lin < 520 { return false } pptSubHeaders := [][]byte{ {0xA0, 0x46, 0x1D, 0xF0}, {0x00, 0x6E, 0x1E, 0xF0}, {0x0F, 0x00, 0xE8, 0x03}, } for _, h := range pptSubHeaders { if bytes.HasPrefix(raw[512:], h) { return true } } if bytes.HasPrefix(raw[512:], []byte{0xFD, 0xFF, 0xFF, 0xFF}) && raw[518] == 0x00 && raw[519] == 0x00 { return true } return lin > 1152 && bytes.Contains(raw[1152:min(4096, lin)], []byte("P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t\x00 D\x00o\x00c\x00u\x00m\x00e\x00n\x00t")) } // Xls matches a Microsoft Excel 97-2003 file. func Xls(raw []byte, limit uint32) bool { // Root CLSID test is the safest way to detect identify OLE, however, the format // often places the root CLSID at the end of the file. if matchOleClsid(raw, []byte{ 0x10, 0x08, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, }) || matchOleClsid(raw, []byte{ 0x20, 0x08, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, }) { return true } lin := len(raw) if lin < 520 { return false } xlsSubHeaders := [][]byte{ {0x09, 0x08, 0x10, 0x00, 0x00, 0x06, 0x05, 0x00}, {0xFD, 0xFF, 0xFF, 0xFF, 0x10}, {0xFD, 0xFF, 0xFF, 0xFF, 0x1F}, {0xFD, 0xFF, 0xFF, 0xFF, 0x22}, {0xFD, 0xFF, 0xFF, 0xFF, 0x23}, {0xFD, 0xFF, 0xFF, 0xFF, 0x28}, {0xFD, 0xFF, 0xFF, 0xFF, 0x29}, } for _, h := range xlsSubHeaders { if bytes.HasPrefix(raw[512:], h) { return true } } return lin > 1152 && bytes.Contains(raw[1152:min(4096, lin)], []byte("W\x00k\x00s\x00S\x00S\x00W\x00o\x00r\x00k\x00B\x00o\x00o\x00k")) } // Pub matches a Microsoft Publisher file. func Pub(raw []byte, limit uint32) bool { return matchOleClsid(raw, []byte{ 0x01, 0x12, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, }) } // Msg matches a Microsoft Outlook email file. func Msg(raw []byte, limit uint32) bool { return matchOleClsid(raw, []byte{ 0x0B, 0x0D, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, }) } // Msi matches a Microsoft Windows Installer file. // http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File func Msi(raw []byte, limit uint32) bool { return matchOleClsid(raw, []byte{ 0x84, 0x10, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, }) } // One matches a Microsoft OneNote file. func One(raw []byte, limit uint32) bool { return bytes.HasPrefix(raw, []byte{ 0xe4, 0x52, 0x5c, 0x7b, 0x8c, 0xd8, 0xa7, 0x4d, 0xae, 0xb1, 0x53, 0x78, 0xd0, 0x29, 0x96, 0xd3, }) } // Helper to match by a specific CLSID of a compound file. // // http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File func matchOleClsid(in []byte, clsid []byte) bool { // Microsoft Compound files v3 have a sector length of 512, while v4 has 4096. // Change sector offset depending on file version. // https://www.loc.gov/preservation/digital/formats/fdd/fdd000392.shtml sectorLength := 512 if len(in) < sectorLength { return false } if in[26] == 0x04 && in[27] == 0x00 { sectorLength = 4096 } // SecID of first sector of the directory stream. firstSecID := int(binary.LittleEndian.Uint32(in[48:52])) // Expected offset of CLSID for root storage object. clsidOffset := sectorLength*(1+firstSecID) + 80 // #731 offset is outside in or wrapped around due to integer overflow. if len(in) <= clsidOffset+16 || clsidOffset < 0 { return false } return bytes.HasPrefix(in[clsidOffset:], clsid) } // WPD matches a WordPerfect document. func WPD(raw []byte, _ uint32) bool { if len(raw) < 10 { return false } if !bytes.HasPrefix(raw, []byte("\xffWPC")) { return false } return raw[8] == 1 && raw[9] == 10 } ================================================ FILE: modules/mime/internal/magic/netpbm.go ================================================ package magic import ( "bytes" "strconv" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // NetPBM matches a Netpbm Portable BitMap ASCII/Binary file. // // See: https://en.wikipedia.org/wiki/Netpbm func NetPBM(raw []byte, _ uint32) bool { return netp(raw, "P1\n", "P4\n") } // NetPGM matches a Netpbm Portable GrayMap ASCII/Binary file. // // See: https://en.wikipedia.org/wiki/Netpbm func NetPGM(raw []byte, _ uint32) bool { return netp(raw, "P2\n", "P5\n") } // NetPPM matches a Netpbm Portable PixMap ASCII/Binary file. // // See: https://en.wikipedia.org/wiki/Netpbm func NetPPM(raw []byte, _ uint32) bool { return netp(raw, "P3\n", "P6\n") } // NetPAM matches a Netpbm Portable Arbitrary Map file. // // See: https://en.wikipedia.org/wiki/Netpbm func NetPAM(raw []byte, _ uint32) bool { if !bytes.HasPrefix(raw, []byte("P7\n")) { return false } w, h, d, m, e := false, false, false, false, false s := scan.Bytes(raw) var l scan.Bytes // Read line by line. for range 128 { l = s.Line() // If the line is empty or a comment, skip. if len(l) == 0 || l.Peek() == '#' { if len(s) == 0 { return false } continue } else if bytes.HasPrefix(l, []byte("TUPLTYPE")) { continue } else if bytes.HasPrefix(l, []byte("WIDTH ")) { w = true } else if bytes.HasPrefix(l, []byte("HEIGHT ")) { h = true } else if bytes.HasPrefix(l, []byte("DEPTH ")) { d = true } else if bytes.HasPrefix(l, []byte("MAXVAL ")) { m = true } else if bytes.HasPrefix(l, []byte("ENDHDR")) { e = true } // When we reached header, return true if we collected all four required headers. // WIDTH, HEIGHT, DEPTH and MAXVAL. if e { return w && h && d && m } } return false } func netp(s scan.Bytes, prefixes ...string) bool { foundPrefix := "" for _, p := range prefixes { if bytes.HasPrefix(s, []byte(p)) { foundPrefix = p } } if foundPrefix == "" { return false } s.Advance(len(foundPrefix)) // jump over P1, P2, P3, etc. var l scan.Bytes // Read line by line. for range 128 { l = s.Line() // If the line is a comment, skip. if l.Peek() == '#' { continue } // If line has leading whitespace, then skip over whitespace. for scan.ByteIsWS(l.Peek()) { l.Advance(1) } if len(s) == 0 || len(l) > 0 { break } } // At this point l should be the two integers denoting the size of the matrix. width := l.PopUntil(scan.ASCIISpaces...) for scan.ByteIsWS(l.Peek()) { l.Advance(1) } height := l.PopUntil(scan.ASCIISpaces...) w, errw := strconv.ParseInt(string(width), 10, 64) h, errh := strconv.ParseInt(string(height), 10, 64) return errw == nil && errh == nil && w > 0 && h > 0 } ================================================ FILE: modules/mime/internal/magic/ogg.go ================================================ package magic import ( "bytes" ) /* NOTE: In May 2003, two Internet RFCs were published relating to the format. The Ogg bitstream was defined in RFC 3533 (which is classified as 'informative') and its Internet content type (application/ogg) in RFC 3534 (which is, as of 2006, a proposed standard protocol). In September 2008, RFC 3534 was obsoleted by RFC 5334, which added content types video/ogg, audio/ogg and filename extensions .ogx, .ogv, .oga, .spx. See: https://tools.ietf.org/html/rfc3533 https://developer.mozilla.org/en-US/docs/Web/HTTP/Configuring_servers_for_Ogg_media#Serve_media_with_the_correct_MIME_type https://github.com/file/file/blob/master/magic/Magdir/vorbis */ // Ogg matches an Ogg file. func Ogg(raw []byte, limit uint32) bool { return bytes.HasPrefix(raw, []byte("\x4F\x67\x67\x53\x00")) } // OggAudio matches an audio ogg file. func OggAudio(raw []byte, limit uint32) bool { return len(raw) >= 37 && (bytes.HasPrefix(raw[28:], []byte("\x7fFLAC")) || bytes.HasPrefix(raw[28:], []byte("\x01vorbis")) || bytes.HasPrefix(raw[28:], []byte("OpusHead")) || bytes.HasPrefix(raw[28:], []byte("Speex\x20\x20\x20"))) } // OggVideo matches a video ogg file. func OggVideo(raw []byte, limit uint32) bool { return len(raw) >= 37 && (bytes.HasPrefix(raw[28:], []byte("\x80theora")) || bytes.HasPrefix(raw[28:], []byte("fishead\x00")) || bytes.HasPrefix(raw[28:], []byte("\x01video\x00\x00\x00"))) // OGM video } ================================================ FILE: modules/mime/internal/magic/text.go ================================================ package magic import ( "bytes" "time" "github.com/antgroup/hugescm/modules/mime/internal/charset" "github.com/antgroup/hugescm/modules/mime/internal/json" mkup "github.com/antgroup/hugescm/modules/mime/internal/markup" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // HTML matches a Hypertext Markup Language file. func HTML(raw []byte, _ uint32) bool { return markup(raw, []byte(" 0 && firstNonWS == '[') hasTargetTok = hasTargetTok || (t&json.TokObject > 0 && firstNonWS == '{') } if !hasTargetTok { return false } lraw := len(raw) parsed, inspected, _, querySatisfied := json.Parse(q, raw) if !querySatisfied { return false } // If the full file content was provided, check that the whole input was parsed. if limit == 0 || lraw < int(limit) { return parsed == lraw } // If a section of the file was provided, check if all of it was inspected. // In other words, check that if there was a problem parsing, that problem // occurred after the last byte in the input. return inspected == lraw && lraw > 0 } // NdJSON matches a Newline delimited JSON file. All complete lines from raw // must be valid JSON documents meaning they contain one of the valid JSON data // types. func NdJSON(raw []byte, limit uint32) bool { lCount, objOrArr := 0, 0 s := scan.Bytes(raw) var l scan.Bytes for len(s) != 0 { l = s.Line() _, inspected, firstToken, _ := json.Parse(json.QueryNone, l) if len(l) != inspected { return false } if firstToken == json.TokArray || firstToken == json.TokObject { objOrArr++ } lCount++ } return lCount > 1 && objOrArr > 0 } // Svg matches a SVG file. func Svg(raw []byte, limit uint32) bool { return svgWithoutXMLDeclaration(raw) || svgWithXMLDeclaration(raw) } // svgWithoutXMLDeclaration matches a SVG image that does not have an XML header. // Example: // // // // // func svgWithoutXMLDeclaration(s scan.Bytes) bool { for scan.ByteIsWS(s.Peek()) { s.Advance(1) } for mkup.SkipAComment(&s) { } if !bytes.HasPrefix(s, []byte(" // // // func svgWithXMLDeclaration(s scan.Bytes) bool { for scan.ByteIsWS(s.Peek()) { s.Advance(1) } if !bytes.HasPrefix(s, []byte(" 4096 { s = s[:4096] } return hasVersion && bytes.Contains(s, []byte(" 00:02:19,376) limits second line // length to exactly 29 characters. if len(line) != 29 { return false } // Decimal separator of fractional seconds in the timestamps must be a // comma, not a period. if bytes.IndexByte(line, '.') != -1 { return false } sep := []byte(" --> ") i := bytes.Index(line, sep) if i == -1 { return false } const layout = "15:04:05,000" t0, err := time.Parse(layout, string(line[:i])) if err != nil { return false } t1, err := time.Parse(layout, string(line[i+len(sep):])) if err != nil { return false } if t0.After(t1) { return false } line = s.Line() // A third line must exist and not be empty. This is the actual subtitle text. return len(line) != 0 } // Vtt matches a Web Video Text Tracks (WebVTT) file. See // https://www.iana.org/assignments/media-types/text/vtt. func Vtt(raw []byte, limit uint32) bool { // Prefix match. prefixes := [][]byte{ {0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x0A}, // UTF-8 BOM, "WEBVTT" and a line feed {0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x0D}, // UTF-8 BOM, "WEBVTT" and a carriage return {0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x20}, // UTF-8 BOM, "WEBVTT" and a space {0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x09}, // UTF-8 BOM, "WEBVTT" and a horizontal tab {0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x0A}, // "WEBVTT" and a line feed {0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x0D}, // "WEBVTT" and a carriage return {0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x20}, // "WEBVTT" and a space {0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x09}, // "WEBVTT" and a horizontal tab } for _, p := range prefixes { if bytes.HasPrefix(raw, p) { return true } } // Exact match. return bytes.Equal(raw, []byte{0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) || // UTF-8 BOM and "WEBVTT" bytes.Equal(raw, []byte{0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) // "WEBVTT" } type rfc822Hint struct { h []byte matchFlags scan.Flags } // The hints come from libmagic, but the implementation is bit different. libmagic // only checks if the file starts with the hint, while we additionally look for // a secondary hint in the first few lines of input. func RFC822(raw []byte, limit uint32) bool { b := scan.Bytes(raw) // Keep hints here to avoid instantiating them several times in lineHasRFC822Hint. // The alternative is to make them a package level var, but then they'd go // on the heap. // Some of the hints are IgnoreCase, some not. I selected based on what libmagic // does and based on personal observations from sample files. hints := []rfc822Hint{ {[]byte("From: "), 0}, {[]byte("To: "), 0}, {[]byte("CC: "), scan.IgnoreCase}, {[]byte("Date: "), 0}, {[]byte("Subject: "), 0}, {[]byte("Received: "), 0}, {[]byte("Relay-Version: "), 0}, {[]byte("#! rnews"), 0}, {[]byte("N#! rnews"), 0}, {[]byte("Forward to"), 0}, {[]byte("Pipe to"), 0}, {[]byte("DELIVERED-TO: "), scan.IgnoreCase}, {[]byte("RETURN-PATH: "), scan.IgnoreCase}, {[]byte("Content-Type: "), 0}, {[]byte("Content-Transfer-Encoding: "), 0}, } if !lineHasRFC822Hint(b.Line(), hints) { return false } for range 20 { if lineHasRFC822Hint(b.Line(), hints) { return true } } return false } func lineHasRFC822Hint(b scan.Bytes, hints []rfc822Hint) bool { for _, h := range hints { if b.Match(h.h, h.matchFlags) > -1 { return true } } return false } ================================================ FILE: modules/mime/internal/magic/text_csv.go ================================================ package magic import ( "github.com/antgroup/hugescm/modules/mime/internal/csv" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // CSV matches a comma-separated values file. func CSV(raw []byte, limit uint32) bool { return sv(raw, ',', limit) } // TSV matches a tab-separated values file. func TSV(raw []byte, limit uint32) bool { return sv(raw, '\t', limit) } func sv(in []byte, comma byte, limit uint32) bool { s := scan.Bytes(in) s.DropLastLine(limit) r := csv.NewParser(comma, '#', s) headerFields, _, hasMore := r.CountFields(false) if headerFields < 2 || !hasMore { return false } csvLines := 1 // 1 for header for { fields, _, hasMore := r.CountFields(false) if !hasMore && fields == 0 { break } csvLines++ if fields != headerFields { return false } if csvLines >= 10 { return true } } return csvLines >= 2 } ================================================ FILE: modules/mime/internal/magic/text_test.go ================================================ package magic import ( "strings" "testing" ) // Benchmark JSON inputs that can cause slow-downs. func BenchmarkJSONPathological(b *testing.B) { const n = 1000 hugeArray := []byte( strings.Repeat("[1,", n) + `2,3,"abc",true,false,null` + strings.Repeat("]", n)) hugeObject := []byte( strings.Repeat(`{"a": 1, "b":`, n) + `{"c":[2,3,"abc",true,false,null]}` + strings.Repeat("}", n)) b.ReportAllocs() for b.Loop() { if !JSON(hugeArray, 0) { b.Fatal("huge array should be JSON") } if !JSON(hugeObject, 0) { b.Fatal("huge object should be JSON") } GeoJSON(hugeArray, 0) GeoJSON(hugeObject, 0) HAR(hugeArray, 0) HAR(hugeObject, 0) GLTF(hugeArray, 0) GLTF(hugeObject, 0) NdJSON(hugeArray, 0) NdJSON(hugeObject, 0) } } func TestRFC822(t *testing.T) { testcases := []struct { name string in string expected bool }{{ "empty", "", false, }, { "one hint", "Cc: cc@mail.com", false, }, { "two identical hints", "Cc: cc@mail.com\nCc: cc@mail.com", true, }, { "two different hints", "Cc: cc@mail.com\nTo: to@mail.com", true, }, { "junk at start", "junk\nCc: cc@mail.com\nTo: to@mail.com", false, }, { "junk later", "Cc: cc@mail.com\njunk To: to@mail.com", false, }} for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { got := RFC822([]byte(tc.in), 0) if tc.expected != got { t.Errorf("expected: %t, got: %t", tc.expected, got) } }) } } ================================================ FILE: modules/mime/internal/magic/video.go ================================================ package magic import ( "bytes" ) // Flv matches a Flash video file. func Flv(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte("\x46\x4C\x56\x01")) } // Asf matches an Advanced Systems Format file. func Asf(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{ 0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C, }) } // Rmvb matches a RealMedia Variable Bitrate file. func Rmvb(raw []byte, _ uint32) bool { return bytes.HasPrefix(raw, []byte{0x2E, 0x52, 0x4D, 0x46}) } // WebM matches a WebM file. func WebM(raw []byte, limit uint32) bool { return isMatroskaFileTypeMatched(raw, "webm") } // Mkv matches a mkv file. func Mkv(raw []byte, limit uint32) bool { return isMatroskaFileTypeMatched(raw, "matroska") } // isMatroskaFileTypeMatched is used for webm and mkv file matching. // It checks for .Eߣ sequence. If the sequence is found, // then it means it is Matroska media container, including WebM. // Then it verifies which of the file type it is representing by matching the // file specific string. func isMatroskaFileTypeMatched(in []byte, flType string) bool { if bytes.HasPrefix(in, []byte("\x1A\x45\xDF\xA3")) { return isFileTypeNamePresent(in, flType) } return false } // isFileTypeNamePresent accepts the matroska input data stream and searches // for the given file type in the stream. Return whether a match is found. // The logic of search is: find first instance of \x42\x82 and then // search for given string after n bytes of above instance. func isFileTypeNamePresent(in []byte, flType string) bool { maxInd, lenIn := 4096, len(in) if lenIn < maxInd { // restricting length to 4096 maxInd = lenIn } ind := bytes.Index(in[:maxInd], []byte("\x42\x82")) if ind > 0 && lenIn > ind+2 { ind += 2 // filetype name will be present exactly // n bytes after the match of the two bytes "\x42\x82" n := vintWidth(int(in[ind])) if lenIn > ind+n { return bytes.HasPrefix(in[ind+n:], []byte(flType)) } } return false } // vintWidth parses the variable-integer width in matroska containers func vintWidth(v int) int { mask, nTimes, num := 128, 8, 1 for num < nTimes && v&mask == 0 { mask >>= 1 num++ } return num } // Mpeg matches a Moving Picture Experts Group file. func Mpeg(raw []byte, limit uint32) bool { return len(raw) > 3 && bytes.HasPrefix(raw, []byte{0x00, 0x00, 0x01}) && raw[3] >= 0xB0 && raw[3] <= 0xBF } // Avi matches an Audio Video Interleaved file. func Avi(raw []byte, limit uint32) bool { return len(raw) > 16 && bytes.Equal(raw[:4], []byte("RIFF")) && bytes.Equal(raw[8:16], []byte("AVI LIST")) } ================================================ FILE: modules/mime/internal/magic/zip.go ================================================ package magic import ( "bytes" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // Odt matches an OpenDocument Text file. func Odt(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.text"), 30) } // Ott matches an OpenDocument Text Template file. func Ott(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.text-template"), 30) } // Ods matches an OpenDocument Spreadsheet file. func Ods(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet"), 30) } // Ots matches an OpenDocument Spreadsheet Template file. func Ots(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template"), 30) } // Odp matches an OpenDocument Presentation file. func Odp(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.presentation"), 30) } // Otp matches an OpenDocument Presentation Template file. func Otp(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.presentation-template"), 30) } // Odg matches an OpenDocument Drawing file. func Odg(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.graphics"), 30) } // Otg matches an OpenDocument Drawing Template file. func Otg(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.graphics-template"), 30) } // Odf matches an OpenDocument Formula file. func Odf(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.formula"), 30) } // Odc matches an OpenDocument Chart file. func Odc(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.oasis.opendocument.chart"), 30) } // Epub matches an EPUB file. func Epub(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/epub+zip"), 30) } // Sxc matches an OpenOffice Spreadsheet file. func Sxc(raw []byte, _ uint32) bool { return offset(raw, []byte("mimetypeapplication/vnd.sun.xml.calc"), 30) } // Zip matches a zip archive. func Zip(raw []byte, limit uint32) bool { return len(raw) > 3 && raw[0] == 0x50 && raw[1] == 0x4B && (raw[2] == 0x3 || raw[2] == 0x5 || raw[2] == 0x7) && (raw[3] == 0x4 || raw[3] == 0x6 || raw[3] == 0x8) } // Jar matches a Java archive file. There are two types of Jar files: // 1. the ones that can be opened with jexec and have 0xCAFE optional flag // https://stackoverflow.com/tags/executable-jar/info // 2. regular jars, same as above, just without the executable flag // https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=262278#c0 // There is an argument to only check for manifest, since it's the common nominator // for both executable and non-executable versions. But the traversing zip entries // is unreliable because it does linear search for signatures // (instead of relying on offsets told by the file.) func Jar(raw []byte, limit uint32) bool { return executableJar(raw) || // First entry must be an empty META-INF directory or the manifest. // There is no specification saying that, but the jar reader and writer // implementations from Java do it that way. // https://github.com/openjdk/jdk/blob/88c4678eed818cbe9380f35352e90883fed27d33/src/java.base/share/classes/java/util/jar/JarInputStream.java#L170-L173 zipHas(raw, zipEntries{{ name: []byte("META-INF/"), }, { name: []byte("META-INF/MANIFEST.MF"), }}, 1) } // KMZ matches a zipped KML file, which is "doc.kml" by convention. func KMZ(raw []byte, _ uint32) bool { return zipHas(raw, zipEntries{{ name: []byte("doc.kml"), }}, 100) } // An executable Jar has a 0xCAFE flag enabled in the first zip entry. // The rule from file/file is: // >(26.s+30) leshort 0xcafe Java archive data (JAR) func executableJar(b scan.Bytes) bool { b.Advance(0x1A) offset, ok := b.Uint16() if !ok { return false } b.Advance(int(offset) + 2) cafe, ok := b.Uint16() return ok && cafe == 0xCAFE } // zipIterator iterates over a zip file returning the name of the zip entries // in that file. type zipIterator struct { b scan.Bytes } type zipEntries []struct { name []byte dir bool // dir means checking just the prefix of the entry, not the whole path } func (z zipEntries) match(file []byte) bool { for i := range z { if z[i].dir { if bytes.HasPrefix(file, z[i].name) { return true } } else { if bytes.Equal(file, z[i].name) { return true } } } return false } func zipHas(raw scan.Bytes, searchFor zipEntries, stopAfter int) bool { iter := zipIterator{raw} for range stopAfter { f := iter.next() if len(f) == 0 { break } if searchFor.match(f) { return true } } return false } // msoxml behaves like zipHas, but it puts restrictions on what the first zip // entry can be. func msoxml(raw scan.Bytes, searchFor zipEntries, stopAfter int) bool { iter := zipIterator{raw} for i := range stopAfter { f := iter.next() if len(f) == 0 { break } if searchFor.match(f) { return true } // If the first is not one of the next usually expected entries, // then abort this check. if i == 0 { if !bytes.Equal(f, []byte("[Content_Types].xml")) && // this is a file !bytes.HasPrefix(f, []byte("_rels/")) && // these are directories !bytes.HasPrefix(f, []byte("docProps/")) && !bytes.HasPrefix(f, []byte("customXml/")) && !bytes.HasPrefix(f, []byte("[trash]/")) { return false } } } return false } var zipLocalFileHeader = []byte("PK\003\004") // next extracts the name of the next zip entry. func (i *zipIterator) next() []byte { n := bytes.Index(i.b, zipLocalFileHeader) if n == -1 { return nil } i.b.Advance(n) if !i.b.Advance(0x1A) { return nil } l, ok := i.b.Uint16() if !ok { return nil } if !i.b.Advance(0x02) { return nil } if len(i.b) < int(l) { return nil } return i.b[:l] } // skipZipflingerEntry tries to detect a Zipflinger virtual entry and skips it. // The detection is based on the following properties: // - compression method is 0 // - CRC32 is 0 // - compressed size is 0 // - uncompressed size is 0 // - file name is empty // Returns true if it was found and skipped. func (i *zipIterator) skipZipflingerEntry() (skipped bool) { // Make a backup of the data so the inspection does not loses it. b := i.b defer func() { // If no zipflinger was found, restore the original data. if !skipped { i.b = b } }() n := bytes.Index(i.b, zipLocalFileHeader) if n == -1 { return false } if !i.b.Advance(0x08) { return false } // Check compression method if cm, ok := i.b.Uint16(); !ok || cm != 0 { return false } // Advance up to the CRC32 field if !i.b.Advance(0x04) { return false } // Check CRC32 if crc32, ok := i.b.Uint32(); !ok || crc32 != 0 { return false } // Check compressed size if compressedSize, ok := i.b.Uint32(); !ok || compressedSize != 0 { return false } // Check uncompressed size if uncompressedSize, ok := i.b.Uint32(); !ok || uncompressedSize != 0 { return false } // Check for empty file name if l, ok := i.b.Uint16(); !ok || l != 0 { return false } // Reached a zipflinger virtual entry: skip extra data l, ok := i.b.Uint16() if !ok { return false } if !i.b.Advance(int(l)) { return false } return true } // APK matches an Android Package Archive. // The source of signatures is https://github.com/file/file/blob/1778642b8ba3d947a779a36fcd81f8e807220a19/magic/Magdir/archive#L1820-L1887 func APK(raw []byte, _ uint32) bool { iter := zipIterator{raw} // If a Zipflinger Virtual Entry is detected, then the data is considered APK if iter.skipZipflingerEntry() { return true } return zipHas(iter.b, zipEntries{{ name: []byte("AndroidManifest.xml"), }, { name: []byte("META-INF/com/android/build/gradle/app-metadata.properties"), }, { name: []byte("classes.dex"), }, { name: []byte("resources.arsc"), }, { name: []byte("res/drawable"), }}, 100) } ================================================ FILE: modules/mime/internal/magic/zip_test.go ================================================ package magic import ( "archive/zip" "bytes" "fmt" "io" "testing" ) func createZip(files []string) (*bytes.Buffer, error) { buf := bytes.NewBuffer(nil) w := zip.NewWriter(buf) for _, f := range files { _, err := w.Create(f) if err != nil { return nil, err } } return buf, w.Close() } func createZipUncompressed(content *bytes.Buffer) (*bytes.Buffer, error) { buf := bytes.NewBuffer(nil) w := zip.NewWriter(buf) for i := range 5 { file, err := w.CreateHeader(&zip.FileHeader{ Name: fmt.Sprintf("file%d", i), Method: zip.Store, // Store means 0 compression. }) if err != nil { return nil, err } if _, err := io.Copy(file, content); err != nil { return nil, err } } return buf, w.Close() } func TestZeroZip(t *testing.T) { tcases := []struct { name string files []string xlsx bool docx bool pptx bool jar bool }{{ name: "empty zip", files: nil, }, { name: "no customXml/", files: []string{"foo", "word/"}, }, { name: "customXml/, but no word/", files: []string{"customXml/"}, }, { name: "customXml/, and other files, but no word/", files: []string{"customXml/", "1", "2", "3"}, }, { name: "customXml/, and other files, but word/ is the 7th file", // we only check until 6th file files: []string{"customXml/", "1", "2", "3", "4", "5", "word/"}, docx: true, }, { name: "customXml/, word/ xl/ pptx/ after 5 files", files: []string{"1", "2", "3", "4", "5", "customXml/", "word/", "xl/", "ppt/"}, }, { name: "customXml/, word/", files: []string{"customXml/", "word/"}, docx: true, }, { name: "customXml/, word/with_suffix", files: []string{"customXml/", "word/with_suffix"}, docx: true, }, { name: "customXml/, word/", files: []string{"customXml/", "word/media"}, docx: true, }, { name: "customXml/, xl/", files: []string{"customXml/", "xl/media"}, xlsx: true, }, { name: "customXml/, ppt/", files: []string{"customXml/", "ppt/media"}, pptx: true, }, { name: "manifest file first", files: []string{"META-INF/MANIFEST.MF"}, jar: true, }, { name: "manifest dir first", files: []string{"META-INF/"}, jar: true, }, { name: "META-INF but not manifest first", files: []string{"META-INF/com.github.org", "META-INF/"}, jar: false, }, { name: "manifest second file", files: []string{"1", "META-INF/MANIFEST.MF"}, jar: false, }, { name: "ppt/ after 15 files", files: []string{ "[Content_Types].xml", "_rels/.rels", "customXml/_rels/item1.xml", "customXml/_rels/item2.xml.rels", "customXml/_rels/item3.xml.rels", "customXml/_rels/item4.xml.rels", "customXml/item1.xml", "customXml/item2.xml", "customXml/item3.xml", "customXml/itemProps1.xml", "customXml/itemProps2.xml", "customXml/itemProps3.xml", "docProps/app.xml", "docProps/core.xml", "docProps/custom.xml", "ppt/_rels/presentation.xml.rel", }, pptx: true, }, { // #728 - msoxml directories have to be compared with bytes.HasPrefix. // bytes.Equal worked fine for most office files because [Content_Types].xml // is a file. But for directories, sometimes the zip record is an empty // file, other times it is a file in that directory. To account for these // cases, bytes.HasPrefix is used. name: "docProps dir (not file) is first", files: []string{"docProps/custom.xml", "xl/"}, xlsx: true, }} for i, tc := range tcases { t.Run(tc.name, func(t *testing.T) { buf, err := createZip(tc.files) if err != nil { t.Fatal(err) } docx := Docx(buf.Bytes(), 0) xlsx := Xlsx(buf.Bytes(), 0) pptx := Pptx(buf.Bytes(), 0) jar := Jar(buf.Bytes(), 0) if tc.docx != docx || tc.xlsx != xlsx || tc.pptx != pptx || tc.jar != jar { t.Errorf(` docx xlsx pptx jar %d expected %t %t %t %t; got %t %t %t %t`, i, tc.docx, tc.xlsx, tc.pptx, tc.jar, docx, xlsx, pptx, jar) } // #400 - xlsx, docx, pptx put as is (compression lvl 0) inside a zip // It should continue to get detected as regular zip, not xlsx or docx or pptx. uncompressedZip, err := createZipUncompressed(buf) if err != nil { t.Fatal(err) } docx = Docx(uncompressedZip.Bytes(), 0) xlsx = Xlsx(uncompressedZip.Bytes(), 0) pptx = Pptx(uncompressedZip.Bytes(), 0) jar = Jar(uncompressedZip.Bytes(), 0) if docx || xlsx || pptx || jar { t.Errorf(` uncompressedZip: docx xlsx pptx jar %d expected false false false false got %t %t %t %t`, i, docx, xlsx, pptx, jar) } }) } } func BenchmarkZip(b *testing.B) { buf, err := createZip([]string{ "[Content_Types].xml", "_rels/.rels", "customXml/_rels/item1.xml", "customXml/_rels/item2.xml.rels", "customXml/_rels/item3.xml.rels", "customXml/_rels/item4.xml.rels", "customXml/item1.xml", "customXml/item2.xml", "customXml/item3.xml", "customXml/itemProps1.xml", "customXml/itemProps2.xml", "customXml/itemProps3.xml", "docProps/app.xml", "docProps/core.xml", "docProps/custom.xml", "ppt/_rels/presentation.xml.rel", "xl/_rels/presentation.xml.rel", "word/_rels/presentation.xml.rel", "doc.kml", }) if err != nil { b.Fatal(err) } b.ReportAllocs() for b.Loop() { Docx(buf.Bytes(), 0) Xlsx(buf.Bytes(), 0) Pptx(buf.Bytes(), 0) Jar(buf.Bytes(), 0) KMZ(buf.Bytes(), 0) } } ================================================ FILE: modules/mime/internal/markup/markup.go ================================================ // Package markup implements functions for extracting info from // HTML and XML documents. package markup import ( "bytes" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) // GetAnAttribute assumes we passed over an SGML tag and extracts first // attribute and its value. // // Initially, this code existed inside charset/charset.go, because it was part of // implementing the https://html.spec.whatwg.org/multipage/parsing.html#prescan-a-byte-stream-to-determine-its-encoding // algorithm. But because extracting an attribute from a tag is the same for // both HTML and XML, then the code was moved here. func GetAnAttribute(s *scan.Bytes) (name, val []byte, hasMore bool) { for scan.ByteIsWS(s.Peek()) || s.Peek() == '/' { s.Advance(1) } if s.Peek() == '>' { return nil, nil, false } origS, end := *s, 0 // step 4 and 5 for { // bap means byte at position in the specification. bap := s.Pop() if bap == 0 { return nil, nil, false } if bap == '=' && end > 0 { val, hasMore := getAValue(s) return origS[:end], val, hasMore } else if scan.ByteIsWS(bap) { for scan.ByteIsWS(s.Peek()) { s.Advance(1) } if s.Peek() != '=' { return origS[:end], nil, true } s.Advance(1) for scan.ByteIsWS(s.Peek()) { s.Advance(1) } val, hasMore := getAValue(s) return origS[:end], val, hasMore } else if bap == '/' || bap == '>' { return origS[:end], nil, false } else { // for any ASCII, non-ASCII, just advance end++ } } } func getAValue(s *scan.Bytes) (_ []byte, hasMore bool) { for scan.ByteIsWS(s.Peek()) { s.Advance(1) } origS, end := *s, 0 bap := s.Pop() if bap == 0 { return nil, false } end++ // Step 10 switch bap { case '"', '\'': val := s.PopUntil(bap) if s.Pop() != bap { return nil, false } return val, s.Peek() != 0 && s.Peek() != '>' case '>': return nil, false } // Step 11 for { bap = s.Pop() if bap == 0 { return nil, false } switch { case scan.ByteIsWS(bap): return origS[:end], true case bap == '>': return origS[:end], false default: end++ } } } func SkipAComment(s *scan.Bytes) (skipped bool) { if bytes.HasPrefix(*s, []byte("")); i != -1 { s.Advance(i + 2 + 3) // 2 comes from len(). return true } } return false } ================================================ FILE: modules/mime/internal/markup/markup_test.go ================================================ package markup import ( "reflect" "testing" "github.com/antgroup/hugescm/modules/mime/internal/scan" ) var getAnAttributeTestCases = []struct { in string name string value string hasMore bool }{{ "", "", "", false, }, { "''", "", "", false, }, { `""`, "", "", false, }, { `"abc`, "", "", false, }, { "1>", "1", "", false, }, { "A>", "A", "", false, }, { "a>", "a", "", false, }, { "abc>", "abc", "", false, }, { "'abc'", "", "", false, }, { "'abc'>", "'abc'", "", false, }, { // > as attribute ender "meta1=meta>", "meta1", "meta", false, }, { "meta2=META>", "meta2", "META", false, }, { `meta3="meta">`, "meta3", "meta", false, }, { `meta4="'meta">`, "meta4", "'meta", false, }, { " meta5 = meta >", "meta5", "meta", true, }, { " meta6 =' meta '>", "meta6", " meta ", false, }, { ` meta7 =' "meta '>`, "meta7", ` "meta `, false, }, { ` mEtA7 =' "meta '>`, "mEtA7", ` "meta `, false, // / as attribute ender }, { // when the value is unquoted / right after is a parse warning "meta1=meta/", "meta1", "", false, }, { "meta2=META/", "meta2", "", false, }, { "meta3=meta /", "meta3", "meta", true, }, { "meta4=META /", "meta4", "META", true, }, { `meta5="meta"/`, "meta5", "meta", true, }, { `meta6="'meta"/`, "meta6", "'meta", true, }, { " meta7 = meta /", "meta7", "meta", true, }, { " meta8 =' meta '/", "meta8", " meta ", true, }, { ` meta9 =' "meta '/`, "meta9", ` "meta `, true, }, { ` meta0 /`, "meta0", ``, true, }, { "; charset=UTF-8", ";", "", true, }, { ` http-equiv="content-type" content="text/html; charset=iso-8859-15">`, "http-equiv", `content-type`, true, }} func TestGetAnAttribute(t *testing.T) { for _, tc := range getAnAttributeTestCases { t.Run(tc.in, func(t *testing.T) { s := scan.Bytes(tc.in) name, value, hasMore := GetAnAttribute(&s) if string(name) != tc.name { t.Errorf("name: got: %s, want: %s", name, tc.name) } if string(value) != tc.value { t.Errorf("value: got: %s, want: %s", value, tc.value) } if hasMore != tc.hasMore { t.Errorf("hasMore: got: %t, want: %t", hasMore, tc.hasMore) } }) } } func FuzzGetAnAttribute(f *testing.F) { for _, t := range getAnAttributeTestCases { f.Add([]byte(t.in)) } f.Fuzz(func(t *testing.T, d []byte) { s := scan.Bytes(d) GetAnAttribute(&s) }) } var getAValueTestCases = []struct { in string out string hasMore bool }{{ "", "", false, }, { " ", "", false, }, { "''", "", false, }, { `""`, "", false, }, { `"abc`, "", false, }, { ">", "", false, }, { "1>", "1", false, }, { "A>", "A", false, }, { "a>", "a", false, }, { "abc>", "abc", false, }, { "ABCXYZ>", "ABCXYZ", false, }, { "'abc'", "abc", false, }, { "'abc'>", "abc", false, }, { "abc def=ghi", "abc", true, }, { "abc >", "abc", true, }, { "'abc' >", "abc", true, }, { "'ABCXYZ' >", "ABCXYZ", true, }, { `"abc" >`, "abc", true, }} func TestGetAValue(t *testing.T) { for _, tc := range getAValueTestCases { t.Run(tc.in, func(t *testing.T) { s := scan.Bytes(tc.in) got, hasMore := getAValue(&s) if string(got) != tc.out { t.Errorf("got: %s, want: %s", got, tc.out) } if hasMore != tc.hasMore { t.Errorf("hasMore: got: %t, want: %t", hasMore, tc.hasMore) } }) } } func FuzzGetAValue(f *testing.F) { for _, tc := range getAValueTestCases { f.Add([]byte(tc.in)) } f.Fuzz(func(t *testing.T, d []byte) { s := scan.Bytes(d) getAValue(&s) }) } func TestGetAllAttributes(t *testing.T) { tcases := []struct { in string expected [][2]string }{{ "", [][2]string{}, }, { // doesn't have ending > "a", [][2]string{}, }, { // doesn't have ending > "abc", [][2]string{}, }, { "a b c", [][2]string{{"a", ""}, {"b", ""}}, }, { "abc abc abc", [][2]string{{"abc", ""}, {"abc", ""}}, }, { "a=1 b=2 c=3", [][2]string{{"a", "1"}, {"b", "2"}, {"c", ""}}, }, { "a=1 b c=3", [][2]string{{"a", "1"}, {"b", ""}, {"c", ""}}, }, { "a b=2 c", [][2]string{{"a", ""}, {"b", "2"}}, }, { ">", [][2]string{}, }, { "a>", [][2]string{{"a", ""}}, }, { "abc>", [][2]string{{"abc", ""}}, }, { "a b c>", [][2]string{{"a", ""}, {"b", ""}, {"c", ""}}, }, { "a b/ c>", [][2]string{{"a", ""}, {"b", ""}, {"c", ""}}, }, { "/a b/ c>", [][2]string{{"a", ""}, {"b", ""}, {"c", ""}}, }, { "a b abc/>", [][2]string{{"a", ""}, {"b", ""}, {"abc", ""}}, }} getAll := func(in string) [][2]string { s := scan.Bytes(in) ret := [][2]string{} for { name, value, _ := GetAnAttribute(&s) if len(name) == 0 { return ret } ret = append(ret, [2]string{string(name), string(value)}) } } for _, tc := range tcases { t.Run(tc.in, func(t *testing.T) { got := getAll(tc.in) if !reflect.DeepEqual(got, tc.expected) { t.Errorf("got: %v, want: %v", got, tc.expected) } }) } } func TestSkipAComment(t *testing.T) { tcases := []struct { in string out string skipped bool }{{ "", "", false, }, { "abc", "abc", false, }, { "", "", true, // regular comment }, { "", "", true, // the beginning and ending -- are the same chars }} for _, tc := range tcases { t.Run(tc.in, func(t *testing.T) { s := scan.Bytes(tc.in) skipped := SkipAComment(&s) if tc.skipped != skipped { t.Errorf("skipped got: %v, want: %v", skipped, tc.skipped) } if string(s) != tc.out { t.Errorf("got: %v, want: %v", string(s), tc.out) } }) } } ================================================ FILE: modules/mime/internal/scan/bytes.go ================================================ // Package scan has functions for scanning byte slices. package scan import ( "bytes" "encoding/binary" ) // Bytes is a byte slice with helper methods for easier scanning. type Bytes []byte func (b *Bytes) Advance(n int) bool { if n < 0 || len(*b) < n { return false } *b = (*b)[n:] return true } // TrimLWS trims whitespace from beginning of the bytes. func (b *Bytes) TrimLWS() { firstNonWS := 0 for ; firstNonWS < len(*b) && ByteIsWS((*b)[firstNonWS]); firstNonWS++ { } *b = (*b)[firstNonWS:] } // TrimRWS trims whitespace from the end of the bytes. func (b *Bytes) TrimRWS() { lb := len(*b) for lb > 0 && ByteIsWS((*b)[lb-1]) { *b = (*b)[:lb-1] lb-- } } // FirstNonWS returns the first non-whitespace character from b, // or 0x00 if no such character is found. func (b Bytes) FirstNonWS() byte { for i := range b { if ByteIsWS(b[i]) { continue } return b[i] } return 0x00 } // Peek one byte from b or 0x00 if b is empty. func (b *Bytes) Peek() byte { if len(*b) > 0 { return (*b)[0] } return 0 } // Pop one byte from b or 0x00 if b is empty. func (b *Bytes) Pop() byte { if len(*b) > 0 { ret := (*b)[0] *b = (*b)[1:] return ret } return 0 } // PopN pops n bytes from b or nil if b is empty. func (b *Bytes) PopN(n int) []byte { if len(*b) >= n { ret := (*b)[:n] *b = (*b)[n:] return ret } return nil } // PopUntil will advance b until, but not including, the first occurrence of stopAt // character. If no occurrence is found, then it will advance until the end of b. // The returned Bytes is a slice of all the bytes that we're advanced over. func (b *Bytes) PopUntil(stopAt ...byte) Bytes { if len(*b) == 0 { return Bytes{} } i := bytes.IndexAny(*b, string(stopAt)) if i == -1 { i = len(*b) } prefix := (*b)[:i] *b = (*b)[i:] return prefix } // ReadSlice is the same as PopUntil, but the returned value includes stopAt as well. func (b *Bytes) ReadSlice(stopAt byte) Bytes { if len(*b) == 0 { return Bytes{} } i := bytes.IndexByte(*b, stopAt) if i == -1 { i = len(*b) } else { i++ } prefix := (*b)[:i] *b = (*b)[i:] return prefix } // Line returns the first line from b and advances b with the length of the // line. One new line character is trimmed after the line if it exists. func (b *Bytes) Line() Bytes { line := b.PopUntil('\n') lline := len(line) if lline > 0 && line[lline-1] == '\r' { line = line[:lline-1] } b.Advance(1) return line } // DropLastLine drops the last incomplete line from b. // // mimetype limits itself to ReadLimit bytes when performing a detection. // This means, for file formats like CSV for NDJSON, the last line of the input // can be an incomplete line. // If b length is less than readLimit, it means we received an incomplete file // and proceed with dropping the last line. func (b *Bytes) DropLastLine(readLimit uint32) { if readLimit == 0 || uint64(len(*b)) < uint64(readLimit) { return } for i := len(*b) - 1; i > 0; i-- { if (*b)[i] == '\n' { *b = (*b)[:i] return } } } func (b *Bytes) Uint16() (uint16, bool) { if len(*b) < 2 { return 0, false } v := binary.LittleEndian.Uint16(*b) *b = (*b)[2:] return v, true } func (b *Bytes) Uint32() (uint32, bool) { if len(*b) < 4 { return 0, false } v := binary.LittleEndian.Uint32(*b) *b = (*b)[4:] return v, true } func (b *Bytes) Uint32be() (uint32, bool) { if len(*b) < 4 { return 0, false } v := binary.BigEndian.Uint32(*b) *b = (*b)[4:] return v, true } type Flags int const ( // CompactWS will make one whitespace from pattern to match one or more spaces from input. CompactWS Flags = 1 << iota // IgnoreCase will match lower case from pattern with lower case from input. // IgnoreCase will match upper case from pattern with both lower and upper case from input. // This flag is not really well named, IgnoreCase // FullWord ensures the input ends with a full word (it's followed by spaces.) FullWord ) // Search for occurrences of pattern p inside b at any index. // It returns the index where p was found in b and how many bytes were needed // for matching the pattern. func (b Bytes) Search(p []byte, flags Flags) (i int, l int) { lb, lp := len(b), len(p) if lp == 0 { return 0, 0 } if lb == 0 { return -1, 0 } if flags == 0 { if i = bytes.Index(b, p); i == -1 { return -1, 0 } else { return i, lp } } for i := range b { if lb-i < lp { return -1, 0 } if l = b[i:].Match(p, flags); l != -1 { return i, l } } return -1, 0 } // Match returns how many bytes were needed to match pattern p. // It returns -1 if p does not match b. func (b Bytes) Match(p []byte, flags Flags) int { l := len(b) if len(p) == 0 { return 0 } if l == 0 { return -1 } // Some cases we can handle with a simple bytes.HasPrefix. if flags == 0 || flags == FullWord { if bytes.HasPrefix(b, p) { b = b[len(p):] p = p[len(p):] goto out } return -1 } for len(b) > 0 { // If we finished all we were looking for from p. if len(p) == 0 { goto out } if flags&IgnoreCase > 0 && isUpper(p[0]) { if upper(b[0]) != p[0] { return -1 } b, p = b[1:], p[1:] } else if flags&CompactWS > 0 && ByteIsWS(p[0]) { p = p[1:] if !ByteIsWS(b[0]) { return -1 } b = b[1:] if !ByteIsWS(p[0]) { b.TrimLWS() } } else { if b[0] != p[0] { return -1 } b, p = b[1:], p[1:] } } out: // If p still has leftover characters, it means it didn't fully match b. if len(p) > 0 { return -1 } if flags&FullWord > 0 { if len(b) > 0 && !ByteIsWS(b[0]) { return -1 } } return l - len(b) } func isUpper(c byte) bool { return c >= 'A' && c <= 'Z' } func upper(c byte) byte { if c >= 'a' && c <= 'z' { return c - ('a' - 'A') } return c } func ByteIsWS(b byte) bool { return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' ' } var ( ASCIISpaces = []byte{' ', '\r', '\n', '\x0c', '\t'} ASCIIDigits = []byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} ) ================================================ FILE: modules/mime/internal/scan/bytes_test.go ================================================ package scan import ( "bufio" "fmt" "io" "strings" "testing" "math/rand" ) func TestPeek(t *testing.T) { tcases := []struct { name string in string peeked byte }{{ "empty", "", 0, }, { "123", "123", '1', }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) peeked := b.Peek() if string(b) != tc.in { t.Errorf("left: got: %s, want: %s", string(b), tc.in) } if peeked != tc.peeked { t.Errorf("peeked: got: %c, want: %c", peeked, tc.peeked) } }) } } func TestPop(t *testing.T) { tcases := []struct { name string in string popped byte left string }{{ "empty", "", 0, "", }, { "123", "123", '1', "23", }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) popped := b.Pop() if string(b) != tc.left { t.Errorf("left: got: %s, want: %s", string(b), tc.left) } if popped != tc.popped { t.Errorf("popped: got: %c, want: %c", popped, tc.popped) } }) } } func TestPopN(t *testing.T) { tcases := []struct { name string in string n int popped string left string }{{ "empty", "", 0, "", "", }, { "1,0", "1", 0, "", "1", }, { "12,0", "12", 0, "", "12", }, { "1,1", "1", 1, "1", "", }, { "12,1", "12", 1, "1", "2", }, { "123,1", "123", 1, "1", "23", }, { "123,2", "123", 2, "12", "3", }, { "123,3", "123", 3, "123", "", }, { "123,4", "123", 4, "", "123", }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) popped := b.PopN(tc.n) if string(b) != tc.left { t.Errorf("left: got: %s, want: %s", string(b), tc.left) } if string(popped) != tc.popped { t.Errorf("popped: got: %s, want: %s", string(popped), tc.popped) } }) } } func TestTrim(t *testing.T) { tcases := []struct { name string in string left string right string }{{ "empty", "", "", "", }, { "one space", " ", "", "", }, { "all spaces", " \r\n\t\x0c", "", "", }, { "one char and spaces", " \r\n\t\x0ca \r\n\t\x0c", "a \r\n\t\x0c", " \r\n\t\x0ca", }, { "one char", "a", "a", "a", }, { // Unicode Ogham space mark "unicode space ogham", " ", " ", " ", }, { // Unicode Em space mark "unicode em space", "\u2003", "\u2003", "\u2003", }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) b.TrimLWS() if string(b) != tc.left { t.Errorf("left: got: %s, want: %s", string(b), tc.left) } b = Bytes(tc.in) b.TrimRWS() if string(b) != tc.right { t.Errorf("right: got: %s, want: %s", string(b), tc.right) } }) } } func TestFirstNonWS(t *testing.T) { tcases := []struct { name string in string c byte }{{ "empty", "", 0x00, }, { "all ws", " ", 0x00, }, { "first char", "a", 'a', }, { "second char", " a", 'a', }, { "space then nil", " \x00", 0x00, }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) c := b.FirstNonWS() if c != tc.c { t.Errorf("got: %x, want: %x", c, tc.c) } }) } } func TestAdvance(t *testing.T) { tcases := []struct { name string in string advance int want string shouldDo bool }{{ "empty 0", "", 0, "", true, }, { "empty 1", "", 1, "", false, }, { "empty -1", "", -1, "", false, }, { "123 0", "123", 0, "123", true, }, { "123 -1", "123", -1, "123", false, }, { "123 1", "123", 1, "23", true, }, { "123 4", "123", 4, "123", false, }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) did := b.Advance(tc.advance) if did != tc.shouldDo { t.Errorf("got: %t, want: %t", did, tc.shouldDo) } if string(b) != tc.want { t.Errorf("got: %s, want: %s", string(b), tc.want) } }) } } func TestLine(t *testing.T) { tcases := []struct { name string in string line string leftover string }{{ "empty", "", "", "", }, { "one line", "abc", "abc", "", }, { "just a \\n", "\n", "", "", }, { "just two \\n", "\n\n", "", "\n", }, { "one line with \\n", "abc\n", "abc", "", }, { "two lines", "abc\ndef", "abc", "def", }, { "two lines with \\n", "abc\ndef\n", "abc", "def\n", }, { "drops final cr", "abc\r", "abc", "", }, { "cr inside line", "abc\rdef", "abc\rdef", "", }, { "nl and cr", "\n\r", "", "\r", }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) line := b.Line() if string(line) != tc.line { t.Errorf("line: got: %s, want: %s", line, []byte(tc.line)) } if string(b) != tc.leftover { t.Errorf("leftover: got: %s, want: %s", b, []byte(tc.leftover)) } // Test if it behaves like bufio.Scanner as well. s := bufio.NewScanner(strings.NewReader(tc.in)) s.Scan() if string(line) != s.Text() { t.Errorf("Bytes.Line not like bufio.Scanner") } }) } } func TestPopUntil(t *testing.T) { tcases := []struct { name string in string untilAny string popped string leftover string }{{ "empty", "", "", "", "", }, { "empty with until", "", "123", "", "", }, { "until empty", "123", "", "123", "", }, { "until 1", "123", "1", "", "123", }, { "until 2", "123", "2", "1", "23", }, { "until 3", "123", "3", "12", "3", }, { "until 4", "123", "4", "123", "", }, { "multiple untilAny", "123", "32", "1", "23", }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) popped := b.PopUntil([]byte(tc.untilAny)...) if string(popped) != tc.popped { t.Errorf("popped: got: %s, want: %s", popped, []byte(tc.popped)) } if string(b) != tc.leftover { t.Errorf("leftover: got: %s, want: %s", b, []byte(tc.leftover)) } }) } } func TestReadSlice(t *testing.T) { tcases := []struct { name string in string stopAt byte popped string leftover string }{{ "both empty", "", 0, "", "", }, { "stop at not found", "abc", 'd', "abc", "", }, { "stop at the end", "abc", 'c', "abc", "", }, { "stop at in the middle", "abcdef", 'c', "abc", "def", }, { "stop at the beginning", "abcdef", 'a', "a", "bcdef", }, { "just one char", "a", 'a', "a", "", }, { "same char twice", "aa", 'a', "a", "a", }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) got := b.ReadSlice(tc.stopAt) if tc.popped != string(got) { t.Errorf("popped got: %s, want: %s", got, tc.popped) } if tc.leftover != string(b) { t.Errorf("leftover got: %s, want: %s", string(b), tc.leftover) } }) } } func TestUint16(t *testing.T) { tcases := []struct { name string in []byte res uint16 ok bool }{{ "empty", nil, 0, false, }, { "too short", []byte{0}, 0, false, }, { "just enough", []byte{1, 0}, 1, true, }, { "longer", []byte{1, 0, 2}, 1, true, }} for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.in) res, ok := b.Uint16() if res != tc.res { t.Errorf("got: %d, want: %d", res, tc.res) } if ok != tc.ok { t.Errorf("ok: got: %t, want: %t", ok, tc.ok) } }) } } var searchTestcases = []struct { name string haystack string needle string flags Flags expectIdx int expectLen int }{{ "empty", "", "", 0, 0, 0, }, { "empty cws", "", "", CompactWS, 0, 0, }, { "empty ic", "", "", IgnoreCase, 0, 0, }, { "just haystack", "abc", "", 0, 0, 0, }, { "just haystack cws", "abc", "", CompactWS, 0, 0, }, { "just haystack ic", "abc", "", IgnoreCase, 0, 0, }, { "just needle", "", "abc", 0, -1, 0, }, { "just needle cws", "", "abc", CompactWS, -1, 0, }, { "just needle ic", "", "abc", IgnoreCase, -1, 0, }, { "simple", "abc", "abc", 0, 0, 3, }, { "not found", "abc", "def", 0, -1, 0, }, { "simple cws", "abc", "abc", CompactWS, 0, 3, }, { "simple ic", "abc", "abc", IgnoreCase, 0, 3, }, { "ic 1 upper", "aBc", "ABC", IgnoreCase, 0, 3, }, { "ic prefixed", "aaBcß", "ABC", IgnoreCase, 1, 3, }, { "ic prefixed utf8", "ßaBcß", "ABC", IgnoreCase, 2, 3, // 2 because ß is 2 bytes long }, { "simple cws|ic", " a", " A", CompactWS | IgnoreCase, 0, 3, }, { "simple cws|ic with suffix and prefix", "a ab", " A", CompactWS | IgnoreCase, 1, 3, }, { "trailing space in input", "a a ", " A", CompactWS | IgnoreCase, 1, 3, }, { "empty haystack with needle cws|ic", "", "abc", CompactWS | IgnoreCase, -1, 0, }, { "empty haystack with needle cws", "", "abc", CompactWS, -1, 0, }} func TestSearch(t *testing.T) { for _, tc := range searchTestcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.haystack) i, l := b.Search([]byte(tc.needle), tc.flags) if i != tc.expectIdx || l != tc.expectLen { t.Errorf("want: %d,%d got: %d,%d", tc.expectIdx, tc.expectLen, i, l) } }) } } func FuzzSearch(f *testing.F) { for _, tc := range searchTestcases { f.Add([]byte(tc.haystack), []byte(tc.needle), int(tc.flags)) } f.Fuzz(func(t *testing.T, haystack, needle []byte, flags int) { b := Bytes(haystack) b.Search(needle, Flags(flags)%CompactWS|IgnoreCase|FullWord) }) } var matchTestcases = []struct { name string b string p string flags Flags expectLen int }{{ "empty", "", "", 0, 0, }, { "empty compact ws", "", "", CompactWS, 0, }, { "empty ic", "", "", IgnoreCase, 0, }, { "empty cws|ic", "", "", CompactWS | IgnoreCase, 0, }, { "simple", "abc", "abc", 0, 3, }, { "simple cws|ic", "abc", "abc", CompactWS | IgnoreCase, 3, }, { "not found", "abc", "def", 0, -1, }, { "simple cws", "abc", "abc", CompactWS, 3, }, { "simple ic", "abc", "abc", IgnoreCase, 3, }, { "ic 1 upper", "aBc", "ABC", IgnoreCase, 3, }, { "ic prefixed", "aaBcß", "ABC", IgnoreCase, -1, }, { "ic prefixed utf8", "ßaBcß", "ABC", IgnoreCase, -1, }, { "simple cws|ic with space", " a", " A", CompactWS | IgnoreCase, 3, }, { "trailing space in input", "a a ", " A", CompactWS | IgnoreCase, -1, }, { "empty b with p", "", "/bin/bash", CompactWS, -1, }, { "failing", "asd", "asdf", IgnoreCase, -1, }, { "exact fw", "abc", "abc", FullWord, 3, }, { "success fw", "abc ", "abc", FullWord, 3, }, { "fail fw", "abcd", "abc", FullWord, -1, }, { // #762 "fw+ic", "abc ", "ABC", FullWord | IgnoreCase, 3, }, { "fw+cws", "a bc d", "a bc", FullWord | CompactWS, 5, }, { "fw+ic+cws", "a bc d", "A BC", FullWord | IgnoreCase | CompactWS, 5, }} func TestMatch(t *testing.T) { for _, tc := range matchTestcases { t.Run(tc.name, func(t *testing.T) { b := Bytes(tc.b) l := b.Match([]byte(tc.p), tc.flags) if l != tc.expectLen { t.Errorf("want: %d got: %d", tc.expectLen, l) } }) } } func FuzzMatch(f *testing.F) { for _, tc := range matchTestcases { f.Add([]byte(tc.b), []byte(tc.p), int(tc.flags)) } f.Fuzz(func(t *testing.T, b, p []byte, flags int) { Bytes(b).Match(p, Flags(flags)%CompactWS|IgnoreCase|FullWord) }) } func BenchmarkMatch(b *testing.B) { r := rand.New(rand.NewSource(0)) randData := make([]byte, 1024) if _, err := io.ReadFull(r, randData); err != io.ErrUnexpectedEOF && err != nil { b.Fatal(err) } b.ReportAllocs() for _, f := range []Flags{ 0, CompactWS, IgnoreCase, FullWord, } { b.Run(fmt.Sprintf("%d", f), func(b *testing.B) { for b.Loop() { Bytes(randData).Match(randData, f) } }) } } ================================================ FILE: modules/mime/mime.go ================================================ package mime import ( stdmime "mime" "slices" "strings" "github.com/antgroup/hugescm/modules/mime/internal/charset" "github.com/antgroup/hugescm/modules/mime/internal/magic" ) // MIME struct holds information about a file format: the string representation // of the MIME type, the extension and the parent file format. type MIME struct { mime string aliases []string extension string // detector receives the raw input and a limit for the number of bytes it is // allowed to check. It returns whether the input matches a signature or not. detector magic.Detector children []*MIME parent *MIME } // String returns the string representation of the MIME type, e.g., "application/zip". func (m *MIME) String() string { return m.mime } // Extension returns the file extension associated with the MIME type. // It includes the leading dot, as in ".html". When the file format does not // have an extension, the empty string is returned. func (m *MIME) Extension() string { return m.extension } // Parent returns the parent MIME type from the hierarchy. // Each MIME type has a non-nil parent, except for the root MIME type. // // For example, the application/json and text/html MIME types have text/plain as // their parent because they are text files who happen to contain JSON or HTML. // Another example is the ZIP format, which is used as container // for Microsoft Office files, EPUB files, JAR files, and others. func (m *MIME) Parent() *MIME { return m.parent } // Is checks whether this MIME type, or any of its aliases, is equal to the // expected MIME type. MIME type equality test is done on the "type/subtype" // section, ignores any optional MIME parameters, ignores any leading and // trailing whitespace, and is case insensitive. func (m *MIME) Is(expectedMIME string) bool { // Parsing is needed because some detected MIME types contain parameters // that need to be stripped for the comparison. expectedMIME, _, _ = stdmime.ParseMediaType(expectedMIME) found, _, _ := stdmime.ParseMediaType(m.mime) return expectedMIME == found || slices.Contains(m.aliases, expectedMIME) } func newMIME( mime, extension string, detector magic.Detector, children ...*MIME) *MIME { m := &MIME{ mime: mime, extension: extension, detector: detector, children: children, } for _, c := range children { c.parent = m } return m } func (m *MIME) alias(aliases ...string) *MIME { m.aliases = aliases return m } // match does a depth-first search on the signature tree. It returns the deepest // successful node for which all the children detection functions fail. func (m *MIME) match(in []byte, readLimit uint32) *MIME { for _, c := range m.children { if c.detector(in, readLimit) { return c.match(in, readLimit) } } needsCharset := map[string]func([]byte) string{ "text/plain": charset.FromPlain, "text/html": charset.FromHTML, "text/xml": charset.FromXML, } charset := "" if f, ok := needsCharset[m.mime]; ok { // The charset comes from BOM, from HTML headers, from XML headers. // Limit the number of bytes searched for to 1024. charset = f(in[:min(len(in), 1024)]) } if m == root || charset == "" { return m } return m.cloneHierarchy(charset) } // Flatten transforms an hierarchy of MIMEs into a slice of MIMEs. func (m *MIME) Flatten() []*MIME { out := []*MIME{m} //nolint:prealloc for _, c := range m.children { out = append(out, c.Flatten()...) } return out } // Hierarchy returns an easy to read list of ancestors for m. // For example, application/json would return json>txt>root. func (m *MIME) Hierarchy() string { var h strings.Builder for m := m; m != nil; m = m.Parent() { e := strings.TrimPrefix(m.Extension(), ".") if e == "" { // There are some MIME without extensions. When generating the hierarchy, // it would be confusing to use empty string as extension. // Use the subtype instead; ex: application/x-executable -> x-executable. e = strings.Split(m.String(), "/")[1] if m.Is("application/octet-stream") { // for octet-stream use root, because it's short and used in many places e = "root" } } h.WriteString(">" + e) } return strings.TrimPrefix(h.String(), ">") } // clone creates a new MIME with the provided optional MIME parameters. func (m *MIME) clone(charset string) *MIME { clonedMIME := m.mime if charset != "" { clonedMIME = m.mime + "; charset=" + charset } return &MIME{ mime: clonedMIME, aliases: m.aliases, extension: m.extension, } } // cloneHierarchy creates a clone of m and all its ancestors. The optional MIME // parameters are set on the last child of the hierarchy. func (m *MIME) cloneHierarchy(charset string) *MIME { ret := m.clone(charset) lastChild := ret for p := m.Parent(); p != nil; p = p.Parent() { pClone := p.clone("") lastChild.parent = pClone lastChild = pClone } return ret } func (m *MIME) lookup(mime string) *MIME { if mime == m.mime { return m } if slices.Contains(m.aliases, mime) { return m } for _, c := range m.children { if m := c.lookup(mime); m != nil { return m } } return nil } // Extend adds detection for a sub-format. The detector is a function // returning true when the raw input file satisfies a signature. // The sub-format will be detected if all the detectors in the parent chain return true. // The extension should include the leading dot, as in ".html". func (m *MIME) Extend(detector func(raw []byte, limit uint32) bool, mime, extension string, aliases ...string) { mime, _, _ = stdmime.ParseMediaType(mime) c := &MIME{ mime: mime, extension: extension, detector: detector, parent: m, aliases: aliases, } mu.Lock() m.children = append([]*MIME{c}, m.children...) mu.Unlock() } ================================================ FILE: modules/mime/mime_test.go ================================================ package mime import ( "fmt" "net/http" "os" "path/filepath" "runtime" "testing" "time" ) const jscode = `#!/bin/node function main(){ } ` func TestJs(t *testing.T) { m := DetectAny([]byte(jscode)) fmt.Fprintf(os.Stderr, "%v\n", m.String()) } const h5 = ` ` func TestH5(t *testing.T) { m := DetectAny([]byte(h5)) fmt.Fprintf(os.Stderr, "%v\n", m.String()) } const svgblock = ` ` func TestSVG(t *testing.T) { now := time.Now() m := DetectAny([]byte(svgblock)) fmt.Fprintf(os.Stderr, "%v spent: %v\n", m.String(), time.Since(now)) } const svgblockNoComment = ` ` func TestSVGNoComment(t *testing.T) { m := DetectAny([]byte(svgblockNoComment)) fmt.Fprintf(os.Stderr, "%v\n", m.String()) } const ( htmlText = ` ` ) func TestHTML2(t *testing.T) { m := DetectAny([]byte(htmlText)) fmt.Fprintf(os.Stderr, "%v\n", m.String()) } func TestSVG2(t *testing.T) { _, filename, _, _ := runtime.Caller(0) b, err := os.ReadFile(filepath.Join(filepath.Dir(filename), "mimetsx")) if err != nil { return } m := DetectAny(b) fmt.Fprintf(os.Stderr, "%v %s\n", m.String(), http.DetectContentType(b)) m2 := DetectAny([]byte(` `)) fmt.Fprintf(os.Stderr, "%s\n", m2.String()) } func TestJsonMIME(t *testing.T) { for p := json; p != nil; p = p.Parent() { if p.Is("text/plain") { fmt.Fprintf(os.Stderr, "text: %v\n", json.String()) } } m2 := DetectAny([]byte(` `)) for p := m2; p != nil; p = p.Parent() { if p.Is("text/plain") { fmt.Fprintf(os.Stderr, "text: %v\n", m2.String()) } } } func TestSVGForEach(t *testing.T) { ss := []string{ "", ` `, ` `, ``, "var svgText=``", ` `, } for _, s := range ss { m := DetectAny([]byte(s)) fmt.Fprintf(os.Stderr, "[%s]\n mime: %v\n", s, m.mime) } } func TestXML(t *testing.T) { a := ` - Tove Jani Reminder Don't forget me this weekend! ` m := DetectAny([]byte(a)) fmt.Fprintf(os.Stderr, "mime: %v\n", m.mime) } ================================================ FILE: modules/mime/mimetsx ================================================ import React from 'react'; import Beric from './Beric'; import Straight from './Straight'; import Default from './Default'; import type { Position, ConnectLineMethod } from './interface'; import './index.less'; export interface ConnectLineProps { connectLineMethod?: ConnectLineMethod; columnSpacing: number; currentPosition: Position; radius: number; space: number; strokeWidth: number; stageTopToTopDistance: number; style?: React.CSSProperties; targetPositions: Position[]; } const Index: React.FC = ({ connectLineMethod = 'default', columnSpacing, currentPosition, radius, space, stageTopToTopDistance, strokeWidth, style, targetPositions = [], }) => { // todo:移除初始值会报错,问题排查中 const { top = 0, right = 0 } = currentPosition ?? {}; const dys = targetPositions.map((s) => s.top - top) ?? [0]; const dyMax = Math.max(...dys); const dyMin = Math.min(...dys); const height = Math.max(dyMax - dyMin, Math.abs(dyMax), Math.abs(dyMin)) + strokeWidth; const translateTop = dyMin > 0 ? -strokeWidth / 2 : dyMin - strokeWidth / 2; const dxes = targetPositions.map((s) => s.left - right) ?? [0]; const width = Math.max(...dxes); return targetPositions.length ? ( {connectLineMethod === 'beric' && ( )} {connectLineMethod === 'straight' && ( )} {connectLineMethod === 'default' && ( )} ) : null; }; export default Index; ================================================ FILE: modules/mime/mimetype.go ================================================ // Package mimetype uses magic number signatures to detect the MIME type of a file. // // File formats are stored in a hierarchy with application/octet-stream at its root. // For example, the hierarchy for HTML format is application/octet-stream -> // text/plain -> text/html. package mime import ( "errors" "io" "mime" "os" "sync/atomic" ) const defaultLimit uint32 = 3072 // readLimit is the maximum number of bytes from the input used when detecting. var readLimit uint32 = defaultLimit // Detect returns the MIME type found from the provided byte slice. // // The result is always a valid MIME type, with application/octet-stream // returned when identification failed. func Detect(in []byte) *MIME { // Using atomic because readLimit can be written at the same time in other goroutine. l := atomic.LoadUint32(&readLimit) if l > 0 && len(in) > int(l) { in = in[:l] } mu.RLock() defer mu.RUnlock() return root.match(in, l) } // DetectReader returns the MIME type of the provided reader. // // The result is always a valid MIME type, with application/octet-stream // returned when identification failed with or without an error. // Any error returned is related to the reading from the input reader. // // DetectReader assumes the reader offset is at the start. If the input is an // io.ReadSeeker you previously read from, it should be rewinded before detection: // // reader.Seek(0, io.SeekStart) func DetectReader(r io.Reader) (*MIME, error) { var in []byte var err error // Using atomic because readLimit can be written at the same time in other goroutine. l := atomic.LoadUint32(&readLimit) if l == 0 { in, err = io.ReadAll(r) if err != nil { return errMIME, err } } else { var n int in = make([]byte, l) // io.UnexpectedEOF means len(r) < len(in). It is not an error in this case, // it just means the input file is smaller than the allocated bytes slice. n, err = io.ReadFull(r, in) if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { return errMIME, err } in = in[:n] } mu.RLock() defer mu.RUnlock() return root.match(in, l), nil } // DetectFile returns the MIME type of the provided file. // // The result is always a valid MIME type, with application/octet-stream // returned when identification failed with or without an error. // Any error returned is related to the opening and reading from the input file. func DetectFile(path string) (*MIME, error) { f, err := os.Open(path) if err != nil { return errMIME, err } defer f.Close() // nolint return DetectReader(f) } // EqualsAny reports whether s MIME type is equal to any MIME type in mimes. // MIME type equality test is done on the "type/subtype" section, ignores // any optional MIME parameters, ignores any leading and trailing whitespace, // and is case insensitive. func EqualsAny(s string, mimes ...string) bool { s, _, _ = mime.ParseMediaType(s) for _, m := range mimes { m, _, _ = mime.ParseMediaType(m) if s == m { return true } } return false } // SetLimit sets the maximum number of bytes read from input when detecting the MIME type. // Increasing the limit provides better detection for file formats which store // their magical numbers towards the end of the file: docx, pptx, xlsx, etc. // During detection data is read in a single block of size limit, i.e. it is not buffered. // A limit of 0 means the whole input file will be used. func SetLimit(limit uint32) { // Using atomic because readLimit can be read at the same time in other goroutine. atomic.StoreUint32(&readLimit, limit) } // Extend adds detection for other file formats. // It is equivalent to calling Extend() on the root MIME type "application/octet-stream". func Extend(detector func(raw []byte, limit uint32) bool, mime, extension string, aliases ...string) { root.Extend(detector, mime, extension, aliases...) } // Lookup finds a MIME object by its string representation. // The representation can be the main MIME type, or any of its aliases. func Lookup(m string) *MIME { // We store the MIME types without optional params, so // perform parsing to extract the target MIME type without optional params. m, _, _ = mime.ParseMediaType(m) mu.RLock() defer mu.RUnlock() return root.lookup(m) } ================================================ FILE: modules/mime/sanitize.go ================================================ package mime // https://github.com/chromium/chromium/blob/main/third_party/blink/common/mime_util/mime_util.cc var ( // These types are excluded from the logic that allows all text/ types because // while they are technically text, it's very unlikely that a user expects to // see them rendered in text form. UnsupportedTextTypes = []string{ "text/calendar", "text/x-calendar", "text/x-vcalendar", "text/vcalendar", "text/vcard", "text/x-vcard", "text/directory", "text/ldif", "text/qif", "text/x-qif", "text/x-csv", "text/x-vcf", "text/rtf", "text/comma-separated-values", "text/csv", "text/tab-separated-values", "text/tsv", "text/ofx", // https://crbug.com/162238 "text/vnd.sun.j2me.app-descriptor", // https://crbug.com/176450 "text/x-ms-iqy", // https://crbug.com/1054863 "text/x-ms-odc", // https://crbug.com/1054863 "text/x-ms-rqy", // https://crbug.com/1054863 "text/x-ms-contact", // https://crbug.com/1054863 } SupportedNonImageTypes = []string{ "image/svg+xml", // SVG is text-based XML, even though it has an image/ // type "application/xml", "application/atom+xml", "application/rss+xml", "application/xhtml+xml", "application/json", "message/rfc822", // For MHTML support. "multipart/related", // For MHTML support. "multipart/x-mixed-replace", // Note: ADDING a new type here will probably render it AS HTML. This can // result in cross site scripting. } ) // DetectAny detects the MIME type from the input bytes. // The input []byte is not modified; only read operations are performed. func DetectAny(in []byte) *MIME { return root.match(in, uint32(len(in))) } func (m *MIME) Sanitize() string { return m.mime } ================================================ FILE: modules/mime/tree.go ================================================ package mime import ( "sync" "github.com/antgroup/hugescm/modules/mime/internal/magic" ) // mimetype stores the list of MIME types in a tree structure with // "application/octet-stream" at the root of the hierarchy. The hierarchy // approach minimizes the number of checks that need to be done on the input // and allows for more precise results once the base type of file has been // identified. // // root is a detector which passes for any slice of bytes. // When a detector passes the check, the children detectors // are tried in order to find a more accurate MIME type. var root = newMIME("application/octet-stream", "", func([]byte, uint32) bool { return true }, xpm, sevenZ, zip, pdf, fdf, ole, ps, psd, p7s, ogg, png, jpg, jxl, jp2, jpx, jpm, jxs, gif, webp, exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, lotus, ico, mp3, flac, midi, ape, musePack, amr, wav, aiff, au, mpeg, quickTime, mp4, webM, avi, flv, mkv, asf, aac, voc, m3u, rmvb, gzip, class, swf, crx, ttf, woff, woff2, otf, ttc, eot, wasm, shx, dbf, dcm, rar, djvu, mobi, lit, bpg, cbor, sqlite3, dwg, nes, lnk, macho, qcp, icns, hdr, mrc, mdb, accdb, zstd, cab, rpm, xz, lzip, torrent, cpio, tzif, xcf, pat, gbr, glb, cabIS, jxr, parquet, oneNote, chm, wpd, dxf, grib, zlib, inf, hlp, fm, bufr, pyc, // Keep text last because it is the slowest check. text, ) // errMIME is returned from Detect functions when err is not nil. // Detect could return root for erroneous cases, but it needs to lock mu in order to do so. // errMIME is same as root but it does not require locking. var errMIME = newMIME("application/octet-stream", "", func([]byte, uint32) bool { return false }) // mu guards access to the root MIME tree. Access to root must be synchronized with this lock. var mu = &sync.RWMutex{} // The list of nodes appended to the root node. var ( xz = newMIME("application/x-xz", ".xz", magic.Xz) gzip = newMIME("application/gzip", ".gz", magic.Gzip).alias( "application/x-gzip", "application/x-gunzip", "application/gzipped", "application/gzip-compressed", "application/x-gzip-compressed", "gzip/document") sevenZ = newMIME("application/x-7z-compressed", ".7z", magic.SevenZ) // APK must be checked before JAR because APK is a subset of JAR. // This means APK should be a child of JAR detector, but in practice, // the decisive signature for JAR might be located at the end of the file // and not reachable because of library readLimit. zip = newMIME("application/zip", ".zip", magic.Zip, docx, pptx, xlsx, epub, apk, jar, odt, ods, odp, odg, odf, odc, sxc, kmz, visio). alias("application/x-zip", "application/x-zip-compressed") tar = newMIME("application/x-tar", ".tar", magic.Tar) xar = newMIME("application/x-xar", ".xar", magic.Xar) bz2 = newMIME("application/x-bzip2", ".bz2", magic.Bz2) pdf = newMIME("application/pdf", ".pdf", magic.PDF). alias("application/x-pdf") fdf = newMIME("application/vnd.fdf", ".fdf", magic.Fdf) xlsx = newMIME("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx", magic.Xlsx) docx = newMIME("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx", magic.Docx) pptx = newMIME("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx", magic.Pptx) visio = newMIME("application/vnd.ms-visio.drawing.main+xml", ".vsdx", magic.Visio) epub = newMIME("application/epub+zip", ".epub", magic.Epub) jar = newMIME("application/java-archive", ".jar", magic.Jar). alias("application/jar", "application/jar-archive", "application/x-java-archive") apk = newMIME("application/vnd.android.package-archive", ".apk", magic.APK) ole = newMIME("application/x-ole-storage", "", magic.Ole, msi, msg, xls, pub, ppt, doc) msi = newMIME("application/x-ms-installer", ".msi", magic.Msi). alias("application/x-windows-installer", "application/x-msi") doc = newMIME("application/msword", ".doc", magic.Doc). alias("application/vnd.ms-word") ppt = newMIME("application/vnd.ms-powerpoint", ".ppt", magic.Ppt). alias("application/mspowerpoint") pub = newMIME("application/vnd.ms-publisher", ".pub", magic.Pub) xls = newMIME("application/vnd.ms-excel", ".xls", magic.Xls). alias("application/msexcel") msg = newMIME("application/vnd.ms-outlook", ".msg", magic.Msg) ps = newMIME("application/postscript", ".ps", magic.Ps) fits = newMIME("application/fits", ".fits", magic.Fits).alias("image/fits") ogg = newMIME("application/ogg", ".ogg", magic.Ogg, oggAudio, oggVideo). alias("application/x-ogg") oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio) oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo) // text = newMIME("text/plain", ".txt", magic.Text, svg, html, xml, php, js, lua, perl, python, ruby, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt, shell, netpbm, netpgm, netppm, netpam, rfc822) text = newMIME("text/plain", ".txt", magic.Text, svg, xml, lua, perl, python, ruby, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt, shell, netpbm, netpgm, netppm, netpam, rfc822) xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2, cdxxml). alias("application/xml") // xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2, xhtml, cdxxml). // alias("application/xml") // xhtml = newMIME("application/xhtml+xml", ".html", magic.XHTML) json = newMIME("application/json", ".json", magic.JSON, geoJSON, har, gltf, cdxJSON) har = newMIME("application/json", ".har", magic.HAR) csv = newMIME("text/csv", ".csv", magic.CSV) tsv = newMIME("text/tab-separated-values", ".tsv", magic.TSV) geoJSON = newMIME("application/geo+json", ".geojson", magic.GeoJSON) ndJSON = newMIME("application/x-ndjson", ".ndjson", magic.NdJSON) cdxJSON = newMIME("application/vnd.cyclonedx+json", ".json", magic.CDXJSON) // html = newMIME("text/html", ".html", magic.HTML) // php = newMIME("text/x-php", ".php", magic.Php) rtf = newMIME("text/rtf", ".rtf", magic.Rtf).alias("application/rtf") // js = newMIME("text/javascript", ".js", magic.Js). // alias("application/x-javascript", "application/javascript") srt = newMIME("application/x-subrip", ".srt", magic.Srt). alias("application/x-srt", "text/x-srt") vtt = newMIME("text/vtt", ".vtt", magic.Vtt) lua = newMIME("text/x-lua", ".lua", magic.Lua) perl = newMIME("text/x-perl", ".pl", magic.Perl) python = newMIME("text/x-python", ".py", magic.Python). alias("text/x-script.python", "application/x-python") pyc = newMIME("application/x-bytecode.python", ".pyc", magic.Pyc) ruby = newMIME("text/x-ruby", ".rb", magic.Ruby). alias("application/x-ruby") shell = newMIME("text/x-shellscript", ".sh", magic.Shell). alias("text/x-sh", "application/x-shellscript", "application/x-sh") tcl = newMIME("text/x-tcl", ".tcl", magic.Tcl). alias("application/x-tcl") vCard = newMIME("text/vcard", ".vcf", magic.VCard) iCalendar = newMIME("text/calendar", ".ics", magic.ICalendar) svg = newMIME("image/svg+xml", ".svg", magic.Svg) rss = newMIME("application/rss+xml", ".rss", magic.Rss). alias("text/rss") owl2 = newMIME("application/owl+xml", ".owl", magic.Owl2) atom = newMIME("application/atom+xml", ".atom", magic.Atom) x3d = newMIME("model/x3d+xml", ".x3d", magic.X3d) kml = newMIME("application/vnd.google-earth.kml+xml", ".kml", magic.Kml) kmz = newMIME("application/vnd.google-earth.kmz", ".kmz", magic.KMZ) xliff = newMIME("application/x-xliff+xml", ".xlf", magic.Xliff) collada = newMIME("model/vnd.collada+xml", ".dae", magic.Collada) gml = newMIME("application/gml+xml", ".gml", magic.Gml) gpx = newMIME("application/gpx+xml", ".gpx", magic.Gpx) tcx = newMIME("application/vnd.garmin.tcx+xml", ".tcx", magic.Tcx) amf = newMIME("application/x-amf", ".amf", magic.Amf) threemf = newMIME("application/vnd.ms-package.3dmanufacturing-3dmodel+xml", ".3mf", magic.Threemf) cdxxml = newMIME("application/vnd.cyclonedx+xml", ".xml", magic.CDXXML) png = newMIME("image/png", ".png", magic.Png, apng) apng = newMIME("image/apng", ".apng", magic.Apng). alias("image/vnd.mozilla.apng") jpg = newMIME("image/jpeg", ".jpg", magic.Jpg) jxl = newMIME("image/jxl", ".jxl", magic.Jxl) jp2 = newMIME("image/jp2", ".jp2", magic.Jp2) jpx = newMIME("image/jpx", ".jpf", magic.Jpx) jpm = newMIME("image/jpm", ".jpm", magic.Jpm). alias("video/jpm") jxs = newMIME("image/jxs", ".jxs", magic.Jxs) xpm = newMIME("image/x-xpixmap", ".xpm", magic.Xpm) bpg = newMIME("image/bpg", ".bpg", magic.Bpg) gif = newMIME("image/gif", ".gif", magic.Gif) webp = newMIME("image/webp", ".webp", magic.Webp) tiff = newMIME("image/tiff", ".tiff", magic.Tiff) bmp = newMIME("image/bmp", ".bmp", magic.Bmp). alias("image/x-bmp", "image/x-ms-bmp") // lotus check must be done before ico because some ico detection is a bit // relaxed and some lotus files are wrongfully identified as ico otherwise. lotus = newMIME("application/vnd.lotus-1-2-3", ".123", magic.Lotus123) ico = newMIME("image/x-icon", ".ico", magic.Ico) icns = newMIME("image/x-icns", ".icns", magic.Icns) psd = newMIME("image/vnd.adobe.photoshop", ".psd", magic.Psd). alias("image/x-psd", "application/photoshop") heic = newMIME("image/heic", ".heic", magic.Heic) heicSeq = newMIME("image/heic-sequence", ".heic", magic.HeicSequence) heif = newMIME("image/heif", ".heif", magic.Heif) heifSeq = newMIME("image/heif-sequence", ".heif", magic.HeifSequence) hdr = newMIME("image/vnd.radiance", ".hdr", magic.Hdr) avif = newMIME("image/avif", ".avif", magic.AVIF) mp3 = newMIME("audio/mpeg", ".mp3", magic.Mp3). alias("audio/x-mpeg", "audio/mp3") flac = newMIME("audio/flac", ".flac", magic.Flac) midi = newMIME("audio/midi", ".midi", magic.Midi). alias("audio/mid", "audio/sp-midi", "audio/x-mid", "audio/x-midi") ape = newMIME("audio/ape", ".ape", magic.Ape) musePack = newMIME("audio/musepack", ".mpc", magic.MusePack) wav = newMIME("audio/wav", ".wav", magic.Wav). alias("audio/x-wav", "audio/vnd.wave", "audio/wave") aiff = newMIME("audio/aiff", ".aiff", magic.Aiff).alias("audio/x-aiff") au = newMIME("audio/basic", ".au", magic.Au) amr = newMIME("audio/amr", ".amr", magic.Amr). alias("audio/amr-nb") aac = newMIME("audio/aac", ".aac", magic.AAC) voc = newMIME("audio/x-unknown", ".voc", magic.Voc) aMp4 = newMIME("audio/mp4", ".mp4", magic.AMp4). alias("audio/x-mp4a") m4a = newMIME("audio/x-m4a", ".m4a", magic.M4a) m3u = newMIME("application/vnd.apple.mpegurl", ".m3u", magic.M3U). alias("audio/mpegurl", "application/x-mpegurl") m4v = newMIME("video/x-m4v", ".m4v", magic.M4v) mj2 = newMIME("video/mj2", ".mj2", magic.Mj2) dvb = newMIME("video/vnd.dvb.file", ".dvb", magic.Dvb) mp4 = newMIME("video/mp4", ".mp4", magic.Mp4, avif, threeGP, threeG2, aMp4, mqv, m4a, m4v, heic, heicSeq, heif, heifSeq, mj2, dvb) webM = newMIME("video/webm", ".webm", magic.WebM). alias("audio/webm") mpeg = newMIME("video/mpeg", ".mpeg", magic.Mpeg) quickTime = newMIME("video/quicktime", ".mov", magic.QuickTime) mqv = newMIME("video/quicktime", ".mqv", magic.Mqv) threeGP = newMIME("video/3gpp", ".3gp", magic.ThreeGP). alias("video/3gp", "audio/3gpp") threeG2 = newMIME("video/3gpp2", ".3g2", magic.ThreeG2). alias("video/3g2", "audio/3gpp2") avi = newMIME("video/x-msvideo", ".avi", magic.Avi). alias("video/avi", "video/msvideo") flv = newMIME("video/x-flv", ".flv", magic.Flv) mkv = newMIME("video/x-matroska", ".mkv", magic.Mkv) asf = newMIME("video/x-ms-asf", ".asf", magic.Asf). alias("video/asf", "video/x-ms-wmv") rmvb = newMIME("application/vnd.rn-realmedia-vbr", ".rmvb", magic.Rmvb) class = newMIME("application/x-java-applet", ".class", magic.Class) swf = newMIME("application/x-shockwave-flash", ".swf", magic.SWF) crx = newMIME("application/x-chrome-extension", ".crx", magic.CRX) ttf = newMIME("font/ttf", ".ttf", magic.Ttf). alias("font/sfnt", "application/x-font-ttf", "application/font-sfnt") woff = newMIME("font/woff", ".woff", magic.Woff) woff2 = newMIME("font/woff2", ".woff2", magic.Woff2) otf = newMIME("font/otf", ".otf", magic.Otf) ttc = newMIME("font/collection", ".ttc", magic.Ttc) eot = newMIME("application/vnd.ms-fontobject", ".eot", magic.Eot) wasm = newMIME("application/wasm", ".wasm", magic.Wasm) shp = newMIME("application/vnd.shp", ".shp", magic.Shp) shx = newMIME("application/vnd.shx", ".shx", magic.Shx, shp) dbf = newMIME("application/x-dbf", ".dbf", magic.Dbf) exe = newMIME("application/vnd.microsoft.portable-executable", ".exe", magic.Exe) elf = newMIME("application/x-elf", "", magic.Elf, elfObj, elfExe, elfLib, elfDump) elfObj = newMIME("application/x-object", "", magic.ElfObj) elfExe = newMIME("application/x-executable", "", magic.ElfExe) elfLib = newMIME("application/x-sharedlib", ".so", magic.ElfLib) elfDump = newMIME("application/x-coredump", "", magic.ElfDump) ar = newMIME("application/x-archive", ".a", magic.Ar, deb). alias("application/x-unix-archive") deb = newMIME("application/vnd.debian.binary-package", ".deb", magic.Deb) rpm = newMIME("application/x-rpm", ".rpm", magic.RPM) dcm = newMIME("application/dicom", ".dcm", magic.Dcm) odt = newMIME("application/vnd.oasis.opendocument.text", ".odt", magic.Odt, ott). alias("application/x-vnd.oasis.opendocument.text") ott = newMIME("application/vnd.oasis.opendocument.text-template", ".ott", magic.Ott). alias("application/x-vnd.oasis.opendocument.text-template") ods = newMIME("application/vnd.oasis.opendocument.spreadsheet", ".ods", magic.Ods, ots). alias("application/x-vnd.oasis.opendocument.spreadsheet") ots = newMIME("application/vnd.oasis.opendocument.spreadsheet-template", ".ots", magic.Ots). alias("application/x-vnd.oasis.opendocument.spreadsheet-template") odp = newMIME("application/vnd.oasis.opendocument.presentation", ".odp", magic.Odp, otp). alias("application/x-vnd.oasis.opendocument.presentation") otp = newMIME("application/vnd.oasis.opendocument.presentation-template", ".otp", magic.Otp). alias("application/x-vnd.oasis.opendocument.presentation-template") odg = newMIME("application/vnd.oasis.opendocument.graphics", ".odg", magic.Odg, otg). alias("application/x-vnd.oasis.opendocument.graphics") otg = newMIME("application/vnd.oasis.opendocument.graphics-template", ".otg", magic.Otg). alias("application/x-vnd.oasis.opendocument.graphics-template") odf = newMIME("application/vnd.oasis.opendocument.formula", ".odf", magic.Odf). alias("application/x-vnd.oasis.opendocument.formula") odc = newMIME("application/vnd.oasis.opendocument.chart", ".odc", magic.Odc). alias("application/x-vnd.oasis.opendocument.chart") sxc = newMIME("application/vnd.sun.xml.calc", ".sxc", magic.Sxc) rar = newMIME("application/x-rar-compressed", ".rar", magic.RAR). alias("application/x-rar") djvu = newMIME("image/vnd.djvu", ".djvu", magic.DjVu) mobi = newMIME("application/x-mobipocket-ebook", ".mobi", magic.Mobi) lit = newMIME("application/x-ms-reader", ".lit", magic.Lit) sqlite3 = newMIME("application/vnd.sqlite3", ".sqlite", magic.Sqlite). alias("application/x-sqlite3") dwg = newMIME("image/vnd.dwg", ".dwg", magic.Dwg). alias("image/x-dwg", "application/acad", "application/x-acad", "application/autocad_dwg", "application/dwg", "application/x-dwg", "application/x-autocad", "drawing/dwg") warc = newMIME("application/warc", ".warc", magic.Warc) nes = newMIME("application/vnd.nintendo.snes.rom", ".nes", magic.Nes) lnk = newMIME("application/x-ms-shortcut", ".lnk", magic.Lnk) macho = newMIME("application/x-mach-binary", ".macho", magic.MachO) qcp = newMIME("audio/qcelp", ".qcp", magic.Qcp) mrc = newMIME("application/marc", ".mrc", magic.Marc) mdb = newMIME("application/x-msaccess", ".mdb", magic.MsAccessMdb) accdb = newMIME("application/x-msaccess", ".accdb", magic.MsAccessAce) zstd = newMIME("application/zstd", ".zst", magic.Zstd) cab = newMIME("application/vnd.ms-cab-compressed", ".cab", magic.Cab) cabIS = newMIME("application/x-installshield", ".cab", magic.InstallShieldCab) lzip = newMIME("application/lzip", ".lz", magic.Lzip).alias("application/x-lzip") torrent = newMIME("application/x-bittorrent", ".torrent", magic.Torrent) cpio = newMIME("application/x-cpio", ".cpio", magic.Cpio) tzif = newMIME("application/tzif", "", magic.TzIf) p7s = newMIME("application/pkcs7-signature", ".p7s", magic.P7s) xcf = newMIME("image/x-xcf", ".xcf", magic.Xcf) pat = newMIME("image/x-gimp-pat", ".pat", magic.Pat) gbr = newMIME("image/x-gimp-gbr", ".gbr", magic.Gbr) xfdf = newMIME("application/vnd.adobe.xfdf", ".xfdf", magic.Xfdf) glb = newMIME("model/gltf-binary", ".glb", magic.GLB) gltf = newMIME("model/gltf+json", ".gltf", magic.GLTF) jxr = newMIME("image/jxr", ".jxr", magic.Jxr).alias("image/vnd.ms-photo") parquet = newMIME("application/vnd.apache.parquet", ".parquet", magic.Par1). alias("application/x-parquet") netpbm = newMIME("image/x-portable-bitmap", ".pbm", magic.NetPBM) netpgm = newMIME("image/x-portable-graymap", ".pgm", magic.NetPGM) netppm = newMIME("image/x-portable-pixmap", ".ppm", magic.NetPPM) netpam = newMIME("image/x-portable-arbitrarymap", ".pam", magic.NetPAM) cbor = newMIME("application/cbor", ".cbor", magic.CBOR) oneNote = newMIME("application/onenote", ".one", magic.One) chm = newMIME("application/vnd.ms-htmlhelp", ".chm", magic.CHM) wpd = newMIME("application/vnd.wordperfect", ".wpd", magic.WPD) dxf = newMIME("image/vnd.dxf", ".dxf", magic.DXF) rfc822 = newMIME("message/rfc822", ".eml", magic.RFC822) grib = newMIME("application/grib", ".grb", magic.GRIB) zlib = newMIME("application/zlib", "", magic.Zlib) inf = newMIME("application/x-os2-inf", ".inf", magic.Inf) hlp = newMIME("application/x-os2-hlp", ".hlp", magic.Hlp) fm = newMIME("application/vnd.framemaker", ".fm", magic.FrameMaker) bufr = newMIME("application/bufr", ".bufr", magic.BUFR) ) ================================================ FILE: modules/oss/bucket.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "context" "crypto/hmac" "crypto/sha256" "encoding/base64" "fmt" "io" "net/http" "net/url" "os" "strconv" "strings" "time" ) // Stat // https://www.alibabacloud.com/help/zh/oss/developer-reference/headobject func (b *bucket) Stat(ctx context.Context, resourcePath string) (*Stat, error) { u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, Path: resourcePath, } req, err := b.NewRequestWithContext(ctx, "HEAD", u.String(), nil) if err != nil { return nil, err } resource := b.getResourceV2(resourcePath, "") b.signature(req, resource) resp, err := b.Do(req) if err != nil { return nil, err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return nil, os.ErrNotExist } if resp.StatusCode < 200 || resp.StatusCode > 299 { return nil, readOssError(resp) } size, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) if err != nil { return nil, err } return &Stat{Size: size, Crc64: resp.Header.Get("X-Oss-Hash-Crc64ecma"), Mime: resp.Header.Get("Content-Type")}, nil } func (b *bucket) checkSize(ctx context.Context, resourcePath string, resp *http.Response) (int64, error) { if rangeHdr := resp.Header.Get("Content-Range"); len(rangeHdr) != 0 { if size, err := parseSizeFromRange(rangeHdr); err == nil { return size, nil } si, err := b.Stat(ctx, resourcePath) if err != nil { return 0, err } return si.Size, nil } if size, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64); err == nil { return size, nil } si, err := b.Stat(ctx, resourcePath) if err != nil { return -1, err } return si.Size, nil } // Open: // https://www.alibabacloud.com/help/zh/oss/developer-reference/getobject func (b *bucket) Open(ctx context.Context, resourcePath string, start, length int64) (RangeReader, error) { u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, Path: resourcePath, } req, err := b.NewRequestWithContext(ctx, "GET", u.String(), nil) if err != nil { return nil, err } // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range switch { case start < 0: req.Header.Set("Range", fmt.Sprintf("bytes=%d", start)) case start >= 0 && length > 0: req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", start, start+length-1)) case start > 0: req.Header.Set("Range", fmt.Sprintf("bytes=%d-", start)) default: // NO RANGE } resource := b.getResourceV2(resourcePath, "") b.signature(req, resource) resp, err := b.Do(req) if err != nil { return nil, err } if resp.StatusCode == http.StatusNotFound { _ = resp.Body.Close() return nil, os.ErrNotExist } if resp.StatusCode < 200 || resp.StatusCode > 299 { defer resp.Body.Close() // nolint return nil, readOssError(resp) } size, err := b.checkSize(ctx, resourcePath, resp) if err != nil { _ = resp.Body.Close() return nil, err } return NewRangeReader(resp.Body, size, resp.Header.Get("Content-Range")), nil } func (b *bucket) Put(ctx context.Context, resourcePath string, r io.Reader, mime string) error { u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, Path: resourcePath, } req, err := b.NewRequestWithContext(ctx, "PUT", u.String(), r) if err != nil { return err } if len(mime) != 0 { req.Header.Set("Content-Type", mime) } resource := b.getResourceV2(resourcePath, "") b.signature(req, resource) resp, err := b.Do(req) if err != nil { return err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return os.ErrNotExist } if resp.StatusCode < 200 || resp.StatusCode > 299 { return readOssError(resp) } return nil } /* import base64 import hmac import hashlib import urllib h = hmac.new(accesskey, "GET\n\n\n1141889120\n%2Fexamplebucket%2Foss-api.pdf?\ &x-oss-ac-forward-allow=true\ &x-oss-ac-source-ip=127.0.0.1\ &x-oss-ac-subnet-mask=32\ &x-oss-signature-version=OSS2", hashlib.sha256) Signature = base64.encodestring(h.digest()).strip() */ func (b *bucket) Share(ctx context.Context, resourcePath string, expiresAt int64) string { u := &url.URL{ Scheme: b.sharedScheme, Host: b.sharedBucketEndpoint, Path: resourcePath, } if expiresAt <= 0 { expiresAt = time.Now().Add(time.Hour).Unix() } // headers := make(map[string]string) headers["x-oss-expires"] = strconv.FormatInt(expiresAt, 10) headers["x-oss-access-key-id"] = b.accessKeyID headers["x-oss-signature-version"] = "OSS2" hs := newHeaderSorter(headers) hs.Sort() var q strings.Builder for i := range hs.Keys { if i != 0 { _, _ = q.WriteString("&") } _, _ = q.WriteString(hs.Keys[i]) _ = q.WriteByte('=') _, _ = q.WriteString(url.QueryEscape(hs.Vals[i])) } qs := q.String() canonicalizedResource := b.getResourceV2(resourcePath, qs) // V2: // Please note that the v2 signature document given in the OSS documentation is wrong. Please analyze the open source code to implement it. // signStr = req.Method + "\n" + contentMd5 + "\n" + contentType + "\n" + date + "\n" + canonicalizedOSSHeaders + strings.Join(additionalList, ";") + "\n" + canonicalizedResource signedText := fmt.Sprintf("GET\n\n\n%d\n\n%s", expiresAt, canonicalizedResource) h := hmac.New(sha256.New, []byte(b.accessKeySecret)) _, _ = h.Write([]byte(signedText)) signed := base64.StdEncoding.EncodeToString(h.Sum(nil)) u.RawQuery = qs + "&x-oss-signature=" + url.QueryEscape(signed) return u.String() } ================================================ FILE: modules/oss/delete.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "bytes" "context" "crypto/md5" "encoding/base64" "encoding/xml" "errors" "net/http" "net/url" "strconv" "strings" "unicode/utf8" ) var ( escQuot = []byte(""") // shorter than """ escApos = []byte("'") // shorter than "'" escAmp = []byte("&") escLT = []byte("<") escGT = []byte(">") escTab = []byte(" ") escNL = []byte(" ") escCR = []byte(" ") escFFFD = []byte("\uFFFD") // Unicode replacement character ) func EscapeLFString(str string) string { var log bytes.Buffer for i := 0; i < len(str); i++ { if str[i] != '\n' { log.WriteByte(str[i]) } else { log.WriteString("\\n") } } return log.String() } // EscapeString writes to p the properly escaped XML equivalent // of the plain text data s. func EscapeXml(s string) string { var p strings.Builder var esc []byte hextable := "0123456789ABCDEF" escPattern := []byte("�") last := 0 for i := 0; i < len(s); { r, width := utf8.DecodeRuneInString(s[i:]) i += width switch r { case '"': esc = escQuot case '\'': esc = escApos case '&': esc = escAmp case '<': esc = escLT case '>': esc = escGT case '\t': esc = escTab case '\n': esc = escNL case '\r': esc = escCR default: if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { if r >= 0x00 && r < 0x20 { escPattern[3] = hextable[r>>4] escPattern[4] = hextable[r&0x0f] esc = escPattern } else { esc = escFFFD } break } continue } p.WriteString(s[last : i-width]) p.Write(esc) last = i } p.WriteString(s[last:]) return p.String() } // Decide whether the given rune is in the XML Character Range, per // the Char production of https://www.xml.com/axml/testaxml.htm, // Section 2.2 Characters. func isInCharacterRange(r rune) (inrange bool) { return r == 0x09 || r == 0x0A || r == 0x0D || r >= 0x20 && r <= 0xD7FF || r >= 0xE000 && r <= 0xFFFD || r >= 0x10000 && r <= 0x10FFFF } type deleteXML struct { XMLName xml.Name `xml:"Delete"` Objects []*DeleteObject `xml:"Object"` // Objects to delete Quiet bool `xml:"Quiet"` // Flag of quiet mode. } // DeleteObject defines the struct for deleting object type DeleteObject struct { XMLName xml.Name `xml:"Object"` Key string `xml:"Key"` // Object name VersionId string `xml:"VersionId,omitempty"` // Object VersionId } // DeleteObjectsResult defines result of DeleteObjects request type DeleteObjectsResult struct { XMLName xml.Name DeletedObjects []string // Deleted object key list } // DeletedKeyInfo defines object delete info type DeletedKeyInfo struct { XMLName xml.Name `xml:"Deleted"` Key string `xml:"Key"` // Object key VersionId string `xml:"VersionId"` // VersionId DeleteMarker bool `xml:"DeleteMarker"` // Object DeleteMarker DeleteMarkerVersionId string `xml:"DeleteMarkerVersionId"` // Object DeleteMarkerVersionId } type DeleteObjectVersionsResult struct { XMLName xml.Name `xml:"DeleteResult"` DeletedObjectsDetail []DeletedKeyInfo `xml:"Deleted"` // Deleted object detail info } // Owner defines Bucket/Object's owner type Owner struct { XMLName xml.Name `xml:"Owner"` ID string `xml:"ID"` // Owner ID DisplayName string `xml:"DisplayName"` // Owner's display name } // marshalDeleteObjectToXml deleteXML struct to xml func marshalDeleteObjectToXml(dxml deleteXML) string { var builder strings.Builder builder.WriteString("\n") builder.WriteString("") builder.WriteString("") builder.WriteString(strconv.FormatBool(dxml.Quiet)) builder.WriteString("") if len(dxml.Objects) > 0 { for _, object := range dxml.Objects { builder.WriteString("") if object.Key != "" { builder.WriteString("") builder.WriteString(EscapeXml(object.Key)) builder.WriteString("") } if object.VersionId != "" { builder.WriteString("") builder.WriteString(object.VersionId) builder.WriteString("") } builder.WriteString("") } } builder.WriteString("") return builder.String() } // https://www.alibabacloud.com/help/zh/oss/developer-reference/deleteobject func (b *bucket) Delete(ctx context.Context, resourcePath string) error { u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, Path: resourcePath, } req, err := b.NewRequestWithContext(ctx, "DELETE", u.String(), nil) if err != nil { return err } resource := b.getResourceV2(resourcePath, "") b.signature(req, resource) resp, err := b.Do(req) if err != nil { return err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return readOssError(resp) } if resp.StatusCode < 200 || resp.StatusCode > 299 { return errors.New(resp.Status) } return nil } func (b *bucket) deleteMultipleObjects(ctx context.Context, objectKeys []string) error { var dxml deleteXML for _, key := range objectKeys { dxml.Objects = append(dxml.Objects, &DeleteObject{Key: key}) } xmlData := marshalDeleteObjectToXml(dxml) q := "delete" u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, RawQuery: q, } md5sum := md5.Sum([]byte(xmlData)) req, err := b.NewRequestWithContext(ctx, "POST", u.String(), strings.NewReader(xmlData)) if err != nil { return err } req.Header.Set("Content-Type", "application/xml") req.Header.Set("Content-MD5", base64.StdEncoding.EncodeToString(md5sum[:])) resource := b.getResourceV2("", q) b.signature(req, resource) resp, err := b.Do(req) if err != nil { return err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return readOssError(resp) } if resp.StatusCode < 200 || resp.StatusCode > 299 { return readOssError(resp) } var result DeleteObjectVersionsResult if err := xml.NewDecoder(resp.Body).Decode(&result); err != nil { return err } return nil } // https://www.alibabacloud.com/help/zh/oss/developer-reference/deletemultipleobjects func (b *bucket) DeleteMultipleObjects(ctx context.Context, objectKeys []string) error { for len(objectKeys) > 0 { minSize := min(len(objectKeys), 200) if err := b.deleteMultipleObjects(ctx, objectKeys[:minSize]); err != nil { return err } objectKeys = objectKeys[minSize:] } return nil } ================================================ FILE: modules/oss/error.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "errors" "encoding/base64" "encoding/xml" "fmt" "io" "net/http" ) // Error represents an error in an operation with OSS. type Error struct { StatusCode int // HTTP status code (200, 403, ...) Code string // OSS error code ("UnsupportedOperation", ...) Message string // The human-oriented error message BucketName string RequestId string HostId string } func (e *Error) Error() string { return fmt.Sprintf("Aliyun API Error: RequestId: %s Status Code: %d Code: %s Message: %s", e.RequestId, e.StatusCode, e.Code, e.Message) } // ServiceError contains fields of the error response from Oss Service REST API. type ServiceError struct { XMLName xml.Name `xml:"Error"` Code string `xml:"Code"` // The error code returned from OSS to the caller Message string `xml:"Message"` // The detail error message from OSS RequestID string `xml:"RequestId"` // The UUID used to uniquely identify the request HostID string `xml:"HostId"` // The OSS server cluster's Id Endpoint string `xml:"Endpoint"` Ec string `xml:"EC"` RawMessage string // The raw messages from OSS StatusCode int // HTTP status code } // Error implements interface error func (e *ServiceError) Error() string { errorMessage := fmt.Sprintf("oss: service returned error: StatusCode=%d, ErrorCode=%s, ErrorMessage=\"%s\", RequestId=%s", e.StatusCode, e.Code, e.Message, e.RequestID) if len(e.Endpoint) > 0 { errorMessage = fmt.Sprintf("%s, Endpoint=%s", errorMessage, e.Endpoint) } if len(e.Ec) > 0 { errorMessage = fmt.Sprintf("%s, Ec=%s", errorMessage, e.Ec) } return errorMessage } func readResponseBody(resp *http.Response) ([]byte, error) { out, err := io.ReadAll(resp.Body) if errors.Is(err, io.EOF) { err = nil } return out, err } func serviceErrFromXML(body []byte, statusCode int, requestID string) (*ServiceError, error) { var se ServiceError if err := xml.Unmarshal(body, &se); err != nil { return nil, err } se.StatusCode = statusCode se.RequestID = requestID se.RawMessage = string(body) return &se, nil } func readOssError(resp *http.Response) error { if resp.StatusCode >= 400 && resp.StatusCode <= 505 { b, err := readResponseBody(resp) if err != nil { return err } if len(b) == 0 && len(resp.Header.Get("X-Oss-Err")) != 0 { if e, err := base64.StdEncoding.DecodeString(resp.Header.Get("X-Oss-Err")); err == nil { b = e } } if len(b) > 0 { if se, err := serviceErrFromXML(b, resp.StatusCode, resp.Header.Get("X-Oss-Request-Id")); err == nil { return se } } } return &ServiceError{StatusCode: resp.StatusCode, RequestID: resp.Header.Get("X-Oss-Request-Id"), Ec: resp.Header.Get("X-Oss-Ec")} } ================================================ FILE: modules/oss/gcs.example ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "context" "fmt" "io" "net/http" "os" "time" "cloud.google.com/go/storage" "google.golang.org/api/iterator" "google.golang.org/api/option" ) type gscBucket struct { bucket *storage.BucketHandle } var ( _ Bucket = &gscBucket{} ) func NewGscBucket(ctx context.Context, credentialsJSON []byte, ossBucketName string) (Bucket, error) { client, err := storage.NewClient(ctx, option.WithCredentialsJSON(credentialsJSON)) if err != nil { return nil, err } return &gscBucket{bucket: client.Bucket(ossBucketName)}, nil } func (b *gscBucket) Stat(ctx context.Context, resourcePath string) (*Stat, error) { h := b.bucket.Object(resourcePath) attr, err := h.Attrs(ctx) if err != nil { return nil, err } return &Stat{Size: attr.Size}, nil } // https://developer.mozilla.org/zh-CN/docs/Web/HTTP/Headers/Content-Range func (b *gscBucket) Open(ctx context.Context, resourcePath string, start, length int64) (RangeReader, error) { h := b.bucket.Object(resourcePath) if (start >= 0 && length > 0) || start > 0 { gr, err := h.NewRangeReader(ctx, start, length) if err != nil { return nil, err } rangeHdr := fmt.Sprintf("bytes %d-%d/%d", gr.Attrs.StartOffset, gr.Attrs.StartOffset+length-1, gr.Attrs.Size) return NewRangeReader(gr, gr.Attrs.Size, rangeHdr), nil } gr, err := h.NewReader(ctx) if err != nil { return nil, err } return NewRangeReader(gr, gr.Attrs.Size, ""), nil } func (b *gscBucket) Delete(ctx context.Context, resourcePath string) error { h := b.bucket.Object(resourcePath) return h.Delete(ctx) } func (b *gscBucket) Put(ctx context.Context, resourcePath string, r io.Reader, mime string) error { h := b.bucket.Object(resourcePath) w := h.NewWriter(ctx) w.ContentType = mime defer w.Close() // nolint if _, err := io.Copy(w, r); err != nil { return err } return nil } func (b *gscBucket) StartUpload(ctx context.Context, resourcePath, filePath string, mime string) error { fd, err := os.Open(filePath) if err != nil { return err } defer fd.Close() // nolint return b.Put(ctx, resourcePath, fd, mime) } func (b *gscBucket) LinearUpload(ctx context.Context, resourcePath string, r io.Reader, size int64, mime string) error { if size < maxPartSize { return b.Put(ctx, resourcePath, r, mime) } h := b.bucket.Object(resourcePath) w := h.NewWriter(ctx) w.ContentType = mime w.ChunkSize = int(defaultPartSize) defer w.Close() // nolint if _, err := io.Copy(w, r); err != nil { return err } return nil } func (b *gscBucket) DeleteMultipleObjects(ctx context.Context, objectKeys []string) error { for _, o := range objectKeys { _ = b.bucket.Object(o).Delete(ctx) } return nil } func (b *gscBucket) ListObjects(ctx context.Context, prefix, continuationToken string) ([]*Object, string, error) { objects := make([]*Object, 0, 100) q := &storage.Query{Prefix: prefix} it := b.bucket.Objects(ctx, q) it.PageInfo().Token = continuationToken for i := 0; i < 1000; i++ { o, err := it.Next() if err == iterator.Done { break } if err != nil { return nil, "", err } objects = append(objects, &Object{Key: o.Name, Size: o.Size, ETag: o.Etag}) } return objects, it.PageInfo().Token, nil } func (b *gscBucket) Share(ctx context.Context, resourcePath string, expiresAt int64) string { signedURL, _ := b.bucket.SignedURL(resourcePath, &storage.SignedURLOptions{Method: http.MethodGet, Expires: time.Now().Add(time.Second * time.Duration(expiresAt))}) return signedURL } ================================================ FILE: modules/oss/list.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "context" "encoding/xml" "net/http" "net/url" "time" ) // ListObjectsResult defines the result from ListObjects request type ListObjectsResult struct { XMLName xml.Name `xml:"ListBucketResult"` Prefix string `xml:"Prefix"` // The object prefix Marker string `xml:"Marker"` // The marker filter. MaxKeys int `xml:"MaxKeys"` // Max keys to return Delimiter string `xml:"Delimiter"` // The delimiter for grouping objects' name IsTruncated bool `xml:"IsTruncated"` // Flag indicates if all results are returned (when it's false) NextMarker string `xml:"NextMarker"` // The start point of the next query Objects []ObjectProperties `xml:"Contents"` // Object list CommonPrefixes []string `xml:"CommonPrefixes>Prefix"` // You can think of commonprefixes as "folders" whose names end with the delimiter } // ObjectProperties defines Object properties type ObjectProperties struct { XMLName xml.Name `xml:"Contents"` Key string `xml:"Key"` // Object key Type string `xml:"Type"` // Object type Size int64 `xml:"Size"` // Object size ETag string `xml:"ETag"` // Object ETag Owner Owner `xml:"Owner"` // Object owner information LastModified time.Time `xml:"LastModified"` // Object last modified time StorageClass string `xml:"StorageClass"` // Object storage class (Standard, IA, Archive) RestoreInfo string `xml:"RestoreInfo,omitempty"` // Object restoreInfo } // ListObjectsResultV2 defines the result from ListObjectsV2 request type ListObjectsResultV2 struct { XMLName xml.Name `xml:"ListBucketResult"` Prefix string `xml:"Prefix"` // The object prefix StartAfter string `xml:"StartAfter"` // the input StartAfter ContinuationToken string `xml:"ContinuationToken"` // the input ContinuationToken MaxKeys int `xml:"MaxKeys"` // Max keys to return Delimiter string `xml:"Delimiter"` // The delimiter for grouping objects' name IsTruncated bool `xml:"IsTruncated"` // Flag indicates if all results are returned (when it's false) NextContinuationToken string `xml:"NextContinuationToken"` // The start point of the next NextContinuationToken Objects []ObjectProperties `xml:"Contents"` // Object list CommonPrefixes []string `xml:"CommonPrefixes>Prefix"` // You can think of commonprefixes as "folders" whose names end with the delimiter } type Object struct { Key string `json:"key"` Size int64 `json:"size"` ETag string `json:"etag"` } const ( MaxKeys = 1000 ) // https://www.alibabacloud.com/help/zh/oss/developer-reference/listobjectsv2 func (b *bucket) ListObjects(ctx context.Context, prefix, continuationToken string) ([]*Object, string, error) { q := make(url.Values) q.Set("list-type", "2") q.Set("max-keys", "1000") q.Set("prefix", prefix) if len(continuationToken) != 0 { q.Set("continuation-token", continuationToken) } qs := q.Encode() u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, RawQuery: qs, } req, err := b.NewRequestWithContext(ctx, "GET", u.String(), nil) if err != nil { return nil, "", err } resource := b.getResourceV2("", qs) b.signature(req, resource) resp, err := b.Do(req) if err != nil { return nil, "", err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return nil, "", readOssError(resp) } if resp.StatusCode < 200 || resp.StatusCode > 299 { return nil, "", readOssError(resp) } var result ListObjectsResultV2 if err := xml.NewDecoder(resp.Body).Decode(&result); err != nil { return nil, "", err } objects := make([]*Object, 0, len(result.Objects)) for _, o := range result.Objects { objects = append(objects, &Object{Key: o.Key, Size: o.Size, ETag: o.ETag}) } return objects, result.NextContinuationToken, nil } ================================================ FILE: modules/oss/misc.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "errors" "fmt" "io" "strconv" "strings" ) type RangeReader interface { io.Reader io.Closer Size() int64 Range() string } type rangeReader struct { io.Reader closer io.Closer size int64 hdr string } func (r *rangeReader) Close() error { if r.closer == nil { return nil } return r.closer.Close() } func (r *rangeReader) Size() int64 { return r.size } func (r *rangeReader) Range() string { return r.hdr } func NewRangeReader(rc io.ReadCloser, size int64, hdr string) RangeReader { return &rangeReader{Reader: rc, closer: rc, size: size, hdr: hdr} } // https://developer.mozilla.org/zh-CN/docs/Web/HTTP/Headers/Content-Range const ( unitBytes = "bytes" ) var ( ErrNoSizeFromRange = errors.New("no size from range") ) // Content-Range: -/ // Content-Range: -/* // Content-Range: */ func parseSizeFromRange(hdr string) (int64, error) { before, after, ok := strings.Cut(hdr, " ") if !ok { return 0, ErrNoSizeFromRange } if before != unitBytes { return 0, ErrNoSizeFromRange } sv := strings.FieldsFunc(after, func(r rune) bool { return r == '-' || r == '/' }) if len(sv) == 2 { if sv[0] != "*" { return 0, ErrNoSizeFromRange } size, err := strconv.ParseInt(sv[1], 10, 64) if err != nil { return 0, fmt.Errorf("parse size from range %s %w", hdr, err) } return size, nil } if len(sv) != 3 || sv[2] == "*" { return 0, ErrNoSizeFromRange } size, err := strconv.ParseInt(sv[2], 10, 64) if err != nil { return 0, fmt.Errorf("parse size from range %s %w", hdr, err) } return size, nil } ================================================ FILE: modules/oss/misc_test.go ================================================ package oss import ( "fmt" "os" "testing" ) func TestSizeFromRange(t *testing.T) { ss := []string{ "bytes 200-1000/67589", "bytes 100-900/344606", "bytes 100-900/*", "bytes */344606", "x", } for _, s := range ss { i, err := parseSizeFromRange(s) if err != nil { fmt.Fprintf(os.Stderr, "hdr: %s error: %v\n", s, err) continue } fmt.Fprintf(os.Stderr, "hdr: %s size %d \n", s, i) } } ================================================ FILE: modules/oss/multipart.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "bytes" "context" "encoding/xml" "errors" "fmt" "io" "math" "net/http" "net/url" "os" "sort" "time" ) // size constant defined const ( Byte int64 = 1 << (iota * 10) KiByte MiByte GiByte TiByte PiByte EiByte ) const ( MaxRecvBytes = 16 << 20 // 16M MaxSendBytes = math.MaxInt32 ) const ( // https://help.aliyun.com/document_detail/31850.html?spm=a2c4g.31847.0.0.71f013681jxCO0 minPartSize = 100 * 1024 maxPartSize = 5 * GiByte defaultPartSize = GiByte // MaxPartSize = 5 * 1024 * 1024 * 1024 // Max part size, 5GB // MinPartSize = 100 * 1024 // Min part size, 100KB ) // InitiateMultipartUploadResult defines result of InitiateMultipartUpload request type InitiateMultipartUploadResult struct { XMLName xml.Name `xml:"InitiateMultipartUploadResult"` Bucket string `xml:"Bucket"` // Bucket name Key string `xml:"Key"` // Object name to upload UploadID string `xml:"UploadId"` // Generated UploadId } // UploadPart defines the upload/copy part type UploadPart struct { XMLName xml.Name `xml:"Part"` PartNumber int `xml:"PartNumber"` // Part number ETag string `xml:"ETag"` // ETag value of the part's data } type completeMultipartUploadXML struct { XMLName xml.Name `xml:"CompleteMultipartUpload"` Part []UploadPart `xml:"Part"` } // CompleteMultipartUploadResult defines result object of CompleteMultipartUploadRequest type CompleteMultipartUploadResult struct { XMLName xml.Name `xml:"CompleteMultipartUploadResult"` Location string `xml:"Location"` // Object URL Bucket string `xml:"Bucket"` // Bucket name ETag string `xml:"ETag"` // Object ETag Key string `xml:"Key"` // Object name } type UploadParts []UploadPart func (slice UploadParts) Len() int { return len(slice) } func (slice UploadParts) Less(i, j int) bool { return slice[i].PartNumber < slice[j].PartNumber } func (slice UploadParts) Swap(i, j int) { slice[i], slice[j] = slice[j], slice[i] } type chunk struct { number int // chunk number offset int64 // chunk offset size int64 // chunk size } func calculateChunk(size, partSize int64) []chunk { if size%partSize < minPartSize { partSize -= minPartSize } N := int(size / partSize) chunks := make([]chunk, 0, N+1) var offset int64 for i := range N { chunks = append(chunks, chunk{number: i + 1, offset: offset, size: partSize}) offset += partSize } if offset < size { chunks = append(chunks, chunk{number: N + 1, offset: offset, size: size - offset}) } return chunks } /* oss-example multipart.data 0004B9894A22E5B1888A1E29F823**** */ // InitiateMultipartUpload // https://www.alibabacloud.com/help/en/object-storage-service/latest/initiatemultipartupload func (b *bucket) initiateMultipartUpload(ctx context.Context, resourcePath string, mime string) (*InitiateMultipartUploadResult, error) { q := "uploads" u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, Path: resourcePath, RawQuery: q, } req, err := b.NewRequestWithContext(ctx, "POST", u.String(), nil) if err != nil { return nil, err } if len(mime) != 0 { req.Header.Set("Content-Type", mime) } resource := b.getResourceV2(resourcePath, q) b.signature(req, resource) resp, err := b.Do(req) if err != nil { return nil, err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return nil, os.ErrNotExist } if resp.StatusCode < 200 || resp.StatusCode > 299 { return nil, readOssError(resp) } var result InitiateMultipartUploadResult if err := xml.NewDecoder(resp.Body).Decode(&result); err != nil { return nil, err } return &result, nil } // https://www.alibabacloud.com/help/en/object-storage-service/latest/abortmultipartupload func (b *bucket) abortMultipartUpload(resourcePath string, mur *InitiateMultipartUploadResult) error { // NOTE: If the upload fails due to context cancellation, we cannot use the original context because that would cause our cleanup to fail. ctx, cancelCtx := context.WithTimeout(context.Background(), time.Second*10) defer cancelCtx() q := fmt.Sprintf("uploadId=%s", mur.UploadID) u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, Path: resourcePath, RawQuery: q, } req, err := b.NewRequestWithContext(ctx, "DELETE", u.String(), nil) if err != nil { return err } resource := b.getResourceV2(resourcePath, q) b.signature(req, resource) resp, err := b.Do(req) if err != nil { return err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return readOssError(resp) } if resp.StatusCode < 200 || resp.StatusCode > 299 { return errors.New(resp.Status) } return nil } // https://www.alibabacloud.com/help/en/object-storage-service/latest/completemultipartupload func (b *bucket) completeMultipartUpload(ctx context.Context, resourcePath string, mur *InitiateMultipartUploadResult, uploadParts []UploadPart) error { sort.Sort(UploadParts(uploadParts)) q := fmt.Sprintf("uploadId=%s", mur.UploadID) u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, Path: resourcePath, RawQuery: q, } input := &completeMultipartUploadXML{Part: uploadParts} body, err := xml.Marshal(input) if err != nil { return err } req, err := b.NewRequestWithContext(ctx, "POST", u.String(), bytes.NewReader(body)) if err != nil { return err } resource := b.getResourceV2(resourcePath, q) b.signature(req, resource) resp, err := b.Do(req) if err != nil { return err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return readOssError(resp) } if resp.StatusCode < 200 || resp.StatusCode > 299 { return errors.New(resp.Status) } var result CompleteMultipartUploadResult if err := xml.NewDecoder(resp.Body).Decode(&result); err != nil { return err } return nil } // https://www.alibabacloud.com/help/en/object-storage-service/latest/uploadpart func (b *bucket) uploadPart(ctx context.Context, resourcePath string, reader io.Reader, mur *InitiateMultipartUploadResult, k chunk) (UploadPart, error) { result := UploadPart{PartNumber: k.number} q := fmt.Sprintf("partNumber=%d&uploadId=%s", k.number, mur.UploadID) u := &url.URL{ Scheme: b.scheme, Host: b.bucketEndpoint, Path: resourcePath, RawQuery: q, } req, err := b.NewRequestWithContext(ctx, "PUT", u.String(), reader) if err != nil { return result, err } resource := b.getResourceV2(resourcePath, q) b.signature(req, resource) resp, err := b.Do(req) if err != nil { return result, err } defer resp.Body.Close() // nolint if resp.StatusCode == http.StatusNotFound { return result, os.ErrNotExist } if resp.StatusCode < 200 || resp.StatusCode > 299 { return result, readOssError(resp) } result.ETag = resp.Header.Get("ETag") return result, nil } func (b *bucket) LinearUpload(ctx context.Context, resourcePath string, r io.Reader, size int64, mime string) error { if size < maxPartSize { return b.Put(ctx, resourcePath, r, mime) } chunks := calculateChunk(size, b.partSize) if len(chunks) < 2 { return fmt.Errorf("BUGS BAD CHUNK. size: %d, len(chunks): %d", size, len(chunks)) } mur, err := b.initiateMultipartUpload(ctx, resourcePath, mime) if err != nil { return err } parts := make([]UploadPart, len(chunks)) for i, k := range chunks { u, err := b.uploadPart(ctx, resourcePath, io.LimitReader(r, k.size), mur, k) if err != nil { _ = b.abortMultipartUpload(resourcePath, mur) return err } parts[i] = u } if err := b.completeMultipartUpload(ctx, resourcePath, mur, parts); err != nil { _ = b.abortMultipartUpload(resourcePath, mur) return fmt.Errorf("complete upload error: %w", err) } return nil } ================================================ FILE: modules/oss/oss.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "context" "io" "net" "net/http" "net/url" "strings" "time" ) const ( DefaultContentSha256 = "UNSIGNED-PAYLOAD" // for v4 signature OssContentSha256Key = "X-Oss-Content-Sha256" ) // PutObject https://help.aliyun.com/document_detail/31978.htm?spm=a2c4g.31948.0.0.3ec1f0355LA8x4#reference-l5p-ftw-tdb // GetObject https://help.aliyun.com/document_detail/31980.htm?spm=a2c4g.31948.0.0.3ec1f0355LA8x4#reference-ccf-rgd-5db // HeadObject https://help.aliyun.com/document_detail/31984.htm?spm=a2c4g.31948.0.0.3ec1f0355LA8x4#reference-bgh-cbw-wdb // GetObjectMeta https://help.aliyun.com/document_detail/31985.htm?spm=a2c4g.31948.0.0.3ec1f0355LA8x4#reference-sg4-k2w-wdb // DeleteObject https://help.aliyun.com/document_detail/31982.htm?spm=a2c4g.31948.0.0.3ec1f0355LA8x4#reference-iqc-mqv-wdb type Bucket interface { Stat(ctx context.Context, resourcePath string) (*Stat, error) Open(ctx context.Context, resourcePath string, start, length int64) (RangeReader, error) Delete(ctx context.Context, resourcePath string) error Put(ctx context.Context, resourcePath string, r io.Reader, mime string) error StartUpload(ctx context.Context, resourcePath, filePath string, mime string) error // LinearUpload: Aliyun oss currently has a 5GB file upload limit, so when the OSS object exceeds 5GB, we use the MultipartUpload mechanism to upload. However, // please note that due to network failures or other problems, large file uploads are prone to failure, and LFS is currently not working well. scheme to solve this problem. LinearUpload(ctx context.Context, resourcePath string, r io.Reader, size int64, mime string) error DeleteMultipleObjects(ctx context.Context, objectKeys []string) error ListObjects(ctx context.Context, prefix, continuationToken string) ([]*Object, string, error) Share(ctx context.Context, resourcePath string, expiresAt int64) string } var ( _ Bucket = &bucket{} ) const ( defaultConnTimeout = time.Second * 60 defaultReadWriteTimeout = time.Second * 120 defaultIdleConnTimeout = time.Second * 100 defaultResponseHeaderTimeout = time.Second * 120 defaultMaxIdleConns = 100 defaultMaxIdleConnsPerHost = 100 ) type bucket struct { scheme string host string name string accessKeyID string // AccessId accessKeySecret string // AccessKey bucketEndpoint string sharedScheme string sharedBucketEndpoint string product string region string partSize int64 // upload file multipart size *http.Client } type NewBucketOptions struct { Endpoint string SharedEndpoint string Bucket string AccessKeyID string AccessKeySecret string Product string Region string PartSize int64 } func NewBucket(opts *NewBucketOptions) (Bucket, error) { endpoint := opts.Endpoint if !strings.Contains(endpoint, "://") { endpoint = "http://" + endpoint } u, err := url.Parse(endpoint) if err != nil { return nil, err } dialer := net.Dialer{ Timeout: 30 * time.Second, KeepAlive: 30 * time.Second, } b := &bucket{ scheme: u.Scheme, host: u.Host, name: opts.Bucket, accessKeyID: opts.AccessKeyID, accessKeySecret: opts.AccessKeySecret, bucketEndpoint: opts.Bucket + "." + u.Host, product: opts.Product, region: opts.Region, partSize: opts.PartSize, Client: &http.Client{ Transport: &http.Transport{ Proxy: http.ProxyFromEnvironment, DialContext: dialer.DialContext, ForceAttemptHTTP2: true, MaxIdleConns: defaultMaxIdleConns, MaxIdleConnsPerHost: defaultMaxIdleConnsPerHost, IdleConnTimeout: defaultIdleConnTimeout, }, }} if b.partSize <= 0 { b.partSize = defaultPartSize } if len(opts.SharedEndpoint) == 0 { b.sharedScheme = b.scheme b.sharedBucketEndpoint = b.bucketEndpoint return b, nil } sharedEndpoint := opts.SharedEndpoint if !strings.Contains(sharedEndpoint, "://") { sharedEndpoint = "http://" + sharedEndpoint } sharedURL, err := url.Parse(sharedEndpoint) if err != nil { return nil, err } b.sharedScheme = sharedURL.Scheme b.sharedBucketEndpoint = opts.Bucket + "." + sharedURL.Host return b, nil } func (b *bucket) NewRequestWithContext(ctx context.Context, method string, url string, body io.Reader) (*http.Request, error) { req, err := http.NewRequestWithContext(ctx, method, url, body) if err != nil { return nil, err } req.Header.Set("User-Agent", "HugeSCM/1.0") return req, nil } type Stat struct { Size int64 Mime string Crc64 string } ================================================ FILE: modules/oss/s3.example ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "context" "fmt" "io" "os" "time" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" ) type s3Bucket struct { Bucket client *s3.Client pc *s3.PresignClient bucketName string partSize int64 } func NewS3Bucket(ctx context.Context, s3Region, s3AccessKeyID, s3AccessKeySecret, s3BucketName string, partSize int64) (Bucket, error) { cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion(s3Region), config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(s3AccessKeyID, s3AccessKeySecret, ""))) if err != nil { return nil, err } if partSize <= minPartSize { partSize = defaultPartSize } // Create an Amazon S3 service client client := s3.NewFromConfig(cfg) return &s3Bucket{client: client, pc: s3.NewPresignClient(client), bucketName: s3BucketName, partSize: partSize}, nil } func (b *s3Bucket) Stat(ctx context.Context, resourcePath string) (*Stat, error) { o, err := b.client.HeadObject(ctx, &s3.HeadObjectInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), }) if err != nil { return nil, err } return &Stat{ Size: aws.ToInt64(o.ContentLength), Mime: aws.ToString(o.ContentType), }, nil } func (b *s3Bucket) checkSize(ctx context.Context, resourcePath string, o *s3.GetObjectOutput) (int64, error) { if rangeHdr := aws.ToString(o.ContentRange); len(rangeHdr) != 0 { if size, err := parseSizeFromRange(rangeHdr); err == nil { return size, nil } si, err := b.Stat(ctx, resourcePath) if err != nil { return 0, err } return si.Size, nil } return aws.ToInt64(o.ContentLength), nil } func (b *s3Bucket) Open(ctx context.Context, resourcePath string, start, length int64) (RangeReader, error) { var awsRange *string switch { case start < 0: awsRange = aws.String(fmt.Sprintf("bytes=%d", start)) case start >= 0 && length > 0: awsRange = aws.String(fmt.Sprintf("bytes=%d-%d", start, start+length-1)) case start > 0: awsRange = aws.String(fmt.Sprintf("bytes=%d-", start)) default: // NO RANGE } o, err := b.client.GetObject(ctx, &s3.GetObjectInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), Range: awsRange, }) if err != nil { return nil, err } size, err := b.checkSize(ctx, resourcePath, o) if err != nil { _ = o.Body.Close() return nil, err } return NewRangeReader(o.Body, size, aws.ToString(o.ContentRange)), nil } func (b *s3Bucket) Delete(ctx context.Context, resourcePath string) error { _, err := b.client.DeleteObject(ctx, &s3.DeleteObjectInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), }) return err } func (b *s3Bucket) Put(ctx context.Context, resourcePath string, r io.Reader, mime string) error { _, err := b.client.PutObject(ctx, &s3.PutObjectInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), ContentType: aws.String(mime), Body: r, }) if err != nil { return err } return nil } func (b *s3Bucket) upload(ctx context.Context, resourcePath, filePath string, mime string) error { fd, err := os.Open(filePath) if err != nil { return err } defer fd.Close() // nolint return b.Put(ctx, resourcePath, fd, mime) } func (b *s3Bucket) uploadFilePart(ctx context.Context, resourcePath string, filePath string, mur *s3.CreateMultipartUploadOutput, k chunk) (types.CompletedPart, error) { result := types.CompletedPart{PartNumber: aws.Int32(int32(k.number))} fd, err := os.Open(filePath) if err != nil { return result, err } defer fd.Close() // nolint if _, err := fd.Seek(k.offset, io.SeekStart); err != nil { return result, err } return b.uploadPart(ctx, resourcePath, io.LimitReader(fd, k.size), mur, k) } func (b *s3Bucket) uploadPart(ctx context.Context, resourcePath string, reader io.Reader, mur *s3.CreateMultipartUploadOutput, k chunk) (types.CompletedPart, error) { result := types.CompletedPart{PartNumber: aws.Int32(int32(k.number))} o, err := b.client.UploadPart(ctx, &s3.UploadPartInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), UploadId: mur.UploadId, Body: reader, }) if err != nil { return result, err } result.ETag = o.ETag return result, nil } func (b *s3Bucket) StartUpload(ctx context.Context, resourcePath, filePath string, mime string) error { si, err := os.Stat(filePath) if err != nil { return fmt.Errorf("stat file error: %w", err) } size := si.Size() if size <= b.partSize { return b.upload(ctx, resourcePath, filePath, mime) } chunks := calculateChunk(size, b.partSize) if len(chunks) < 2 { return fmt.Errorf("BUGS BAD CHUNK. size: %d, len(chunks): %d", size, len(chunks)) } mur, err := b.client.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), ContentType: aws.String(mime), }) if err != nil { return err } newCtx, cancelCtx := context.WithCancel(ctx) // defer cancelCtx() results := make(chan types.CompletedPart, len(chunks)) failed := make(chan error) for i := 0; i < len(chunks); i++ { go func(k chunk) { u, err := b.uploadFilePart(newCtx, resourcePath, filePath, mur, k) if err != nil { failed <- fmt.Errorf("upload part-%d error: %w", k.number, err) return } results <- u }(chunks[i]) } parts := make([]types.CompletedPart, len(chunks)) completed := 0 for completed < len(chunks) { select { case part := <-results: completed++ parts[aws.ToInt32(part.PartNumber)-1] = part case err := <-failed: cancelCtx() _, _ = b.client.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), UploadId: mur.UploadId, }) return err } if completed >= len(chunks) { break } } cancelCtx() if _, err := b.client.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), MultipartUpload: &types.CompletedMultipartUpload{ Parts: parts, }, UploadId: mur.UploadId, }); err != nil { _, _ = b.client.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), UploadId: mur.UploadId, }) return fmt.Errorf("complete upload error: %w", err) } return nil } func (b *s3Bucket) LinearUpload(ctx context.Context, resourcePath string, r io.Reader, size int64, mime string) error { if size < maxPartSize { return b.Put(ctx, resourcePath, r, mime) } chunks := calculateChunk(size, b.partSize) if len(chunks) < 2 { return fmt.Errorf("BUGS BAD CHUNK. size: %d, len(chunks): %d", size, len(chunks)) } mur, err := b.client.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), ContentType: aws.String(mime), }) if err != nil { return err } parts := make([]types.CompletedPart, len(chunks)) for i, k := range chunks { u, err := b.uploadPart(ctx, resourcePath, io.LimitReader(r, k.size), mur, k) if err != nil { _, _ = b.client.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), UploadId: mur.UploadId, }) return err } parts[i] = u } if _, err := b.client.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), MultipartUpload: &types.CompletedMultipartUpload{ Parts: parts, }, UploadId: mur.UploadId, }); err != nil { _, _ = b.client.AbortMultipartUpload(ctx, &s3.AbortMultipartUploadInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), UploadId: mur.UploadId, }) return fmt.Errorf("complete upload error: %w", err) } return nil } func (b *s3Bucket) DeleteMultipleObjects(ctx context.Context, objectKeys []string) error { d := &types.Delete{} for _, o := range objectKeys { d.Objects = append(d.Objects, types.ObjectIdentifier{ Key: aws.String(o), }) } _, err := b.client.DeleteObjects(ctx, &s3.DeleteObjectsInput{ Bucket: aws.String(b.bucketName), Delete: d, }) return err } func (b *s3Bucket) ListObjects(ctx context.Context, prefix, continuationToken string) ([]*Object, string, error) { in := &s3.ListObjectsV2Input{ Bucket: aws.String(b.bucketName), Prefix: aws.String(prefix), MaxKeys: aws.Int32(1000), } if len(continuationToken) != 0 { in.ContinuationToken = aws.String(continuationToken) } out, err := b.client.ListObjectsV2(ctx, in) if err != nil { return nil, "", err } objects := make([]*Object, 0, len(out.Contents)) for _, o := range out.Contents { objects = append(objects, &Object{Key: aws.ToString(o.Key), Size: aws.ToInt64(o.Size), ETag: aws.ToString(o.ETag)}) } return objects, aws.ToString(out.ContinuationToken), nil } func (b *s3Bucket) Share(ctx context.Context, resourcePath string, expiresAt int64) string { o, err := b.pc.PresignGetObject(ctx, &s3.GetObjectInput{ Bucket: aws.String(b.bucketName), Key: aws.String(resourcePath), }, func(po *s3.PresignOptions) { po.Expires = time.Second * time.Duration(expiresAt) }) if err != nil { return "" } return o.URL } ================================================ FILE: modules/oss/signature.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "bytes" "crypto/hmac" "crypto/sha256" "encoding/base64" "fmt" "net/http" "net/url" "sort" "strings" "time" ) // https://help.aliyun.com/document_detail/386432.htm?spm=a2c4g.475520.0.0.2c8bc7c3AkNfW5 // https://help.aliyun.com/document_detail/31951.html?spm=a2c4g.31955.4.5.27b86cf05lSqjf&scm=20140722.H_31951._.ID_31951-OR_rec-V_1 // Authorization = "OSS " + AccessKeyId + ":" + Signature // Signature = base64(hmac-sha1(AccessKeySecret, // VERB + "\n" // + Content-MD5 + "\n" // + Content-Type + "\n" // + Date + "\n" // + CanonicalizedOSSHeaders // + CanonicalizedResource)) // CanonicalizedResource // https://help.aliyun.com/document_detail/31951.html?spm=a2c4g.31955.4.5.27b86cf05lSqjf&scm=20140722.H_31951._.ID_31951-OR_rec-V_1#section-rvv-dx2-xdb // CanonicalizedOSSHeaders // https://help.aliyun.com/document_detail/31951.html?spm=a2c4g.31955.4.5.27b86cf05lSqjf&scm=20140722.H_31951._.ID_31951-OR_rec-V_1#section-w2k-sw2-xdb // headerSorter defines the key-value structure for storing the sorted data in signHeader. type headerSorter struct { Keys []string Vals []string } // newHeaderSorter is an additional function for function SignHeader. func newHeaderSorter(m map[string]string) *headerSorter { hs := &headerSorter{ Keys: make([]string, 0, len(m)), Vals: make([]string, 0, len(m)), } for k, v := range m { hs.Keys = append(hs.Keys, k) hs.Vals = append(hs.Vals, v) } return hs } // Sort is an additional function for function SignHeader. func (hs *headerSorter) Sort() { sort.Sort(hs) } // Len is an additional function for function SignHeader. func (hs *headerSorter) Len() int { return len(hs.Vals) } // Less is an additional function for function SignHeader. func (hs *headerSorter) Less(i, j int) bool { return bytes.Compare([]byte(hs.Keys[i]), []byte(hs.Keys[j])) < 0 } // Swap is an additional function for function SignHeader. func (hs *headerSorter) Swap(i, j int) { hs.Vals[i], hs.Vals[j] = hs.Vals[j], hs.Vals[i] hs.Keys[i], hs.Keys[j] = hs.Keys[j], hs.Keys[i] } // NewSignature creates signature for string following Aliyun rules func NewSignature(content, accessKeySecret string) string { // Crypto by HMAC-SHA256 h := hmac.New(sha256.New, []byte(accessKeySecret)) h.Write([]byte(content)) return base64.StdEncoding.EncodeToString(h.Sum(nil)) } // additionalList, _ := conn.getAdditionalHeaderKeys(req) // if len(additionalList) > 0 { // authorizationFmt := "OSS2 AccessKeyId:%v,AdditionalHeaders:%v,Signature:%v" // additionnalHeadersStr := strings.Join(additionalList, ";") // authorizationStr = fmt.Sprintf(authorizationFmt, akIf.GetAccessKeyID(), additionnalHeadersStr, conn.getSignedStr(req, canonicalizedResource, akIf.GetAccessKeySecret())) // } else { // authorizationFmt := "OSS2 AccessKeyId:%v,Signature:%v" // authorizationStr = fmt.Sprintf(authorizationFmt, akIf.GetAccessKeyID(), conn.getSignedStr(req, canonicalizedResource, akIf.GetAccessKeySecret())) // } func (b *bucket) signature(req *http.Request, canonicalizedResource string) { req.Header.Set("x-oss-signature-version", "OSS2") now := time.Now().UTC() req.Header.Set("Date", now.Format(http.TimeFormat)) // Find out the "x-oss-"'s address in header of the request headers := make(map[string]string) for k, v := range req.Header { k = strings.ToLower(k) if strings.HasPrefix(k, "x-oss-") { headers[k] = v[0] } } hs := newHeaderSorter(headers) hs.Sort() var cw strings.Builder for i := range hs.Keys { _, _ = cw.WriteString(hs.Keys[i]) _ = cw.WriteByte(':') _, _ = cw.WriteString(hs.Vals[i]) _ = cw.WriteByte('\n') } date := req.Header.Get("Date") contentType := req.Header.Get("Content-Type") contentMd5 := req.Header.Get("Content-MD5") h := hmac.New(sha256.New, []byte(b.accessKeySecret)) signedText := req.Method + "\n" + contentMd5 + "\n" + contentType + "\n" + date + "\n" + cw.String() + "\n" + canonicalizedResource _, _ = h.Write([]byte(signedText)) signed := base64.StdEncoding.EncodeToString(h.Sum(nil)) authorizationStr := fmt.Sprintf("OSS2 AccessKeyId:%v,Signature:%v", b.accessKeyID, signed) req.Header.Set("Authorization", authorizationStr) } func (b *bucket) getResourceV2(objectName, subResource string) string { if subResource != "" { subResource = "?" + subResource } return url.QueryEscape("/"+b.name+"/") + strings.ReplaceAll(url.QueryEscape(objectName), "+", "%20") + subResource } ================================================ FILE: modules/oss/upload.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package oss import ( "context" "fmt" "io" "os" ) // upload without multipart func (b *bucket) upload(ctx context.Context, resourcePath, filePath string, mime string) error { fd, err := os.Open(filePath) if err != nil { return err } defer fd.Close() // nolint return b.Put(ctx, resourcePath, fd, mime) } func (b *bucket) uploadFilePart(ctx context.Context, resourcePath string, filePath string, mur *InitiateMultipartUploadResult, k chunk) (UploadPart, error) { result := UploadPart{PartNumber: k.number} fd, err := os.Open(filePath) if err != nil { return result, err } defer fd.Close() // nolint if _, err := fd.Seek(k.offset, io.SeekStart); err != nil { return result, err } return b.uploadPart(ctx, resourcePath, io.LimitReader(fd, k.size), mur, k) } func (b *bucket) StartUpload(ctx context.Context, resourcePath, filePath string, mime string) error { si, err := os.Stat(filePath) if err != nil { return fmt.Errorf("stat file error: %w", err) } size := si.Size() if size <= b.partSize { return b.upload(ctx, resourcePath, filePath, mime) } chunks := calculateChunk(size, b.partSize) if len(chunks) < 2 { return fmt.Errorf("BUGS BAD CHUNK. size: %d, len(chunks): %d", size, len(chunks)) } mur, err := b.initiateMultipartUpload(ctx, resourcePath, mime) if err != nil { return err } newCtx, cancelCtx := context.WithCancel(ctx) // defer cancelCtx() results := make(chan UploadPart, len(chunks)) failed := make(chan error) for i := range chunks { go func(k chunk) { u, err := b.uploadFilePart(newCtx, resourcePath, filePath, mur, k) if err != nil { failed <- fmt.Errorf("upload part-%d error: %w", k.number, err) return } results <- u }(chunks[i]) } parts := make([]UploadPart, len(chunks)) completed := 0 for completed < len(chunks) { select { case part := <-results: completed++ parts[part.PartNumber-1] = part case err := <-failed: cancelCtx() _ = b.abortMultipartUpload(resourcePath, mur) return err } if completed >= len(chunks) { break } } cancelCtx() if err := b.completeMultipartUpload(ctx, resourcePath, mur, parts); err != nil { _ = b.abortMultipartUpload(resourcePath, mur) return fmt.Errorf("complete upload error: %w", err) } return nil } ================================================ FILE: modules/patchview/highlight.go ================================================ package patchview import ( "fmt" "io" "strings" "sync" "charm.land/lipgloss/v2" "github.com/alecthomas/chroma/v2" "github.com/alecthomas/chroma/v2/lexers" "github.com/alecthomas/chroma/v2/styles" "github.com/charmbracelet/x/exp/charmtone" "github.com/zeebo/xxh3" ) const ( // defaultCacheSize is the default cache size. defaultCacheSize = 1000 // maxSourceLenForCache is the maximum source length allowed for caching. maxSourceLenForCache = 10000 // tabSpaces is the number of spaces to replace tabs with. tabSpaces = " " // 4 spaces ) // lruCache is an LRU cache implementation. type lruCache struct { mu sync.Mutex items map[uint64]*lruItem head *lruItem tail *lruItem capacity int } type lruItem struct { key uint64 value string prev *lruItem next *lruItem } func newLRUCache(capacity int) *lruCache { return &lruCache{ items: make(map[uint64]*lruItem), capacity: capacity, } } func (c *lruCache) get(key uint64) (string, bool) { c.mu.Lock() defer c.mu.Unlock() if item, ok := c.items[key]; ok { c.moveToFrontLocked(item) return item.value, true } return "", false } func (c *lruCache) set(key uint64, value string) { c.mu.Lock() defer c.mu.Unlock() if item, ok := c.items[key]; ok { item.value = value c.moveToFrontLocked(item) return } item := &lruItem{key: key, value: value} c.items[key] = item if c.head == nil { c.head = item c.tail = item } else { item.next = c.head c.head.prev = item c.head = item } if c.capacity > 0 && len(c.items) > c.capacity { c.evictTailLocked() } } func (c *lruCache) moveToFrontLocked(item *lruItem) { if item == c.head { return } if item.prev != nil { item.prev.next = item.next } if item.next != nil { item.next.prev = item.prev } if item == c.tail { c.tail = item.prev } item.prev = nil item.next = c.head c.head.prev = item c.head = item } func (c *lruCache) evictTailLocked() { if c.tail == nil { return } delete(c.items, c.tail.key) if c.tail.prev != nil { c.tail.prev.next = nil c.tail = c.tail.prev } else { c.head = nil c.tail = nil } } func (c *lruCache) clear() { c.mu.Lock() defer c.mu.Unlock() c.items = make(map[uint64]*lruItem) c.head = nil c.tail = nil } // SyntaxHighlighter is a syntax highlighter. type SyntaxHighlighter struct { style *chroma.Style cachedLexer chroma.Lexer cachedFilename string cache *lruCache cacheEnabled bool } // NewSyntaxHighlighter creates a syntax highlighter. // filename: used for language detection // isDark: whether the background is dark func NewSyntaxHighlighter(filename string, isDark bool) *SyntaxHighlighter { h := &SyntaxHighlighter{ style: getDefaultChromaStyle(isDark), cache: newLRUCache(defaultCacheSize), cacheEnabled: true, } // Warm up lexer if filename != "" { h.cachedLexer = lexers.Match(filename) if h.cachedLexer != nil { h.cachedLexer = chroma.Coalesce(h.cachedLexer) h.cachedFilename = filename } } return h } // Highlight highlights code. // source: original code // bgColor: background color (hex format, e.g. "#303a30") func (h *SyntaxHighlighter) Highlight(source, bgColor string) string { if h.style == nil { return source } // Preprocess: sanitize line (replace tabs, escape control chars) source = sanitizeLine(source) // Check cache if h.cacheEnabled && len(source) <= maxSourceLenForCache { cacheKey := h.createCacheKey(source, h.cachedFilename, bgColor) if cached, ok := h.cache.get(cacheKey); ok { return cached } result := h.doHighlight(source, bgColor) h.cache.set(cacheKey, result) return result } return h.doHighlight(source, bgColor) } // sanitizeLine processes a line of code: // - Replaces tabs with spaces // - Replaces control characters with Unicode Control Picture characters func sanitizeLine(s string) string { var result strings.Builder result.Grow(len(s) + len(s)/4) // extra space for tab expansion for _, r := range s { switch { case r == '\t': result.WriteString(tabSpaces) case r == 0x7F: result.WriteRune('\u2421') // DEL -> ␡ case r >= 0x00 && r <= 0x1F: result.WriteRune('\u2400' + r) // Control chars -> Unicode Control Picture default: result.WriteRune(r) } } return result.String() } // doHighlight performs actual highlighting. func (h *SyntaxHighlighter) doHighlight(source, bgColor string) string { lexer := h.cachedLexer if lexer == nil { return source } it, err := lexer.Tokenise(nil, source) if err != nil { return source } var b strings.Builder formatter := newDiffFormatter(bgColor) if err := formatter.Format(&b, h.style, it); err != nil { return source } return b.String() } // createCacheKey creates a cache key. func (h *SyntaxHighlighter) createCacheKey(source, filename, bgColor string) uint64 { hh := xxh3.New() _, _ = hh.WriteString(filename) _, _ = hh.Write([]byte{0}) _, _ = hh.WriteString(bgColor) _, _ = hh.Write([]byte{0}) _, _ = hh.WriteString(source) return hh.Sum64() } // ClearCache clears the cache. func (h *SyntaxHighlighter) ClearCache() { h.cache.clear() } // Enabled returns whether the highlighter is enabled. func (h *SyntaxHighlighter) Enabled() bool { return h.style != nil } // diffFormatter is a Chroma formatter that forces background color. type diffFormatter struct { bgColor string } func newDiffFormatter(bgColor string) *diffFormatter { return &diffFormatter{ bgColor: bgColor, } } func (f *diffFormatter) Format(w io.Writer, style *chroma.Style, it chroma.Iterator) error { for token := it(); token != chroma.EOF; token = it() { value := strings.TrimRight(token.Value, "\n") if value == "" { continue } entry := style.Get(token.Type) if entry.IsZero() { _, _ = fmt.Fprint(w, value) continue } s := lipgloss.NewStyle().Background(lipgloss.Color(f.bgColor)) if entry.Bold == chroma.Yes { s = s.Bold(true) } if entry.Underline == chroma.Yes { s = s.Underline(true) } if entry.Italic == chroma.Yes { s = s.Italic(true) } if entry.Colour.IsSet() { s = s.Foreground(lipgloss.Color(entry.Colour.String())) } _, _ = fmt.Fprint(w, s.Render(value)) } return nil } // getDefaultChromaStyle returns a theme suitable for terminal background. // Dark theme uses charmtone palette. // Light theme uses catppuccin-latte. func getDefaultChromaStyle(isDark bool) *chroma.Style { if isDark { // Dark theme: charmtone palette return chroma.MustNewStyle("zeta-charmtone-dark", chroma.StyleEntries{ chroma.Text: charmtone.Smoke.Hex() + " bg:" + charmtone.Charcoal.Hex(), chroma.Error: charmtone.Butter.Hex() + " bg:" + charmtone.Sriracha.Hex(), chroma.Comment: charmtone.Oyster.Hex(), chroma.CommentPreproc: charmtone.Bengal.Hex(), chroma.Keyword: charmtone.Malibu.Hex(), chroma.KeywordReserved: charmtone.Pony.Hex(), chroma.KeywordNamespace: charmtone.Pony.Hex(), chroma.KeywordType: charmtone.Guppy.Hex(), chroma.Operator: charmtone.Salmon.Hex(), chroma.Punctuation: charmtone.Zest.Hex(), chroma.Name: charmtone.Smoke.Hex(), chroma.NameBuiltin: charmtone.Cheeky.Hex(), chroma.NameTag: charmtone.Mauve.Hex(), chroma.NameAttribute: charmtone.Hazy.Hex(), chroma.NameClass: "underline bold " + charmtone.Salt.Hex(), chroma.NameConstant: charmtone.Salt.Hex(), chroma.NameDecorator: charmtone.Citron.Hex(), chroma.NameException: charmtone.Coral.Hex(), chroma.NameFunction: charmtone.Guac.Hex(), chroma.NameOther: charmtone.Smoke.Hex(), chroma.Literal: charmtone.Smoke.Hex(), chroma.LiteralNumber: charmtone.Julep.Hex(), chroma.LiteralDate: charmtone.Salt.Hex(), chroma.LiteralString: charmtone.Cumin.Hex(), chroma.LiteralStringEscape: charmtone.Bok.Hex(), chroma.GenericDeleted: charmtone.Coral.Hex(), chroma.GenericEmph: "italic", chroma.GenericInserted: charmtone.Guac.Hex(), chroma.GenericStrong: "bold", chroma.GenericSubheading: charmtone.Squid.Hex(), chroma.Background: "bg:" + charmtone.Charcoal.Hex(), }) } // Light theme: catppuccin-latte return styles.Get("catppuccin-latte") } ================================================ FILE: modules/patchview/highlight_test.go ================================================ package patchview import ( "testing" ) func TestSanitizeLine(t *testing.T) { tests := []struct { name string input string expected string }{ { name: "normal text", input: "hello world", expected: "hello world", }, { name: "tab replacement", input: "hello\tworld", expected: "hello world", // 4 spaces }, { name: "control characters", input: "hello\x00world", expected: "hello\u2400world", // NUL -> ␀ }, { name: "DEL character", input: "hello\x7fworld", expected: "hello\u2421world", // DEL -> ␡ }, { name: "mixed content", input: "\t\x00\x1b", expected: " \u2400\u241b", // tab -> 4 spaces, NUL -> ␀, ESC -> ␛ }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := sanitizeLine(tt.input) if result != tt.expected { t.Errorf("sanitizeLine(%q) = %q, want %q", tt.input, result, tt.expected) } }) } } ================================================ FILE: modules/patchview/renderer.go ================================================ package patchview import ( "fmt" "strings" "charm.land/lipgloss/v2" "github.com/charmbracelet/x/ansi" "github.com/antgroup/hugescm/modules/diferenco" ) const ( minCodeWidth = 10 ) // PatchRenderer renders a diferenco.Patch for display. // It handles line numbers, syntax highlighting, and horizontal scrolling. type PatchRenderer struct { patch *diferenco.Patch style PatchViewStyle width int height int xOffset int yOffset int // Precomputed values totalLines int hunkLineOffsets []int beforeNumDigits int afterNumDigits int // Options lineNumbers bool // Syntax highlighting highlighter *SyntaxHighlighter syntaxHighlight bool isDark bool } // NewPatchRenderer creates a new PatchRenderer with default style. func NewPatchRenderer() *PatchRenderer { return &PatchRenderer{ style: DefaultStyle(), lineNumbers: true, syntaxHighlight: true, isDark: hasDarkBackground(), } } // SetPatch sets the patch to render. func (r *PatchRenderer) SetPatch(p *diferenco.Patch) { r.patch = p r.xOffset = 0 r.yOffset = 0 r.computeMetadata() r.initHighlighter() } // SetSize sets the rendering area size. func (r *PatchRenderer) SetSize(width, height int) { r.width = width r.height = height } // SetStyle sets the style for rendering. func (r *PatchRenderer) SetStyle(style PatchViewStyle) { r.style = style } // SetLineNumbers sets whether to show line numbers. func (r *PatchRenderer) SetLineNumbers(enabled bool) { r.lineNumbers = enabled } // SetSyntaxHighlight sets whether to enable syntax highlighting. func (r *PatchRenderer) SetSyntaxHighlight(enabled bool) { r.syntaxHighlight = enabled if !enabled { r.highlighter = nil } } // SetDarkBackground sets the terminal background mode. func (r *PatchRenderer) SetDarkBackground(dark bool) { r.isDark = dark r.initHighlighter() } // initHighlighter initializes the syntax highlighter. func (r *PatchRenderer) initHighlighter() { if r.patch == nil || !r.syntaxHighlight { r.highlighter = nil return } filename := r.patch.Name() if filename == "" { r.highlighter = nil return } r.highlighter = NewSyntaxHighlighter(filename, r.isDark) } // SetYOffset sets the vertical scroll offset. func (r *PatchRenderer) SetYOffset(offset int) { r.yOffset = max(0, min(offset, r.maxYOffset())) } // SetXOffset sets the horizontal scroll offset. // Note: Unlike SetYOffset, there's no upper bound because line widths vary // and may contain ANSI escape sequences. The render function handles // out-of-bounds offsets gracefully by showing empty content. func (r *PatchRenderer) SetXOffset(offset int) { r.xOffset = max(0, offset) } // YOffset returns the current vertical offset. func (r *PatchRenderer) YOffset() int { return r.yOffset } // XOffset returns the current horizontal offset. func (r *PatchRenderer) XOffset() int { return r.xOffset } // TotalLines returns the total number of lines in the patch. func (r *PatchRenderer) TotalLines() int { return r.totalLines } // HunkOffsets returns the starting line offset for each hunk. // This is used for [ and ] navigation between hunks. func (r *PatchRenderer) HunkOffsets() []int { return r.hunkLineOffsets } // Render renders the patch content for the current viewport. func (r *PatchRenderer) Render() string { if r.patch == nil || r.width <= 0 || r.height <= 0 { return "" } if r.patch.IsBinary { return r.style.DiffStyle.FileName.Render("Binary file differs") } if len(r.patch.Hunks) == 0 { return r.style.DiffStyle.FileMeta.Render("No changes") } showLineNums := r.shouldShowLineNumbers() codeW := r.codeWidth() var sb strings.Builder sb.Grow(r.width * r.height) lineIdx := 0 printed := 0 for _, hunk := range r.patch.Hunks { // Hunk header line if lineIdx >= r.yOffset && printed < r.height { line := r.renderHunkHeader(hunk, showLineNums, codeW) if printed > 0 { sb.WriteString("\n") } sb.WriteString(line) printed++ } lineIdx++ if printed >= r.height { break } // Hunk content lines beforeLine := hunk.FromLine afterLine := hunk.ToLine for _, l := range hunk.Lines { if lineIdx >= r.yOffset && printed < r.height { line := r.renderLine(l, beforeLine, afterLine, showLineNums, codeW) if printed > 0 { sb.WriteString("\n") } sb.WriteString(line) printed++ } switch l.Kind { case diferenco.Delete: beforeLine++ case diferenco.Insert: afterLine++ default: beforeLine++ afterLine++ } lineIdx++ if printed >= r.height { break } } if printed >= r.height { break } } // Fill remaining lines for printed < r.height { if printed > 0 { sb.WriteString("\n") } sb.WriteString(r.renderEmptyLine(showLineNums, codeW)) printed++ } return sb.String() } // computeMetadata precomputes line counts and hunk offsets. func (r *PatchRenderer) computeMetadata() { if r.patch == nil || len(r.patch.Hunks) == 0 { r.totalLines = 0 r.hunkLineOffsets = nil r.beforeNumDigits = 1 r.afterNumDigits = 1 return } maxBefore, maxAfter := 0, 0 r.totalLines = 0 r.hunkLineOffsets = make([]int, 0, len(r.patch.Hunks)) for _, h := range r.patch.Hunks { r.hunkLineOffsets = append(r.hunkLineOffsets, r.totalLines) beforeLine := h.FromLine afterLine := h.ToLine for _, l := range h.Lines { switch l.Kind { case diferenco.Delete: beforeLine++ case diferenco.Insert: afterLine++ default: beforeLine++ afterLine++ } } maxBefore = max(maxBefore, beforeLine) maxAfter = max(maxAfter, afterLine) r.totalLines += 1 + len(h.Lines) // 1 for hunk header } r.beforeNumDigits = digitCount(maxBefore) r.afterNumDigits = digitCount(maxAfter) } // maxYOffset returns the maximum vertical scroll offset. func (r *PatchRenderer) maxYOffset() int { return max(0, r.totalLines-r.height) } // shouldShowLineNumbers determines if line numbers should be shown. func (r *PatchRenderer) shouldShowLineNumbers() bool { if !r.lineNumbers { return false } return r.width-r.lineNumWidth() >= minCodeWidth } // lineNumWidth returns the width needed for line numbers. func (r *PatchRenderer) lineNumWidth() int { if !r.lineNumbers { return 0 } // (before digits + padding*2) + (after digits + padding*2) return (r.beforeNumDigits + lineNumPadding*2) + (r.afterNumDigits + lineNumPadding*2) } // codeWidth returns the width available for code content. func (r *PatchRenderer) codeWidth() int { w := r.width - r.lineNumWidth() if w < minCodeWidth && r.lineNumbers { // Hide line numbers if width is insufficient return r.width } return max(w, 0) } // renderHunkHeader renders a hunk header line (@@ -1,3 +1,4 @@). func (r *PatchRenderer) renderHunkHeader(hunk *diferenco.Hunk, showLineNums bool, codeW int) string { style := &r.style.DiffStyle.DividerLine // Build hunk header with section fromCount := hunkFromCount(hunk) toCount := hunkToCount(hunk) header := formatHunkHeader(hunk.FromLine, fromCount, hunk.ToLine, toCount, hunk.Section) // Remove leading @@ if present headerContent := header if len(headerContent) > 2 && headerContent[:2] == "@@" { headerContent = headerContent[2:] } var sb strings.Builder if showLineNums { sb.WriteString(style.LineNumber.Render(pad("…", r.beforeNumDigits))) sb.WriteString(style.LineNumber.Render(pad("…", r.afterNumDigits))) } sb.WriteString(style.Code.Width(codeW).Render(headerContent)) return sb.String() } // renderLine renders a single diff line. func (r *PatchRenderer) renderLine(l diferenco.Line, beforeLine, afterLine int, showLineNums bool, codeW int) string { var style *LineStyle var sym string var beforeNum, afterNum string switch l.Kind { case diferenco.Insert: style = &r.style.DiffStyle.InsertLine sym = "+" beforeNum = pad(" ", r.beforeNumDigits) afterNum = pad(afterLine, r.afterNumDigits) case diferenco.Delete: style = &r.style.DiffStyle.DeleteLine sym = "-" beforeNum = pad(beforeLine, r.beforeNumDigits) afterNum = pad(" ", r.afterNumDigits) default: style = &r.style.DiffStyle.EqualLine sym = " " beforeNum = pad(beforeLine, r.beforeNumDigits) afterNum = pad(afterLine, r.afterNumDigits) } var sb strings.Builder // Line numbers with background if showLineNums { sb.WriteString(style.LineNumber.Render(beforeNum)) sb.WriteString(style.LineNumber.Render(afterNum)) } // Get original content and remove trailing newlines (\r\n or \n) content := strings.TrimRight(l.Content, "\r\n") // Apply syntax highlighting (on full code before adding symbol) if r.highlighter != nil && r.syntaxHighlight && content != "" { bgColor := extractBgColor(style.Code) content = r.highlighter.Highlight(content, bgColor) } // Build full content (symbol + content) fullContent := sym + " " + content // Apply horizontal scroll if r.xOffset > 0 && len(fullContent) > 0 { contentWidth := lipgloss.Width(fullContent) if contentWidth > r.xOffset { fullContent = ansi.TruncateLeftWc(fullContent, r.xOffset, "") } else { fullContent = "" } } // Truncate to fit width and render with background fill truncated := ansi.TruncateWc(fullContent, codeW, "") sb.WriteString(style.Code.Width(codeW).Render(truncated)) return sb.String() } // renderEmptyLine renders an empty line for padding. func (r *PatchRenderer) renderEmptyLine(showLineNums bool, codeW int) string { style := &r.style.DiffStyle.EqualLine var sb strings.Builder if showLineNums { blank := strings.Repeat(" ", r.beforeNumDigits) blankAfter := strings.Repeat(" ", r.afterNumDigits) sb.WriteString(style.LineNumber.Render(blank)) sb.WriteString(style.LineNumber.Render(blankAfter)) } // Use Width() to fill background color sb.WriteString(style.Code.Width(codeW).Render("")) return sb.String() } // hunkFromCount calculates the number of lines in hunk from source. func hunkFromCount(hunk *diferenco.Hunk) int { count := 0 for _, l := range hunk.Lines { if l.Kind != diferenco.Insert { count++ } } return count } // hunkToCount calculates the number of lines in hunk to target. func hunkToCount(hunk *diferenco.Hunk) int { count := 0 for _, l := range hunk.Lines { if l.Kind != diferenco.Delete { count++ } } return count } // formatHunkHeader formats a hunk header. func formatHunkHeader(fromLine, fromCount, toLine, toCount int, section string) string { var sb strings.Builder sb.WriteString("@@") sb.WriteString(formatHunkRange(fromLine, fromCount, "-")) sb.WriteString(formatHunkRange(toLine, toCount, "+")) sb.WriteString(" @@") if section != "" { sb.WriteString(" ") sb.WriteString(section) } return sb.String() } // formatHunkRange formats a hunk range like "-1,3" or "-1". func formatHunkRange(start, count int, prefix string) string { switch count { case 0: return fmt.Sprintf(" %s%d,0", prefix, start) case 1: return fmt.Sprintf(" %s%d", prefix, start) default: return fmt.Sprintf(" %s%d,%d", prefix, start, count) } } // digitCount returns the number of digits in n. func digitCount(n int) int { if n <= 0 { return 1 } count := 0 for n > 0 { count++ n /= 10 } return count } // pad left-pads a value to the target width (right-aligned). func pad(v any, width int) string { s := fmt.Sprintf("%v", v) w := ansi.StringWidth(s) if w >= width { return s } return strings.Repeat(" ", width-w) + s } ================================================ FILE: modules/patchview/status_bar.go ================================================ package patchview import ( "fmt" "strconv" "strings" "charm.land/lipgloss/v2" "github.com/charmbracelet/x/ansi" "github.com/antgroup/hugescm/modules/diferenco" ) // StatusBar is the interface for rendering a status bar in the patch view. type StatusBar interface { View(width int) string Height() int } // CursorSetter is an optional interface for StatusBar implementations // that need to be notified when the cursor changes. type CursorSetter interface { SetCursor(idx int) } // PatchesSetter is an optional interface for StatusBar implementations // that need access to the patches data. type PatchesSetter interface { SetPatches(patches []*diferenco.Patch) } // DefaultStatusBar is the default status bar implementation. // It displays: status + separator + path + stats + file count. type DefaultStatusBar struct { patches []*diferenco.Patch cursor int style PatchViewStyle } // NewDefaultStatusBar creates a new DefaultStatusBar. func NewDefaultStatusBar() *DefaultStatusBar { return &DefaultStatusBar{ style: DefaultStyle(), } } // SetStyle sets the style for the status bar. func (s *DefaultStatusBar) SetStyle(style PatchViewStyle) { s.style = style } // SetPatches sets the patches data. func (s *DefaultStatusBar) SetPatches(patches []*diferenco.Patch) { s.patches = patches } // SetCursor sets the current cursor position. func (s *DefaultStatusBar) SetCursor(idx int) { s.cursor = idx } // Height returns the height of the status bar (always 1). func (s *DefaultStatusBar) Height() int { return 1 } // View renders the status bar. func (s *DefaultStatusBar) View(width int) string { if len(s.patches) == 0 { return s.style.HeaderBg.Width(width).Render(" No changes") } p := s.patches[s.cursor] stat := p.Stat() ps := patchStatus(p) // Status indicator status := s.statusStyle(ps).Render(ps) // Stats var stats string switch { case stat.Addition > 0 && stat.Deletion > 0: stats = s.style.Addition.Render("+"+strconv.Itoa(stat.Addition)) + " " + s.style.Deletion.Render("-"+strconv.Itoa(stat.Deletion)) case stat.Addition > 0: stats = s.style.Addition.Render("+" + strconv.Itoa(stat.Addition)) case stat.Deletion > 0: stats = s.style.Deletion.Render("-" + strconv.Itoa(stat.Deletion)) } // File count fileCount := s.style.FileCount.Render( strconv.Itoa(s.cursor+1) + "/" + strconv.Itoa(len(s.patches))) // Separator sep := s.style.Separator.Render("│") // Path display pathDisplay := patchName(p) fileCountWidth := lipgloss.Width(fileCount) statsWidth := lipgloss.Width(stats) fixedWidth := 1 + 1 + 3 + fileCountWidth + 2 // space + status + space + sep + space + count + padding availableForPathAndStats := width - fixedWidth showStats := availableForPathAndStats > statsWidth+10 var pathWidth int if showStats { pathWidth = availableForPathAndStats - statsWidth - 1 } else { pathWidth = availableForPathAndStats } pathWidth = max(pathWidth, 0) if pathWidth > 0 && lipgloss.Width(pathDisplay) > pathWidth { remove := lipgloss.Width(pathDisplay) - pathWidth + 1 pathDisplay = ansi.TruncateLeftWc(pathDisplay, remove, "…") } pathDisplay = s.style.PathDisplay.Render(pathDisplay) // Build left side var left string if showStats { left = fmt.Sprintf(" %s %s %s %s", status, sep, pathDisplay, stats) } else { left = fmt.Sprintf(" %s %s %s", status, sep, pathDisplay) } // Calculate spacing leftWidth := lipgloss.Width(left) rightWidth := lipgloss.Width(fileCount) spaceWidth := max(width-leftWidth-rightWidth, 0) return s.style.HeaderBg.Width(width).Render( left + strings.Repeat(" ", spaceWidth) + fileCount) } // statusStyle returns the style for a status character. func (s *DefaultStatusBar) statusStyle(status string) lipgloss.Style { switch status { case "A": return s.style.StatusAdded case "D": return s.style.StatusDeleted case "R": return s.style.StatusRenamed default: return s.style.StatusModified } } // patchName returns the display name for a patch. func patchName(p *diferenco.Patch) string { if p == nil { return "" } switch { case p.From == nil && p.To != nil: return p.To.Name case p.From != nil && p.To == nil: return p.From.Name case p.From != nil && p.To != nil && p.From.Name != p.To.Name: return p.From.Name + " → " + p.To.Name case p.To != nil: return p.To.Name case p.From != nil: return p.From.Name default: return "" } } // patchStatus returns the status character for a patch. func patchStatus(p *diferenco.Patch) string { if p == nil { return "M" } switch { case p.From == nil: return "A" case p.To == nil: return "D" case p.From != nil && p.To != nil && p.From.Name != p.To.Name: return "R" default: return "M" } } ================================================ FILE: modules/patchview/styles.go ================================================ package patchview import ( "fmt" "os" "charm.land/lipgloss/v2" "github.com/charmbracelet/x/exp/charmtone" ) const lineNumPadding = 1 // LineStyle defines the style for a single line. type LineStyle struct { LineNumber lipgloss.Style // Line number style Code lipgloss.Style // Code content style } // DiffViewStyle defines the complete style for DiffView. type DiffViewStyle struct { DividerLine LineStyle // Hunk divider line style (@@ -1,3 +1,4 @@) MissingLine LineStyle // Missing line style (used in Split view) EqualLine LineStyle // Unchanged line style InsertLine LineStyle // Inserted line style DeleteLine LineStyle // Deleted line style FileName lipgloss.Style // File name style FileMeta lipgloss.Style // File metadata style } // PatchViewStyle defines the visual style for the patch view. type PatchViewStyle struct { // File list styles Addition lipgloss.Style Deletion lipgloss.Style Selected lipgloss.Style // Diff view styles (using LineStyle for background fill) DiffStyle DiffViewStyle // UI styles HeaderBg lipgloss.Style FileCount lipgloss.Style Separator lipgloss.Style PathDisplay lipgloss.Style FilesTitle lipgloss.Style FooterBg lipgloss.Style // Status styles for header StatusAdded lipgloss.Style StatusDeleted lipgloss.Style StatusRenamed lipgloss.Style StatusModified lipgloss.Style } // DefaultDarkDiffViewStyle returns the dark theme style. func DefaultDarkDiffViewStyle() DiffViewStyle { setPadding := func(s lipgloss.Style) lipgloss.Style { return s.Padding(0, lineNumPadding).Align(lipgloss.Right) } return DiffViewStyle{ DividerLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Foreground(charmtone.Smoke). Background(charmtone.BBQ)), Code: lipgloss.NewStyle(). Foreground(charmtone.Smoke). Background(charmtone.BBQ), }, MissingLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Background(charmtone.BBQ)), Code: lipgloss.NewStyle(). Background(charmtone.BBQ), }, EqualLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Foreground(charmtone.Squid). Background(charmtone.Pepper)), Code: lipgloss.NewStyle(). Foreground(charmtone.Squid). Background(charmtone.Pepper), }, InsertLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Foreground(lipgloss.Color("#629657")). Background(lipgloss.Color("#2b322a"))), Code: lipgloss.NewStyle(). Background(lipgloss.Color("#323931")), }, DeleteLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Foreground(lipgloss.Color("#a45c59")). Background(lipgloss.Color("#312929"))), Code: lipgloss.NewStyle(). Background(lipgloss.Color("#383030")), }, FileName: lipgloss.NewStyle(). Bold(true). Foreground(lipgloss.Color("#79B8FF")), FileMeta: lipgloss.NewStyle(). Foreground(lipgloss.Color("#959DA5")), } } // DefaultLightDiffViewStyle returns the light theme style. // Color scheme based on One Light Pro (clear, bright, moderate contrast). func DefaultLightDiffViewStyle() DiffViewStyle { setPadding := func(s lipgloss.Style) lipgloss.Style { return s.Padding(0, lineNumPadding).Align(lipgloss.Right) } return DiffViewStyle{ DividerLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Foreground(lipgloss.Color("#696C77")). Background(lipgloss.Color("#E5E5E6"))), Code: lipgloss.NewStyle(). Foreground(lipgloss.Color("#696C77")). Background(lipgloss.Color("#E5E5E6")), }, MissingLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Background(lipgloss.Color("#F0F0F0"))), Code: lipgloss.NewStyle(). Background(lipgloss.Color("#F5F5F5")), }, EqualLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Foreground(lipgloss.Color("#9D9D9F")). Background(lipgloss.Color("#F0F0F0"))), Code: lipgloss.NewStyle(). Foreground(lipgloss.Color("#383A42")). Background(lipgloss.Color("#F5F5F5")), }, InsertLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Foreground(lipgloss.Color("#50A14F")). Background(lipgloss.Color("#E0F0E0"))), Code: lipgloss.NewStyle(). Foreground(lipgloss.Color("#383A42")). Background(lipgloss.Color("#D4EDD4")), }, DeleteLine: LineStyle{ LineNumber: setPadding(lipgloss.NewStyle(). Foreground(lipgloss.Color("#E45649")). Background(lipgloss.Color("#FAE8E6"))), Code: lipgloss.NewStyle(). Foreground(lipgloss.Color("#383A42")). Background(lipgloss.Color("#F5D4D1")), }, FileName: lipgloss.NewStyle(). Bold(true). Foreground(lipgloss.Color("#4078F2")), FileMeta: lipgloss.NewStyle(). Foreground(lipgloss.Color("#696C77")), } } // DefaultDiffViewStyle automatically selects theme based on terminal background. func DefaultDiffViewStyle() DiffViewStyle { if hasDarkBackground() { return DefaultDarkDiffViewStyle() } return DefaultLightDiffViewStyle() } // hasDarkBackground detects terminal background color. func hasDarkBackground() bool { return lipgloss.HasDarkBackground(os.Stdin, os.Stdout) } // DefaultStyle returns the default style with auto-detected theme. func DefaultStyle() PatchViewStyle { if hasDarkBackground() { return DefaultDarkStyle() } return DefaultLightStyle() } // DefaultDarkStyle returns the dark theme style. func DefaultDarkStyle() PatchViewStyle { return PatchViewStyle{ Addition: lipgloss.NewStyle().Foreground(lipgloss.Color("#85E89D")), Deletion: lipgloss.NewStyle().Foreground(lipgloss.Color("#F97583")), Selected: lipgloss.NewStyle().Background(lipgloss.Color("#282a38")), DiffStyle: DefaultDarkDiffViewStyle(), HeaderBg: lipgloss.NewStyle(), FileCount: lipgloss.NewStyle().Foreground(lipgloss.Color("8")), Separator: lipgloss.NewStyle().Foreground(lipgloss.Color("8")), PathDisplay: lipgloss.NewStyle().Foreground(lipgloss.Color("15")).Bold(true), FilesTitle: lipgloss.NewStyle().Foreground(lipgloss.Color("12")).Bold(true), FooterBg: lipgloss.NewStyle().Foreground(lipgloss.Color("7")).Padding(0, 1), StatusAdded: lipgloss.NewStyle().Foreground(lipgloss.Color("2")).Bold(true), StatusDeleted: lipgloss.NewStyle().Foreground(lipgloss.Color("1")).Bold(true), StatusRenamed: lipgloss.NewStyle().Foreground(lipgloss.Color("6")).Bold(true), StatusModified: lipgloss.NewStyle().Foreground(lipgloss.Color("3")).Bold(true), } } // DefaultLightStyle returns the light theme style. func DefaultLightStyle() PatchViewStyle { return PatchViewStyle{ Addition: lipgloss.NewStyle().Foreground(lipgloss.Color("#22863A")), Deletion: lipgloss.NewStyle().Foreground(lipgloss.Color("#CB2431")), Selected: lipgloss.NewStyle().Background(lipgloss.Color("#ebf1fc")), DiffStyle: DefaultLightDiffViewStyle(), HeaderBg: lipgloss.NewStyle(), FileCount: lipgloss.NewStyle().Foreground(lipgloss.Color("8")), Separator: lipgloss.NewStyle().Foreground(lipgloss.Color("8")), PathDisplay: lipgloss.NewStyle().Foreground(lipgloss.Color("0")).Bold(true), FilesTitle: lipgloss.NewStyle().Foreground(lipgloss.Color("4")).Bold(true), FooterBg: lipgloss.NewStyle().Foreground(lipgloss.Color("0")).Padding(0, 1), StatusAdded: lipgloss.NewStyle().Foreground(lipgloss.Color("2")).Bold(true), StatusDeleted: lipgloss.NewStyle().Foreground(lipgloss.Color("1")).Bold(true), StatusRenamed: lipgloss.NewStyle().Foreground(lipgloss.Color("6")).Bold(true), StatusModified: lipgloss.NewStyle().Foreground(lipgloss.Color("3")).Bold(true), } } // extractBgColor extracts background color hex value from lipgloss.Style. func extractBgColor(s lipgloss.Style) string { bg := s.GetBackground() if bg == nil { return "" } r, g, b, a := bg.RGBA() if a == 0 { return "" } return fmt.Sprintf("#%02x%02x%02x", r>>8, g>>8, b>>8) } ================================================ FILE: modules/patchview/view.go ================================================ package patchview import ( "fmt" "os" "strconv" "strings" tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/item" "github.com/clipperhouse/displaywidth" ) const ( headerHeight = 1 footerHeight = 1 gapWidth = 1 borderSize = 2 titleHeight = 1 hScrollStep = 10 hScrollFastStep = 20 ) // PatchView is an interactive patch navigation view. type PatchView struct { patches []*diferenco.Patch cursor int renderer *PatchRenderer listVp *viewport.Model[*patchItem] statusBar StatusBar width int height int listWidthPct int focusRight bool yOffset int xOffset int style PatchViewStyle } // Ensure patchItem implements viewport.Object var _ viewport.Object = (*patchItem)(nil) // patchItem wraps a patch for the viewport. type patchItem struct { patch *diferenco.Patch selected bool width int style PatchViewStyle } func newPatchItem(p *diferenco.Patch, selected bool, width int, style PatchViewStyle) *patchItem { return &patchItem{patch: p, selected: selected, width: width, style: style} } func (p *patchItem) GetItem() item.Item { return item.NewItem(p.render()) } func (p *patchItem) render() string { path := patchName(p.patch) stat := p.patch.Stat() additions := stat.Addition deletions := stat.Deletion added := strconv.Itoa(additions) deleted := strconv.Itoa(deletions) var statsWidth int switch { case additions > 0 && deletions > 0: statsWidth = len(added) + 3 + len(deleted) case additions != 0: statsWidth = len(added) + 1 case deletions != 0: statsWidth = len(deleted) + 1 } reserved := 2 + statsWidth + 1 availableForPath := max(p.width-reserved, 0) var line strings.Builder if p.selected { line.WriteString(p.style.Selected.Render("▌ ")) if availableForPath > 0 { if displaywidth.String(path) > availableForPath { line.WriteString(p.style.Selected.Render(truncatePath(path, availableForPath))) } else { line.WriteString(p.style.Selected.Render(path)) } } line.WriteString(p.style.Selected.Render(" ")) switch { case additions > 0 && deletions > 0: addStyle := p.style.Selected.Foreground(p.style.Addition.GetForeground()) delStyle := p.style.Selected.Foreground(p.style.Deletion.GetForeground()) line.WriteString(addStyle.Render("+" + added)) line.WriteString(p.style.Selected.Render(" ")) line.WriteString(delStyle.Render("-" + deleted)) case additions != 0: addStyle := p.style.Selected.Foreground(p.style.Addition.GetForeground()) line.WriteString(addStyle.Render("+" + added)) case deletions != 0: delStyle := p.style.Selected.Foreground(p.style.Deletion.GetForeground()) line.WriteString(delStyle.Render("-" + deleted)) } } else { line.WriteString(" ") if availableForPath > 0 { if displaywidth.String(path) > availableForPath { line.WriteString(truncatePath(path, availableForPath)) } else { line.WriteString(path) } } line.WriteString(" ") switch { case additions > 0 && deletions > 0: line.WriteString(p.style.Addition.Render("+" + added + " ")) line.WriteString(p.style.Deletion.Render("-" + deleted)) case additions != 0: line.WriteString(p.style.Addition.Render("+" + added)) case deletions != 0: line.WriteString(p.style.Deletion.Render("-" + deleted)) } } return line.String() } // Option configures the patch view. type Option func(*PatchView) // WithStyle sets a custom style. func WithStyle(style PatchViewStyle) Option { return func(pv *PatchView) { pv.style = style } } // WithListWidth sets the file list width percentage (default 20). func WithListWidth(pct int) Option { return func(pv *PatchView) { pv.listWidthPct = pct } } // WithStatusBar sets a custom status bar. func WithStatusBar(sb StatusBar) Option { return func(pv *PatchView) { pv.statusBar = sb } } // Run starts the interactive patch navigation view. func Run(patches []*diferenco.Patch, opts ...Option) error { if len(patches) == 0 { return nil } pv := NewPatchView(patches, opts...) p := tea.NewProgram(pv, tea.WithOutput(os.Stdout)) _, err := p.Run() return err } // NewPatchView creates a new PatchView. func NewPatchView(patches []*diferenco.Patch, opts ...Option) *PatchView { pv := &PatchView{ patches: patches, renderer: NewPatchRenderer(), listVp: viewport.New(0, 0, viewport.WithSelectionEnabled[*patchItem](true)), listWidthPct: 20, style: DefaultStyle(), } for _, opt := range opts { opt(pv) } // Set up default status bar if not provided if pv.statusBar == nil { pv.statusBar = NewDefaultStatusBar() } // Apply style to components pv.renderer.SetStyle(pv.style) if sb, ok := pv.statusBar.(interface{ SetStyle(PatchViewStyle) }); ok { sb.SetStyle(pv.style) } if sb, ok := pv.statusBar.(PatchesSetter); ok { sb.SetPatches(patches) } return pv } func (pv *PatchView) Init() tea.Cmd { return nil } func (pv *PatchView) Update(msg tea.Msg) (tea.Model, tea.Cmd) { switch msg := msg.(type) { case tea.WindowSizeMsg: pv.width = msg.Width pv.height = msg.Height pv.setupLayout() return pv, nil case tea.KeyPressMsg: switch msg.String() { case "q", "ctrl+c": return pv, tea.Quit case "n": if pv.cursor < len(pv.patches)-1 { pv.selectFile(pv.cursor + 1) } return pv, nil case "p": if pv.cursor > 0 { pv.selectFile(pv.cursor - 1) } return pv, nil case "tab": pv.focusRight = !pv.focusRight return pv, nil case "left": if pv.focusRight { pv.focusRight = false } return pv, nil case "right": if !pv.focusRight { pv.focusRight = true } return pv, nil } // Right panel focus: handle diff scrolling if pv.focusRight { switch msg.String() { case "j", "down": pv.yOffset++ pv.clampYOffset() case "k", "up": pv.yOffset-- pv.clampYOffset() case "h": pv.xOffset = max(0, pv.xOffset-hScrollStep) case "l": pv.xOffset += hScrollStep case "ctrl+h", "ctrl+left": pv.xOffset = max(0, pv.xOffset-hScrollFastStep) case "ctrl+l", "ctrl+right": pv.xOffset += hScrollFastStep case "ctrl+d": pv.yOffset += pv.diffViewportHeight() / 2 pv.clampYOffset() case "ctrl+u": pv.yOffset -= pv.diffViewportHeight() / 2 pv.clampYOffset() case "g", "home": pv.yOffset = 0 case "G", "end": pv.yOffset = pv.renderer.TotalLines() - pv.diffViewportHeight() pv.clampYOffset() case "]": pv.jumpToNextHunk() case "[": pv.jumpToPrevHunk() } return pv, nil } // Left panel focus: 'l' switches to right panel if msg.String() == "l" { pv.focusRight = true return pv, nil } // Forward to list viewport vp, cmd := pv.listVp.Update(msg) pv.listVp = vp newCursor := pv.listVp.GetSelectedItemIdx() if newCursor != pv.cursor && newCursor >= 0 && newCursor < len(pv.patches) { pv.cursor = newCursor pv.renderer.SetPatch(pv.patches[newCursor]) pv.yOffset = 0 pv.xOffset = 0 if sb, ok := pv.statusBar.(CursorSetter); ok { sb.SetCursor(newCursor) } pv.updateFileListSelection() } return pv, cmd } return pv, nil } func (pv *PatchView) View() tea.View { if pv.width <= 0 || pv.height <= 0 { return tea.NewView("") } header := pv.renderHeader() fileList := pv.renderFileList() gap := " " diffContent := pv.renderDiffContent() footer := pv.renderFooter() mainContent := lipgloss.JoinHorizontal(lipgloss.Top, fileList, gap, diffContent) fullView := lipgloss.JoinVertical(lipgloss.Left, header, mainContent, footer) view := tea.NewView(fullView) view.AltScreen = true return view } // Layout calculations func (pv *PatchView) headerHeight() int { if pv.statusBar != nil { return pv.statusBar.Height() } return headerHeight } func (pv *PatchView) listPaneHeight() int { return max(pv.height-pv.headerHeight()-footerHeight, 0) } func (pv *PatchView) listContentHeight() int { return max(pv.listPaneHeight()-borderSize-titleHeight, 1) } func (pv *PatchView) listWidth() int { return max(pv.width*pv.listWidthPct/100, 1) } func (pv *PatchView) diffPaneWidth() int { return max(pv.width-pv.listWidth()-gapWidth, 0) } func (pv *PatchView) diffPaneHeight() int { return max(pv.height-pv.headerHeight()-footerHeight, 0) } func (pv *PatchView) diffViewportWidth() int { return max(pv.diffPaneWidth()-borderSize, 0) } func (pv *PatchView) diffViewportHeight() int { return max(pv.diffPaneHeight()-borderSize-titleHeight, 0) } // Actions func (pv *PatchView) selectFile(idx int) { if len(pv.patches) == 0 { return } idx = max(0, min(idx, len(pv.patches)-1)) if idx == pv.cursor { return } pv.cursor = idx pv.renderer.SetPatch(pv.patches[idx]) pv.yOffset = 0 pv.xOffset = 0 if sb, ok := pv.statusBar.(CursorSetter); ok { sb.SetCursor(idx) } } func (pv *PatchView) setupLayout() { listWidth := pv.listWidth() - borderSize listHeight := pv.listContentHeight() if listWidth > 0 && listHeight > 0 { pv.listVp.SetWidth(listWidth) pv.listVp.SetHeight(listHeight) pv.updateFileList() } vpWidth := pv.diffViewportWidth() vpHeight := pv.diffViewportHeight() pv.renderer.SetSize(vpWidth, vpHeight) if len(pv.patches) > 0 && pv.renderer.patch == nil { pv.renderer.SetPatch(pv.patches[pv.cursor]) } } func (pv *PatchView) updateFileList() { if len(pv.patches) == 0 { pv.listVp.SetObjects(nil) return } width := pv.listVp.GetWidth() items := make([]*patchItem, len(pv.patches)) for i, p := range pv.patches { items[i] = newPatchItem(p, i == pv.cursor, width, pv.style) } pv.listVp.SetObjects(items) pv.listVp.SetSelectedItemIdx(pv.cursor) } func (pv *PatchView) updateFileListSelection() { if len(pv.patches) == 0 { return } width := pv.listVp.GetWidth() items := make([]*patchItem, len(pv.patches)) for i, p := range pv.patches { items[i] = newPatchItem(p, i == pv.cursor, width, pv.style) } pv.listVp.SetObjects(items) } func (pv *PatchView) clampYOffset() { maxY := max(0, pv.renderer.TotalLines()-pv.diffViewportHeight()) pv.yOffset = max(0, min(pv.yOffset, maxY)) } func (pv *PatchView) jumpToNextHunk() { offsets := pv.renderer.HunkOffsets() for _, off := range offsets { if off > pv.yOffset { pv.yOffset = off pv.clampYOffset() return } } } func (pv *PatchView) jumpToPrevHunk() { offsets := pv.renderer.HunkOffsets() for i := len(offsets) - 1; i >= 0; i-- { if offsets[i] < pv.yOffset { pv.yOffset = offsets[i] pv.clampYOffset() return } } } // Rendering func (pv *PatchView) renderHeader() string { if pv.statusBar != nil { return pv.statusBar.View(pv.width) } return pv.style.HeaderBg.Width(pv.width).Render(" No changes") } func (pv *PatchView) renderFileList() string { listHeight := pv.listPaneHeight() borderColor := lipgloss.Color("8") if !pv.focusRight { borderColor = lipgloss.Color("12") } listStyle := lipgloss.NewStyle(). Width(pv.listWidth()). Height(listHeight). Border(lipgloss.RoundedBorder()). BorderForeground(borderColor) if len(pv.patches) == 0 { return listStyle.Render(" No changes") } title := pv.style.FilesTitle.Render(" Files ") content := pv.listVp.View() return listStyle.Render(title + "\n" + content) } func (pv *PatchView) renderDiffContent() string { paneWidth := pv.diffPaneWidth() paneHeight := pv.diffPaneHeight() borderColor := lipgloss.Color("8") if pv.focusRight { borderColor = lipgloss.Color("12") } diffStyle := lipgloss.NewStyle(). Width(paneWidth). Height(paneHeight). Border(lipgloss.RoundedBorder()). BorderForeground(borderColor) if len(pv.patches) > 0 { pv.renderer.SetYOffset(pv.yOffset) pv.renderer.SetXOffset(pv.xOffset) content := pv.renderer.Render() pctText := "" total := pv.renderer.TotalLines() if total > 0 { vpH := pv.diffViewportHeight() pct := min(100, (pv.yOffset+vpH)*100/max(total, 1)) pctText = fmt.Sprintf(" (%d%%)", pct) } title := pv.style.FilesTitle.Render(fmt.Sprintf(" Diff%s ", pctText)) return diffStyle.Render(title + "\n" + content) } return diffStyle.Render(" No diff content") } func (pv *PatchView) renderFooter() string { var scrollInfo string total := pv.renderer.TotalLines() if total > 0 { vpH := pv.diffViewportHeight() pct := min(100, (pv.yOffset+vpH)*100/max(total, 1)) scrollInfo = fmt.Sprintf("Lines: %d-%d/%d (%d%%)", pv.yOffset+1, min(pv.yOffset+vpH, total), total, pct) if pv.xOffset > 0 { scrollInfo += fmt.Sprintf(" Col: %d+", pv.xOffset) } } var keys string if pv.focusRight { keys = "j/k:scroll h/l:hscroll [/]:hunk g/G:top/bottom tab:files n/p:file q:quit" } else { keys = "j/k:navigate l/→:diff tab:diff n/p:file q:quit" } leftWidth := lipgloss.Width(scrollInfo) rightWidth := lipgloss.Width(keys) spaceWidth := max(pv.width-leftWidth-rightWidth-2, 0) content := scrollInfo + " " + strings.Repeat(" ", spaceWidth) + keys return pv.style.FooterBg.Width(pv.width).Render(content) } // truncatePath truncates a path from the left to fit within maxWidth. func truncatePath(path string, maxWidth int) string { if maxWidth <= 0 { return "" } if displaywidth.String(path) <= maxWidth { return path } if maxWidth == 1 { return "…" } target := maxWidth - 1 runes := []rune(path) width := 0 cut := len(runes) for i := len(runes) - 1; i >= 0; i-- { w := displaywidth.Rune(runes[i]) if width+w > target { break } width += w cut = i } return "…" + string(runes[cut:]) } ================================================ FILE: modules/plumbing/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2018 Sourced Technologies, S.L. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: modules/plumbing/error.go ================================================ package plumbing import ( "errors" "fmt" ) var ( //ErrStop is used to stop a ForEach function in an Iter ErrStop = errors.New("stop iter") ) // noSuchObject is an error type that occurs when no object with a given object // ID is available. type noSuchObject struct { oid Hash } // Error implements the error.Error() function. func (e *noSuchObject) Error() string { return fmt.Sprintf("zeta: no such object: %s", e.oid) } // NoSuchObject creates a new error representing a missing object with a given // object ID. func NoSuchObject(oid Hash) error { return &noSuchObject{oid: oid} } // IsNoSuchObject indicates whether an error is a noSuchObject and is non-nil. func IsNoSuchObject(e error) bool { var err *noSuchObject return errors.As(e, &err) } func AsNoSuchObjectErr(e error) (Hash, bool) { if e, ok := errors.AsType[*noSuchObject](e); ok { return e.oid, true } return ZeroHash, false } type ErrResourceLocked struct { name ReferenceName t string } func (err *ErrResourceLocked) Error() string { return fmt.Sprintf("%s '%s' locked", err.t, err.name) } func IsErrResourceLocked(err error) bool { var e *ErrResourceLocked return errors.As(err, &e) } func NewErrResourceLocked(t string, name ReferenceName) error { return &ErrResourceLocked{t: t, name: name} } type ErrRevNotFound struct { Reason string } func (e *ErrRevNotFound) Error() string { return e.Reason } func NewErrRevNotFound(format string, a ...any) error { return &ErrRevNotFound{Reason: fmt.Sprintf(format, a...)} } func IsErrRevNotFound(err error) bool { var e *ErrRevNotFound return errors.As(err, &e) } ================================================ FILE: modules/plumbing/filemode/filemode.go ================================================ package filemode import ( "encoding/binary" "errors" "fmt" "os" "strconv" "strings" "github.com/antgroup/hugescm/modules/strengthen" ) // A FileMode represents the kind of tree entries used by git. It // resembles regular file systems modes, although FileModes are // considerably simpler (there are not so many), and there are some, // like Submodule that has no file system equivalent. type FileMode uint32 const ( // Empty is used as the FileMode of tree elements when comparing // trees in the following situations: // // - the mode of tree elements before their creation. - the mode of // tree elements after their deletion. - the mode of unmerged // elements when checking the index. // // Empty has no file system equivalent. As Empty is the zero value // of FileMode, it is also returned by New and // NewFromOsNewFromOSFileMode along with an error, when they fail. Empty FileMode = 0 // Dir represent a Directory. Dir FileMode = 0040000 // Regular represent non-executable files. Please note this is not // the same as golang regular files, which include executable files. Regular FileMode = 0100644 // Deprecated represent non-executable files with the group writable // bit set. This mode was supported by the first versions of git, // but it has been deprecated nowadays. This library uses them // internally, so you can read old packfiles, but will treat them as // Regulars when interfacing with the outside world. This is the // standard git behavior. Deprecated FileMode = 0100664 // Executable represents executable files. Executable FileMode = 0100755 // Symlink represents symbolic links to files. Symlink FileMode = 0120000 // Submodule represents git submodules. This mode has no file system // equivalent. Submodule FileMode = 0160000 // Fragmentation of large files Fragments FileMode = 0400000 ) // New takes the octal string representation of a FileMode and returns // the FileMode and a nil error. If the string can not be parsed to a // 32 bit unsigned octal number, it returns Empty and the parsing error. // // Example: "40000" means Dir, "100644" means Regular. // // Please note this function does not check if the returned FileMode // is valid in git or if it is malformed. For instance, "1" will // return the malformed FileMode(1) and a nil error. func New(s string) (FileMode, error) { n, err := strconv.ParseUint(s, 8, 32) if err != nil { return Empty, err } return FileMode(n), nil } // NewFromOS returns the FileMode used by git to represent // the provided file system modes and a nil error on success. If the // file system mode cannot be mapped to any valid git mode (as with // sockets or named pipes), it will return Empty and an error. // // Note that some git modes cannot be generated from os.FileModes, like // Deprecated and Submodule; while Empty will be returned, along with an // error, only when the method fails. func NewFromOS(m os.FileMode) (FileMode, error) { if m.IsRegular() { if isSetTemporary(m) { return Empty, fmt.Errorf("no equivalent git mode for %s", m) } if isSetCharDevice(m) { return Empty, fmt.Errorf("no equivalent git mode for %s", m) } if isSetUserExecutable(m) { return Executable, nil } return Regular, nil } if m.IsDir() { return Dir, nil } if isSetSymLink(m) { return Symlink, nil } return Empty, fmt.Errorf("no equivalent git mode for %s", m) } func isSetCharDevice(m os.FileMode) bool { return m&os.ModeCharDevice != 0 } func isSetTemporary(m os.FileMode) bool { return m&os.ModeTemporary != 0 } func isSetUserExecutable(m os.FileMode) bool { return m&0100 != 0 } func isSetSymLink(m os.FileMode) bool { return m&os.ModeSymlink != 0 } func (m FileMode) Origin() FileMode { return m &^ Fragments } // Bytes return a slice of 4 bytes with the mode in little endian // encoding. func (m FileMode) Bytes() []byte { ret := make([]byte, 4) binary.LittleEndian.PutUint32(ret, uint32(m)) return ret } // IsMalformed returns if the FileMode should not appear in a git packfile, // this is: Empty and any other mode not mentioned as a constant in this // package. func (m FileMode) IsMalformed() bool { originMode := m &^ Fragments return originMode != Dir && originMode != Regular && originMode != Deprecated && originMode != Executable && originMode != Symlink && originMode != Submodule } func (m FileMode) IsFragments() bool { return m&Fragments != 0 } // String returns the FileMode as a string in the standard git format, // this is, an octal number padded with ceros to 7 digits. Malformed // modes are printed in that same format, for easier debugging. // // Example: Regular is "0100644", Empty is "0000000". func (m FileMode) String() string { return fmt.Sprintf("%07o", uint32(m)) } // IsRegular returns if the FileMode represents that of a regular file, // this is, either Regular or Deprecated. Please note that Executable // are not regular even though in the UNIX tradition, they usually are: // See the IsFile method. func (m FileMode) IsRegular() bool { originMode := m &^ Fragments return originMode == Regular || originMode == Deprecated } // IsFile returns if the FileMode represents that of a file, this is, // Regular, Deprecated, Executable or Link. func (m FileMode) IsFile() bool { originMode := m &^ Fragments return originMode == Regular || originMode == Deprecated || originMode == Executable || originMode == Symlink } func (m FileMode) Unmask() FileMode { return m &^ Fragments } type ErrMalformedMode struct { m FileMode } func (e *ErrMalformedMode) Error() string { return fmt.Sprintf("malformed mode (%s)", e.m) } func IsErrMalformedMode(err error) bool { var e *ErrMalformedMode return errors.As(err, &e) } // ToOSFileMode returns the os.FileMode to be used when creating file // system elements with the given git mode and a nil error on success. // // When the provided mode cannot be mapped to a valid file system mode // (e.g. Submodule) it returns os.FileMode(0) and an error. // // The returned file mode does not take into account the umask. func (m FileMode) ToOSFileMode() (os.FileMode, error) { originMode := m &^ Fragments switch originMode { case Dir: return os.ModePerm | os.ModeDir, nil case Submodule: return os.ModePerm | os.ModeDir, nil case Regular: return os.FileMode(0644), nil // Deprecated is no longer allowed: treated as a Regular instead case Deprecated: return os.FileMode(0644), nil case Executable: return os.FileMode(0755), nil case Symlink: return os.ModePerm | os.ModeSymlink, nil } return os.FileMode(0), &ErrMalformedMode{m: m} } func (m FileMode) MarshalJSON() ([]byte, error) { return strengthen.BufferCat("\"", m.String(), "\""), nil } func (m *FileMode) UnmarshalJSON(b []byte) error { s := string(b) v, err := strconv.ParseInt(strings.TrimSuffix(strings.TrimPrefix(s, "\""), "\""), 8, 64) if err != nil { return err } *m = FileMode(v) return nil } ================================================ FILE: modules/plumbing/filemode/filemode_test.go ================================================ package filemode import ( "encoding/json" "fmt" "io" "os" "strings" "testing" ) func TestFragments(t *testing.T) { mode := Executable | Fragments fmt.Fprintf(os.Stderr, "mode: %o\n", mode) if mode&Executable != 0 { fmt.Fprintf(os.Stderr, "Execute: %o\n", mode) } if mode&Regular != 0 { fmt.Fprintf(os.Stderr, "mode: %o\n", mode) } fmt.Fprintf(os.Stderr, "mode: %o: %o\n", mode^Fragments, Fragments^0170000) } func TestFragments2(t *testing.T) { ms := []FileMode{ Regular, Regular | Fragments, Executable, Executable | Fragments, Dir, Dir | Fragments, Symlink, Symlink | Fragments, Submodule, Submodule | Fragments, } for _, m := range ms { om, err := m.ToOSFileMode() if err != nil { fmt.Fprintf(os.Stderr, "bad filemode: %v\n", err) return } fmt.Fprintf(os.Stderr, "%s --> %s\n", m, om) } } func TestFileModeJSON(t *testing.T) { type J struct { A FileMode `json:"a"` } j := &J{ A: Executable, } var s strings.Builder _ = json.NewEncoder(io.MultiWriter(&s, os.Stderr)).Encode(j) var j2 J if err := json.NewDecoder(strings.NewReader(s.String())).Decode(&j2); err != nil { return } fmt.Fprintf(os.Stderr, "III: %s\n", j2.A) } ================================================ FILE: modules/plumbing/format/ignore/dir.go ================================================ package ignore import ( "bufio" "os" "path/filepath" "strings" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/vfs" ) const ( commentPrefix = "#" zetaDir = ".zeta" gitignoreFile = ".gitignore" zetaignoreFile = ".zetaignore" infoExcludeFile = zetaDir + "/info/exclude" ) // readIgnoreFile reads a specific git ignore file. func readIgnoreFile(fs vfs.VFS, path []string, ignoreFile string) (ps []Pattern, err error) { ignoreFile = strengthen.ExpandPath(ignoreFile) f, err := os.Open(fs.Join(append(path, ignoreFile)...)) if err == nil { defer f.Close() // nolint scanner := bufio.NewScanner(f) for scanner.Scan() { s := scanner.Text() if !strings.HasPrefix(s, commentPrefix) && len(strings.TrimSpace(s)) > 0 { ps = append(ps, ParsePattern(s, path)) } } } if !os.IsNotExist(err) { return nil, err } return } // ReadPatterns reads the .zeta/info/exclude and then the zetaignore patterns // recursively traversing through the directory structure. The result is in // the ascending order of priority (last higher). func ReadPatterns(fs vfs.VFS, path []string) (ps []Pattern, err error) { ps, _ = readIgnoreFile(fs, path, infoExcludeFile) subps, _ := readIgnoreFile(fs, path, zetaignoreFile) ps = append(ps, subps...) subps, _ = readIgnoreFile(fs, path, gitignoreFile) ps = append(ps, subps...) dirs, err := fs.ReadDir(filepath.Join(path...)) if err != nil { return } for _, d := range dirs { if d.IsDir() && d.Name() != zetaDir { if NewMatcher(ps).Match(append(path, d.Name()), true) { continue } var subps []Pattern subps, err = ReadPatterns(fs, append(path, d.Name())) if err != nil { return } if len(subps) > 0 { ps = append(ps, subps...) } } } return } ================================================ FILE: modules/plumbing/format/ignore/doc.go ================================================ // Package gitignore implements matching file system paths to gitignore patterns that // can be automatically read from a git repository tree in the order of definition // priorities. It support all pattern formats as specified in the original gitignore // documentation, copied below: // // Pattern format // ============== // // - A blank line matches no files, so it can serve as a separator for readability. // // - A line starting with # serves as a comment. Put a backslash ("\") in front of // the first hash for patterns that begin with a hash. // // - Trailing spaces are ignored unless they are quoted with backslash ("\"). // // - An optional prefix "!" which negates the pattern; any matching file excluded // by a previous pattern will become included again. It is not possible to // re-include a file if a parent directory of that file is excluded. // Git doesn’t list excluded directories for performance reasons, so // any patterns on contained files have no effect, no matter where they are // defined. Put a backslash ("\") in front of the first "!" for patterns // that begin with a literal "!", for example, "\!important!.txt". // // - If the pattern ends with a slash, it is removed for the purpose of the // following description, but it would only find a match with a directory. // In other words, foo/ will match a directory foo and paths underneath it, // but will not match a regular file or a symbolic link foo (this is consistent // with the way how pathspec works in general in Git). // // - If the pattern does not contain a slash /, Git treats it as a shell glob // pattern and checks for a match against the pathname relative to the location // of the .gitignore file (relative to the toplevel of the work tree if not // from a .gitignore file). // // - Otherwise, Git treats the pattern as a shell glob suitable for consumption // by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will // not match a / in the pathname. For example, "Documentation/*.html" matches // "Documentation/git.html" but not "Documentation/ppc/ppc.html" or // "tools/perf/Documentation/perf.html". // // - A leading slash matches the beginning of the pathname. For example, // "/*.c" matches "cat-file.c" but not "mozilla-sha1/sha1.c". // // Two consecutive asterisks ("**") in patterns matched against full pathname // may have special meaning: // // - A leading "**" followed by a slash means match in all directories. // For example, "**/foo" matches file or directory "foo" anywhere, the same as // pattern "foo". "**/foo/bar" matches file or directory "bar" // anywhere that is directly under directory "foo". // // - A trailing "/**" matches everything inside. For example, "abc/**" matches // all files inside directory "abc", relative to the location of the // .gitignore file, with infinite depth. // // - A slash followed by two consecutive asterisks then a slash matches // zero or more directories. For example, "a/**/b" matches "a/b", "a/x/b", // "a/x/y/b" and so on. // // - Other consecutive asterisks are considered invalid. // // Copyright and license // ===================== // // Copyright (c) Oleg Sklyar, Silvertern and source{d} // // The package code was donated to source{d} to include, modify and develop // further as a part of the `go-git` project, release it on the license of // the whole project or delete it from the project. package ignore ================================================ FILE: modules/plumbing/format/ignore/ignore_test.go ================================================ package ignore import ( "fmt" "os" "testing" ) func TestMatch(t *testing.T) { p := ParsePattern("**/*lue/vol?ano", nil) r := p.Match([]string{"head", "value", "volcano", "tail"}, false) fmt.Fprintf(os.Stderr, "%v\n", r) } ================================================ FILE: modules/plumbing/format/ignore/matcher.go ================================================ package ignore // Matcher defines a global multi-pattern matcher for gitignore patterns type Matcher interface { // Match matches patterns in the order of priorities. As soon as an inclusion or // exclusion is found, not further matching is performed. Match(path []string, isDir bool) bool } // NewMatcher constructs a new global matcher. Patterns must be given in the order of // increasing priority. That is most generic settings files first, then the content of // the repo .gitignore, then content of .gitignore down the path or the repo and then // the content command line arguments. func NewMatcher(ps []Pattern) Matcher { return &matcher{ps} } type matcher struct { patterns []Pattern } func (m *matcher) Match(path []string, isDir bool) bool { n := len(m.patterns) for i := n - 1; i >= 0; i-- { if match := m.patterns[i].Match(path, isDir); match > NoMatch { return match == Exclude } } return false } ================================================ FILE: modules/plumbing/format/ignore/pattern.go ================================================ package ignore import ( "path/filepath" "slices" "strings" ) // MatchResult defines outcomes of a match, no match, exclusion or inclusion. type MatchResult int const ( // NoMatch defines the no match outcome of a match check NoMatch MatchResult = iota // Exclude defines an exclusion of a file as a result of a match check Exclude // Include defines an explicit inclusion of a file as a result of a match check Include ) const ( inclusionPrefix = "!" zeroToManyDirs = "**" patternDirSep = "/" ) // Pattern defines a single gitignore pattern. type Pattern interface { // Match matches the given path to the pattern. Match(path []string, isDir bool) MatchResult } type pattern struct { domain []string pattern []string inclusion bool dirOnly bool isGlob bool } // ParsePattern parses a gitignore pattern string into the Pattern structure. func ParsePattern(p string, domain []string) Pattern { // storing domain, copy it to ensure it isn't changed externally domain = slices.Clone(domain) res := pattern{domain: domain} if strings.HasPrefix(p, inclusionPrefix) { res.inclusion = true p = p[1:] } if !strings.HasSuffix(p, "\\ ") { p = strings.TrimRight(p, " ") } if strings.HasSuffix(p, patternDirSep) { res.dirOnly = true p = p[:len(p)-1] } if strings.Contains(p, patternDirSep) { res.isGlob = true } res.pattern = strings.Split(p, patternDirSep) return &res } func (p *pattern) Match(path []string, isDir bool) MatchResult { if len(path) <= len(p.domain) { return NoMatch } for i, e := range p.domain { if path[i] != e { return NoMatch } } path = path[len(p.domain):] if p.isGlob { if !p.globMatch(path, isDir) { return NoMatch } } else { if !p.simpleNameMatch(path, isDir) { return NoMatch } } if p.inclusion { return Include } return Exclude } func (p *pattern) simpleNameMatch(path []string, isDir bool) bool { for i, name := range path { match, err := filepath.Match(p.pattern[0], name) if err != nil { return false } if !match { continue } if p.dirOnly && !isDir && i == len(path)-1 { return false } return true } return false } func (p *pattern) globMatch(path []string, isDir bool) bool { matched := false canTraverse := false for i, pattern := range p.pattern { if pattern == "" { canTraverse = false continue } if pattern == zeroToManyDirs { if i == len(p.pattern)-1 { break } canTraverse = true continue } if strings.Contains(pattern, zeroToManyDirs) { return false } if len(path) == 0 { return false } if canTraverse { canTraverse = false for len(path) > 0 { e := path[0] path = path[1:] match, err := filepath.Match(pattern, e) if err != nil { return false } if match { matched = true break } if len(path) == 0 { // if nothing left then fail matched = false } } continue } if match, err := filepath.Match(pattern, path[0]); err != nil || !match { return false } matched = true path = path[1:] // files matching dir globs, don't match if len(path) == 0 && i < len(p.pattern)-1 { matched = false } } if matched && p.dirOnly && !isDir && len(path) == 0 { matched = false } return matched } ================================================ FILE: modules/plumbing/format/index/decoder.go ================================================ package index import ( "bufio" "bytes" "errors" "hash" "io" "strconv" "time" "github.com/antgroup/hugescm/modules/binary" "github.com/antgroup/hugescm/modules/plumbing" "github.com/zeebo/blake3" ) var ( // DecodeVersionSupported is the range of supported index versions DecodeVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4} // ErrMalformedSignature is returned by Decode when the index header file is // malformed ErrMalformedSignature = errors.New("malformed index signature file") // ErrInvalidChecksum is returned by Decode if the SHA1 hash mismatch with // the read content ErrInvalidChecksum = errors.New("invalid checksum") // ErrUnknownExtension is returned when an index extension is encountered that is considered mandatory ErrUnknownExtension = errors.New("unknown extension") ) const ( entryHeaderLength = 62 entryExtended = 0x4000 entryValid = 0x8000 nameMask = 0xfff intentToAddMask = 1 << 13 skipWorkTreeMask = 1 << 14 ) // A Decoder reads and decodes index files from an input stream. type Decoder struct { buf *bufio.Reader r io.Reader hash hash.Hash lastEntry *Entry extReader *bufio.Reader } // NewDecoder returns a new decoder that reads from r. func NewDecoder(r io.Reader) *Decoder { h := blake3.New() buf := bufio.NewReader(r) return &Decoder{ buf: buf, r: io.TeeReader(buf, h), hash: h, extReader: bufio.NewReader(nil), } } // Decode reads the whole index object from its input and stores it in the // value pointed to by idx. func (d *Decoder) Decode(idx *Index) error { var err error idx.Version, err = validateHeader(d.r) if err != nil { return err } entryCount, err := binary.ReadUint32(d.r) if err != nil { return err } if err := d.readEntries(idx, int(entryCount)); err != nil { return err } return d.readExtensions(idx) } func (d *Decoder) readEntries(idx *Index, count int) error { for range count { e, err := d.readEntry(idx) if err != nil { return err } d.lastEntry = e idx.Entries = append(idx.Entries, e) } return nil } func (d *Decoder) readEntry(idx *Index) (*Entry, error) { e := &Entry{} var msec, mnsec, sec, nsec uint32 var flags uint16 flow := []any{ &sec, &nsec, &msec, &mnsec, &e.Dev, &e.Inode, &e.Mode, &e.UID, &e.GID, &e.Size, &e.Hash, &flags, } if err := binary.Read(d.r, flow...); err != nil { return nil, err } read := entryHeaderLength if sec != 0 || nsec != 0 { e.CreatedAt = time.Unix(int64(sec), int64(nsec)) } if msec != 0 || mnsec != 0 { e.ModifiedAt = time.Unix(int64(msec), int64(mnsec)) } e.Stage = Stage(flags>>12) & 0x3 if flags&entryExtended != 0 { extended, err := binary.ReadUint16(d.r) if err != nil { return nil, err } read += 2 e.IntentToAdd = extended&intentToAddMask != 0 e.SkipWorktree = extended&skipWorkTreeMask != 0 } if err := d.readEntryName(idx, e, flags); err != nil { return nil, err } return e, d.padEntry(idx, e, read) } func (d *Decoder) readEntryName(idx *Index, e *Entry, flags uint16) error { var name string var err error switch idx.Version { case 2, 3: nameLen := flags & nameMask name, err = d.doReadEntryName(nameLen) case 4: name, err = d.doReadEntryNameV4() default: return ErrUnsupportedVersion } if err != nil { return err } e.Name = name return nil } func (d *Decoder) doReadEntryNameV4() (string, error) { l, err := binary.ReadVariableWidthInt(d.r) if err != nil { return "", err } var base string if d.lastEntry != nil { base = d.lastEntry.Name[:len(d.lastEntry.Name)-int(l)] } name, err := binary.ReadUntil(d.r, '\x00') if err != nil { return "", err } return base + string(name), nil } func (d *Decoder) doReadEntryName(nameLen uint16) (string, error) { name := make([]byte, nameLen) _, err := io.ReadFull(d.r, name) return string(name), err } // Index entries are padded out to the next 8 byte alignment // for historical reasons related to how C Git read the files. func (d *Decoder) padEntry(idx *Index, e *Entry, read int) error { if idx.Version == 4 { return nil } entrySize := read + len(e.Name) padLen := 8 - entrySize%8 _, err := io.CopyN(io.Discard, d.r, int64(padLen)) return err } func (d *Decoder) readExtensions(idx *Index) error { // TODO: support 'Split index' and 'Untracked cache' extensions, take in // count that they are not supported by jgit or libgit var expected []byte var peeked []byte var err error // we should always be able to peek for 4 bytes (header) + 4 bytes (extlen) + final hash // if this fails, we know that we're at the end of the index peekLen := 4 + 4 + d.hash.Size() for { expected = d.hash.Sum(nil) peeked, err = d.buf.Peek(peekLen) if len(peeked) < peekLen { // there can't be an extension at this point, so let's bail out //err = nil break } if err != nil { return err } err = d.readExtension(idx) if err != nil { return err } } return d.readChecksum(expected) } func (d *Decoder) readExtension(idx *Index) error { var header [4]byte if _, err := io.ReadFull(d.r, header[:]); err != nil { return err } r, err := d.getExtensionReader() if err != nil { return err } switch { case bytes.Equal(header[:], treeExtSignature): idx.Cache = &Tree{} d := &treeExtensionDecoder{r} if err := d.Decode(idx.Cache); err != nil { return err } case bytes.Equal(header[:], resolveUndoExtSignature): idx.ResolveUndo = &ResolveUndo{} d := &resolveUndoDecoder{r} if err := d.Decode(idx.ResolveUndo); err != nil { return err } case bytes.Equal(header[:], endOfIndexEntryExtSignature): idx.EndOfIndexEntry = &EndOfIndexEntry{} d := &endOfIndexEntryDecoder{r} if err := d.Decode(idx.EndOfIndexEntry); err != nil { return err } default: // See https://git-scm.com/docs/index-format, which says: // If the first byte is 'A'..'Z' the extension is optional and can be ignored. if header[0] < 'A' || header[0] > 'Z' { return ErrUnknownExtension } d := &unknownExtensionDecoder{r} if err := d.Decode(); err != nil { return err } } return nil } func (d *Decoder) getExtensionReader() (*bufio.Reader, error) { extLen, err := binary.ReadUint32(d.r) if err != nil { return nil, err } d.extReader.Reset(&io.LimitedReader{R: d.r, N: int64(extLen)}) return d.extReader, nil } func (d *Decoder) readChecksum(expected []byte) error { var h plumbing.Hash if _, err := io.ReadFull(d.r, h[:]); err != nil { return err } if !bytes.Equal(h[:], expected) { return ErrInvalidChecksum } return nil } func validateHeader(r io.Reader) (version uint32, err error) { var s = make([]byte, 4) if _, err := io.ReadFull(r, s); err != nil { return 0, err } if !bytes.Equal(s, indexSignature) { return 0, ErrMalformedSignature } version, err = binary.ReadUint32(r) if err != nil { return 0, err } if version < DecodeVersionSupported.Min || version > DecodeVersionSupported.Max { return 0, ErrUnsupportedVersion } return } type treeExtensionDecoder struct { r *bufio.Reader } func (d *treeExtensionDecoder) Decode(t *Tree) error { for { e, err := d.readEntry() if err != nil { if errors.Is(err, io.EOF) { return nil } return err } if e == nil { continue } t.Entries = append(t.Entries, *e) } } func (d *treeExtensionDecoder) readEntry() (*TreeEntry, error) { e := &TreeEntry{} path, err := binary.ReadUntil(d.r, '\x00') if err != nil { return nil, err } e.Path = string(path) count, err := binary.ReadUntil(d.r, ' ') if err != nil { return nil, err } i, err := strconv.Atoi(string(count)) if err != nil { return nil, err } // An entry can be in an invalidated state and is represented by having a // negative number in the entry_count field. if i == -1 { return nil, nil } e.Entries = i trees, err := binary.ReadUntil(d.r, '\n') if err != nil { return nil, err } i, err = strconv.Atoi(string(trees)) if err != nil { return nil, err } e.Trees = i _, err = io.ReadFull(d.r, e.Hash[:]) if err != nil { return nil, err } return e, nil } type resolveUndoDecoder struct { r *bufio.Reader } func (d *resolveUndoDecoder) Decode(ru *ResolveUndo) error { for { e, err := d.readEntry() if err != nil { if errors.Is(err, io.EOF) { return nil } return err } ru.Entries = append(ru.Entries, *e) } } func (d *resolveUndoDecoder) readEntry() (*ResolveUndoEntry, error) { e := &ResolveUndoEntry{ Stages: make(map[Stage]plumbing.Hash), } path, err := binary.ReadUntil(d.r, '\x00') if err != nil { return nil, err } e.Path = string(path) for i := range 3 { if err := d.readStage(e, Stage(i+1)); err != nil { return nil, err } } for s := range e.Stages { var hash plumbing.Hash if _, err := io.ReadFull(d.r, hash[:]); err != nil { return nil, err } e.Stages[s] = hash } return e, nil } func (d *resolveUndoDecoder) readStage(e *ResolveUndoEntry, s Stage) error { ascii, err := binary.ReadUntil(d.r, '\x00') if err != nil { return err } stage, err := strconv.ParseInt(string(ascii), 8, 64) if err != nil { return err } if stage != 0 { e.Stages[s] = plumbing.ZeroHash } return nil } type endOfIndexEntryDecoder struct { r *bufio.Reader } func (d *endOfIndexEntryDecoder) Decode(e *EndOfIndexEntry) error { var err error e.Offset, err = binary.ReadUint32(d.r) if err != nil { return err } _, err = io.ReadFull(d.r, e.Hash[:]) return err } type unknownExtensionDecoder struct { r *bufio.Reader } func (d *unknownExtensionDecoder) Decode() error { var buf [1024]byte for { _, err := d.r.Read(buf[:]) if errors.Is(err, io.EOF) { break } if err != nil { return err } } return nil } ================================================ FILE: modules/plumbing/format/index/decoder_test.go ================================================ package index import ( "fmt" "os" "testing" ) func TestDecode(t *testing.T) { fd, err := os.Open("/private/tmp/k3/.zeta/index") if err != nil { fmt.Fprintf(os.Stderr, "open index error: %v\n", err) return } defer fd.Close() // nolint d := NewDecoder(fd) idx := &Index{} if err := d.Decode(idx); err != nil { fmt.Fprintf(os.Stderr, "decode index error: %v\n", err) return } for _, e := range idx.Entries { fmt.Fprintf(os.Stderr, "%v %s\n", e.SkipWorktree, e.Name) } } func TestDecodeSkip(t *testing.T) { fd, err := os.Open("/private/tmp/k4/.zeta/index") if err != nil { fmt.Fprintf(os.Stderr, "open index error: %v\n", err) return } defer fd.Close() // nolint d := NewDecoder(fd) idx := &Index{} if err := d.Decode(idx); err != nil { fmt.Fprintf(os.Stderr, "decode index error: %v\n", err) return } checkout := 0 for _, e := range idx.Entries { if e.SkipWorktree { continue } checkout++ } fmt.Fprintf(os.Stderr, "%v total: %d\n", checkout, len(idx.Entries)) } func TestDecode2(t *testing.T) { fd, err := os.Open("/private/tmp/xh5/.zeta/index") if err != nil { fmt.Fprintf(os.Stderr, "open index error: %v\n", err) return } defer fd.Close() // nolint d := NewDecoder(fd) idx := &Index{} if err := d.Decode(idx); err != nil { fmt.Fprintf(os.Stderr, "decode index error: %v\n", err) return } for _, e := range idx.Entries { if e.Name != "go.pkg" { continue } fmt.Fprintf(os.Stderr, "%v %s\n", e.String(), e.Mode) } } func TestIndexGlob(t *testing.T) { fd, err := os.Open("/private/tmp/k4/.zeta/index") if err != nil { fmt.Fprintf(os.Stderr, "open index error: %v\n", err) return } defer fd.Close() // nolint d := NewDecoder(fd) idx := &Index{} if err := d.Decode(idx); err != nil { fmt.Fprintf(os.Stderr, "decode index error: %v\n", err) return } patterns := []string{ "sigma", "sigma/", "s*", "sigma/*", } for _, p := range patterns { eee, err := idx.Glob(p) if err != nil { fmt.Fprintf(os.Stderr, "glob error: %v\n", err) continue } for _, e := range eee { fmt.Fprintf(os.Stderr, "%s: %s\n", p, e.Name) } } } ================================================ FILE: modules/plumbing/format/index/doc.go ================================================ // Package index implements encoding and decoding of index format files. // // Zeta index format // ================ // // == The Zeta index file has the following format (Refer to the Git Index format) // // All binary numbers are in network byte order. Version 2 is described // here unless stated otherwise. // // - A 12-byte header consisting of // // 4-byte signature: // The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache") // // 4-byte version number: // The current supported versions are 2, 3 and 4. // // 32-bit number of index entries. // // - A number of sorted index entries (see below). // // - Extensions // // Extensions are identified by signature. Optional extensions can // be ignored if Zeta does not understand them. // // Zeta currently supports cached tree and resolve undo extensions. // // 4-byte extension signature. If the first byte is 'A'..'Z' the // extension is optional and can be ignored. // // 32-bit size of the extension // // Extension data // // - 256-bit BLAKE3 over the content of the index file before this // checksum. // // == Index entry // // Index entries are sorted in ascending order on the name field, // interpreted as a string of unsigned bytes (i.e. memcmp() order, no // localization, no special casing of directory separator '/'). Entries // with the same name are sorted by their stage field. // // 32-bit ctime seconds, the last time a file's metadata changed // this is stat(2) data // // 32-bit ctime nanosecond fractions // this is stat(2) data // // 32-bit mtime seconds, the last time a file's data changed // this is stat(2) data // // 32-bit mtime nanosecond fractions // this is stat(2) data // // 32-bit dev // this is stat(2) data // // 32-bit ino // this is stat(2) data // // 32-bit mode, split into (high to low bits) // // 4-bit object type // valid values in binary are 1000 (regular file), 1010 (symbolic link) // and 1110 (gitlink) // // 3-bit unused // // 9-bit unix permission. Only 0755 and 0644 are valid for regular files. // Symbolic links and gitlinks have value 0 in this field. // // 32-bit uid // this is stat(2) data // // 32-bit gid // this is stat(2) data // // 32-bit file size // This is the on-disk size from stat(2), truncated to 32-bit. // // 256-bit BLAKE3 for the represented object // // A 16-bit 'flags' field split into (high to low bits) // // 1-bit assume-valid flag // // 1-bit extended flag (must be zero in version 2) // // 2-bit stage (during merge) // // 12-bit name length if the length is less than 0xFFF; otherwise 0xFFF // is stored in this field. // // (Version 3 or later) A 16-bit field, only applicable if the // "extended flag" above is 1, split into (high to low bits). // // 1-bit reserved for future // // 1-bit skip-worktree flag (used by sparse checkout) // // 1-bit intent-to-add flag (used by "zeta add -N") // // 13-bit unused, must be zero // // Entry path name (variable length) relative to top level directory // (without leading slash). '/' is used as path separator. The special // path components ".", ".." and ".zeta" (without quotes) are disallowed. // Trailing slash is also disallowed. // // The exact encoding is undefined, but the '.' and '/' characters // are encoded in 7-bit ASCII and the encoding cannot contain a NUL // byte (iow, this is a UNIX pathname). // // (Version 4) In version 4, the entry path name is prefix-compressed // relative to the path name for the previous entry (the very first // entry is encoded as if the path name for the previous entry is an // empty string). At the beginning of an entry, an integer N in the // variable width encoding (the same encoding as the offset is encoded // for OFS_DELTA pack entries; see pack-format.txt) is stored, followed // by a NUL-terminated string S. Removing N bytes from the end of the // path name for the previous entry, and replacing it with the string S // yields the path name for this entry. // // 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes // while keeping the name NUL-terminated. // // (Version 4) In version 4, the padding after the pathname does not // exist. // // Interpretation of index entries in split index mode is completely // different. See below for details. // // == Extensions // // === Cached tree // // Cached tree extension contains pre-computed hashes for trees that can // be derived from the index. It helps speed up tree object generation // from index for a new commit. // // When a path is updated in index, the path must be invalidated and // removed from tree cache. // // The signature for this extension is { 'T', 'R', 'E', 'E' }. // // A series of entries fill the entire extension; each of which // consists of: // // - NUL-terminated path component (relative to its parent directory); // // - ASCII decimal number of entries in the index that is covered by the // tree this entry represents (entry_count); // // - A space (ASCII 32); // // - ASCII decimal number that represents the number of subtrees this // tree has; // // - A newline (ASCII 10); and // // - 256-bit object name for the object that would result from writing // this span of index as a tree. // // An entry can be in an invalidated state and is represented by having // a negative number in the entry_count field. In this case, there is no // object name and the next entry starts immediately after the newline. // When writing an invalid entry, -1 should always be used as entry_count. // // The entries are written out in the top-down, depth-first order. The // first entry represents the root level of the repository, followed by the // first subtree--let's call this A--of the root level (with its name // relative to the root level), followed by the first subtree of A (with // its name relative to A), ... // // === Resolve undo // // A conflict is represented in the index as a set of higher stage entries. // When a conflict is resolved (e.g. with "zeta add path"), these higher // stage entries will be removed and a stage-0 entry with proper resolution // is added. // // When these higher stage entries are removed, they are saved in the // resolve undo extension, so that conflicts can be recreated (e.g. with // "zeta checkout -m"), in case users want to redo a conflict resolution // from scratch. // // The signature for this extension is { 'R', 'E', 'U', 'C' }. // // A series of entries fill the entire extension; each of which // consists of: // // - NUL-terminated pathname the entry describes (relative to the root of // the repository, i.e. full pathname); // // - Three NUL-terminated ASCII octal numbers, entry mode of entries in // stage 1 to 3 (a missing stage is represented by "0" in this field); // and // // - At most three 256-bit object names of the entry in stages from 1 to 3 // (nothing is written for a missing stage). // // === Split index // // In split index mode, the majority of index entries could be stored // in a separate file. This extension records the changes to be made on // top of that to produce the final index. // // The signature for this extension is { 'l', 'i', 'n', 'k' }. // // The extension consists of: // // - 256-bit BLAKE3 of the shared index file. The shared index file path // is $GIT_DIR/sharedindex.. If all 160 bits are zero, the // index does not require a shared index file. // // - An ewah-encoded delete bitmap, each bit represents an entry in the // shared index. If a bit is set, its corresponding entry in the // shared index will be removed from the final index. Note, because // a delete operation changes index entry positions, but we do need // original positions in replace phase, it's best to just mark // entries for removal, then do a mass deletion after replacement. // // - An ewah-encoded replace bitmap, each bit represents an entry in // the shared index. If a bit is set, its corresponding entry in the // shared index will be replaced with an entry in this index // file. All replaced entries are stored in sorted order in this // index. The first "1" bit in the replace bitmap corresponds to the // first index entry, the second "1" bit to the second entry and so // on. Replaced entries may have empty path names to save space. // // The remaining index entries after replaced ones will be added to the // final index. These added entries are also sorted by entry name then // stage. // // == Untracked cache // // Untracked cache saves the untracked file list and necessary data to // verify the cache. The signature for this extension is { 'U', 'N', // 'T', 'R' }. // // The extension starts with // // - A sequence of NUL-terminated strings, preceded by the size of the // sequence in variable width encoding. Each string describes the // environment where the cache can be used. // // - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from // ctime field until "file size". // // - Stat data of plumbing.excludesfile // // - 32-bit dir_flags (see struct dir_struct) // // - 256-bit BLAKE3 of $GIT_DIR/info/exclude. Null BLAKE3 means the file // does not exist. // // - 256-bit BLAKE3 of plumbing.excludesfile. Null BLAKE3 means the file does // not exist. // // - NUL-terminated string of per-dir exclude file name. This usually // is ".gitignore/.zetaignore". // // - The number of following directory blocks, variable width // encoding. If this number is zero, the extension ends here with a // following NUL. // // - A number of directory blocks in depth-first-search order, each // consists of // // - The number of untracked entries, variable width encoding. // // - The number of sub-directory blocks, variable width encoding. // // - The directory name terminated by NUL. // // - A number of untracked file/dir names terminated by NUL. // // The remaining data of each directory block is grouped by type: // // - An ewah bitmap, the n-th bit marks whether the n-th directory has // valid untracked cache entries. // // - An ewah bitmap, the n-th bit records "check-only" bit of // read_directory_recursive() for the n-th directory. // // - An ewah bitmap, the n-th bit indicates whether BLAKE3 and stat data // is valid for the n-th directory and exists in the next data. // // - An array of stat data. The n-th data corresponds with the n-th // "one" bit in the previous ewah bitmap. // // - An array of BLAKE3. The n-th BLAKE3 corresponds with the n-th "one" bit // in the previous ewah bitmap. // // - One NUL. // // == File System Monitor cache // // The file system monitor cache tracks files for which the core.fsmonitor // hook has told us about changes. The signature for this extension is // { 'F', 'S', 'M', 'N' }. // // The extension starts with // // - 32-bit version number: the current supported version is 1. // // - 64-bit time: the extension data reflects all changes through the given // time which is stored as the nanoseconds elapsed since midnight, // January 1, 1970. // // - 32-bit bitmap size: the size of the CE_FSMONITOR_VALID bitmap. // // - An ewah bitmap, the n-th bit indicates whether the n-th index entry // is not CE_FSMONITOR_VALID. // // == End of Index Entry // // The End of Index Entry (EOIE) is used to locate the end of the variable // length index entries and the beginning of the extensions. Code can take // advantage of this to quickly locate the index extensions without having // to parse through all of the index entries. // // Because it must be able to be loaded before the variable length cache // entries and other index extensions, this extension must be written last. // The signature for this extension is { 'E', 'O', 'I', 'E' }. // // The extension consists of: // // - 32-bit offset to the end of the index entries // // - 256-bit BLAKE3 over the extension types and their sizes (but not // their contents). E.g. if we have "TREE" extension that is N-bytes // long, "REUC" extension that is M-bytes long, followed by "EOIE", // then the hash would be: // // BLAKE3("TREE" + + // "REUC" + ) // // == Index Entry Offset Table // // The Index Entry Offset Table (IEOT) is used to help address the CPU // cost of loading the index by enabling multi-threading the process of // converting cache entries from the on-disk format to the in-memory format. // The signature for this extension is { 'I', 'E', 'O', 'T' }. // // The extension consists of: // // - 32-bit version (currently 1) // // - A number of index offset entries each consisting of: // // - 32-bit offset from the beginning of the file to the first cache entry // in this block of entries. // // - 32-bit count of cache entries in this blockpackage index package index ================================================ FILE: modules/plumbing/format/index/encoder.go ================================================ package index import ( "bytes" "errors" "fmt" "io" "path" "sort" "strings" "time" "hash" "github.com/antgroup/hugescm/modules/binary" "github.com/zeebo/blake3" ) const ( // EncodeVersionSupported is the range of supported index versions EncodeVersionSupported uint32 = 4 ) var ( // ErrInvalidTimestamp is returned by Encode if a Index with a Entry with // negative timestamp values ErrInvalidTimestamp = errors.New("negative timestamps are not allowed") ) // An Encoder writes an Index to an output stream. type Encoder struct { w io.Writer hash hash.Hash lastEntry *Entry } // NewEncoder returns a new encoder that writes to w. func NewEncoder(w io.Writer) *Encoder { h := blake3.New() mw := io.MultiWriter(w, h) return &Encoder{mw, h, nil} } // Encode writes the Index to the stream of the encoder. func (e *Encoder) Encode(idx *Index) error { return e.encode(idx, true) } func (e *Encoder) encode(idx *Index, footer bool) error { // TODO: support extensions if idx.Version > EncodeVersionSupported { return ErrUnsupportedVersion } if err := e.encodeHeader(idx); err != nil { return err } if err := e.encodeEntries(idx); err != nil { return err } if footer { return e.encodeFooter() } return nil } func (e *Encoder) encodeHeader(idx *Index) error { return binary.Write(e.w, indexSignature, idx.Version, uint32(len(idx.Entries)), ) } func (e *Encoder) encodeEntries(idx *Index) error { sort.Sort(byName(idx.Entries)) for _, entry := range idx.Entries { if err := e.encodeEntry(idx, entry); err != nil { return err } entryLength := entryHeaderLength if entry.IntentToAdd || entry.SkipWorktree { entryLength += 2 } wrote := entryLength + len(entry.Name) if err := e.padEntry(idx, wrote); err != nil { return err } } return nil } func (e *Encoder) encodeEntry(idx *Index, entry *Entry) error { sec, nsec, err := e.timeToUint32(&entry.CreatedAt) if err != nil { return err } msec, mnsec, err := e.timeToUint32(&entry.ModifiedAt) if err != nil { return err } flags := uint16(entry.Stage&0x3) << 12 if l := len(entry.Name); l < nameMask { flags |= uint16(l) } else { flags |= nameMask } flow := []any{ sec, nsec, msec, mnsec, entry.Dev, entry.Inode, entry.Mode, entry.UID, entry.GID, entry.Size, entry.Hash[:], } flagsFlow := []any{flags} if entry.IntentToAdd || entry.SkipWorktree { var extendedFlags uint16 if entry.IntentToAdd { extendedFlags |= intentToAddMask } if entry.SkipWorktree { extendedFlags |= skipWorkTreeMask } flagsFlow = []any{flags | entryExtended, extendedFlags} } flow = append(flow, flagsFlow...) if err := binary.Write(e.w, flow...); err != nil { return err } switch idx.Version { case 2, 3: err = e.encodeEntryName(entry) case 4: err = e.encodeEntryNameV4(entry) default: err = ErrUnsupportedVersion } return err } func (e *Encoder) encodeEntryName(entry *Entry) error { return binary.Write(e.w, []byte(entry.Name)) } func (e *Encoder) encodeEntryNameV4(entry *Entry) error { name := entry.Name l := 0 if e.lastEntry != nil { dir := path.Dir(e.lastEntry.Name) + "/" if strings.HasPrefix(entry.Name, dir) { l = len(e.lastEntry.Name) - len(dir) name = strings.TrimPrefix(entry.Name, dir) } else { l = len(e.lastEntry.Name) } } e.lastEntry = entry err := binary.WriteVariableWidthInt(e.w, int64(l)) if err != nil { return err } return binary.Write(e.w, []byte(name+string('\x00'))) } func (e *Encoder) EncodeRawExtension(signature string, data []byte) error { if len(signature) != 4 { return fmt.Errorf("invalid signature length") } _, err := e.w.Write([]byte(signature)) if err != nil { return err } err = binary.WriteUint32(e.w, uint32(len(data))) if err != nil { return err } _, err = e.w.Write(data) if err != nil { return err } return nil } func (e *Encoder) timeToUint32(t *time.Time) (uint32, uint32, error) { if t.IsZero() { return 0, 0, nil } if t.Unix() < 0 || t.UnixNano() < 0 { return 0, 0, ErrInvalidTimestamp } return uint32(t.Unix()), uint32(t.Nanosecond()), nil } func (e *Encoder) padEntry(idx *Index, wrote int) error { if idx.Version == 4 { return nil } padLen := 8 - wrote%8 _, err := e.w.Write(bytes.Repeat([]byte{'\x00'}, padLen)) return err } func (e *Encoder) encodeFooter() error { return binary.Write(e.w, e.hash.Sum(nil)) } type byName []*Entry func (l byName) Len() int { return len(l) } func (l byName) Swap(i, j int) { l[i], l[j] = l[j], l[i] } func (l byName) Less(i, j int) bool { return l[i].Name < l[j].Name } ================================================ FILE: modules/plumbing/format/index/encoder_test.go ================================================ package index import ( "bytes" "fmt" "os" "strings" "testing" "time" "github.com/antgroup/hugescm/modules/plumbing" ) func TestIndex(t *testing.T) { fd, err := os.Create("/tmp/abc.index") if err != nil { return } defer fd.Close() // nolint treeEntries := make([]TreeEntry, 0, 100) e := NewEncoder(fd) _ = e.Encode(&Index{ Version: EncodeVersionSupported, Cache: &Tree{ Entries: treeEntries, }, }) } func TestEncodeV4(t *testing.T) { idx := &Index{ Version: 4, Entries: []*Entry{{ CreatedAt: time.Now(), ModifiedAt: time.Now(), Dev: 4242, Inode: 424242, UID: 84, GID: 8484, Size: 42, Stage: TheirMode, Hash: plumbing.NewHash("e25b29c8946e0e192fae2edc1dabf7be71e8ecf3"), Name: "foo", }, { CreatedAt: time.Now(), ModifiedAt: time.Now(), Name: "bar", Size: 82, }, { CreatedAt: time.Now(), ModifiedAt: time.Now(), Name: strings.Repeat(" ", 20), Size: 82, }, { CreatedAt: time.Now(), ModifiedAt: time.Now(), Name: "baz/bar", Size: 82, }, { CreatedAt: time.Now(), ModifiedAt: time.Now(), Name: "baz/bar/bar", Size: 82, }}, } buf := bytes.NewBuffer(nil) e := NewEncoder(buf) if err := e.Encode(idx); err != nil { fmt.Fprintf(os.Stderr, "error %v\n", err) return } output := &Index{} d := NewDecoder(buf) if err := d.Decode(output); err != nil { fmt.Fprintf(os.Stderr, "%v\n", err) return } for _, e := range output.Entries { fmt.Fprintf(os.Stderr, "%s\n", e.Name) } } ================================================ FILE: modules/plumbing/format/index/index.go ================================================ package index import ( "bytes" "errors" "fmt" "path/filepath" "strings" "time" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" ) var ( // ErrUnsupportedVersion is returned by Decode when the index file version // is not supported. ErrUnsupportedVersion = errors.New("unsupported version") // ErrEntryNotFound is returned by Index.Entry, if an entry is not found. ErrEntryNotFound = errors.New("entry not found") indexSignature = []byte{'D', 'I', 'R', 'C'} treeExtSignature = []byte{'T', 'R', 'E', 'E'} resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'} endOfIndexEntryExtSignature = []byte{'E', 'O', 'I', 'E'} ) // Stage during merge type Stage int const ( // Merged is the default stage, fully merged Merged Stage = 1 // AncestorMode is the base revision AncestorMode Stage = 1 // OurMode is the first tree revision, ours OurMode Stage = 2 // TheirMode is the second tree revision, theirs TheirMode Stage = 3 ) // Index contains the information about which objects are currently checked out // in the worktree, having information about the working files. Changes in // worktree are detected using this Index. The Index is also used during merges type Index struct { // Version is index version Version uint32 // Entries collection of entries represented by this Index. The order of // this collection is not guaranteed Entries []*Entry // Cache represents the 'Cached tree' extension Cache *Tree // ResolveUndo represents the 'Resolve undo' extension ResolveUndo *ResolveUndo // EndOfIndexEntry represents the 'End of Index Entry' extension EndOfIndexEntry *EndOfIndexEntry } // Add creates a new Entry and returns it. The caller should first check that // another entry with the same path does not exist. func (i *Index) Add(path string) *Entry { e := &Entry{ Name: filepath.ToSlash(path), } i.Entries = append(i.Entries, e) return e } func (i *Index) Rename(source, destination string, prefix bool) error { if prefix { source = filepath.ToSlash(source) + "/" destination = filepath.ToSlash(destination) + "/" for _, e := range i.Entries { if suffix, ok := strings.CutPrefix(e.Name, source); ok { e.Name = destination + suffix } } return nil } source = filepath.ToSlash(source) destination = filepath.ToSlash(destination) for _, e := range i.Entries { if e.Name == source { e.Name = destination return nil } } return ErrEntryNotFound } // Entry returns the entry that match the given path, if any. func (i *Index) Entry(path string) (*Entry, error) { path = filepath.ToSlash(path) for _, e := range i.Entries { if e.Name == path { return e, nil } } return nil, ErrEntryNotFound } // Remove remove the entry that match the give path and returns deleted entry. func (i *Index) Remove(path string) (*Entry, error) { path = filepath.ToSlash(path) for index, e := range i.Entries { if e.Name == path { i.Entries = append(i.Entries[:index], i.Entries[index+1:]...) return e, nil } } return nil, ErrEntryNotFound } // Glob returns the all entries matching pattern or nil if there is no matching // entry. The syntax of patterns is the same as in filepath.Glob. func (i *Index) Glob(pattern string) (matches []*Entry, err error) { pattern = filepath.ToSlash(pattern) for _, e := range i.Entries { m, err := match(pattern, e.Name) if err != nil { return nil, err } if m { matches = append(matches, e) } } return } // String is equivalent to `git ls-files --stage --debug` func (i *Index) String() string { buf := bytes.NewBuffer(nil) for _, e := range i.Entries { buf.WriteString(e.String()) } return buf.String() } // Entry represents a single file (or stage of a file) in the cache. An entry // represents exactly one stage of a file. If a file path is unmerged then // multiple Entry instances may appear for the same path name. type Entry struct { // Hash is the BLAKE3 of the represented file Hash plumbing.Hash // Name is the Entry path name relative to top level directory Name string // CreatedAt time when the tracked path was created CreatedAt time.Time // ModifiedAt time when the tracked path was changed ModifiedAt time.Time // Dev and Inode of the tracked path Dev, Inode uint32 // Mode of the path Mode filemode.FileMode // UID and GID, userid and group id of the owner UID, GID uint32 // Size is the length in bytes for regular files Size uint64 // Stage on a merge is defines what stage is representing this entry // https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging Stage Stage // SkipWorktree used in sparse checkouts // https://git-scm.com/docs/git-read-tree#_sparse_checkout SkipWorktree bool // IntentToAdd record only the fact that the path will be added later // https://git-scm.com/docs/git-add ("git add -N") IntentToAdd bool } func (e Entry) String() string { buf := bytes.NewBuffer(nil) fmt.Fprintf(buf, "%06o %s %d\t%s\n", e.Mode, e.Hash, e.Stage, e.Name) fmt.Fprintf(buf, " ctime: %d:%d\n", e.CreatedAt.Unix(), e.CreatedAt.Nanosecond()) fmt.Fprintf(buf, " mtime: %d:%d\n", e.ModifiedAt.Unix(), e.ModifiedAt.Nanosecond()) fmt.Fprintf(buf, " dev: %d\tino: %d\n", e.Dev, e.Inode) fmt.Fprintf(buf, " uid: %d\tgid: %d\n", e.UID, e.GID) fmt.Fprintf(buf, " size: %d\tflags: %x\n", e.Size, 0) return buf.String() } // Tree contains pre-computed hashes for trees that can be derived from the // index. It helps speed up tree object generation from index for a new commit. type Tree struct { Entries []TreeEntry } // TreeEntry entry of a cached Tree type TreeEntry struct { // Path component (relative to its parent directory) Path string // Entries is the number of entries in the index that is covered by the tree // this entry represents. Entries int // Trees is the number that represents the number of subtrees this tree has Trees int // Hash object name for the object that would result from writing this span // of index as a tree. Hash plumbing.Hash } // ResolveUndo is used when a conflict is resolved (e.g. with "git add path"), // these higher stage entries are removed and a stage-0 entry with proper // resolution is added. When these higher stage entries are removed, they are // saved in the resolve undo extension. type ResolveUndo struct { Entries []ResolveUndoEntry } // ResolveUndoEntry contains the information about a conflict when is resolved type ResolveUndoEntry struct { Path string Stages map[Stage]plumbing.Hash } // EndOfIndexEntry is the End of Index Entry (EOIE) is used to locate the end of // the variable length index entries and the beginning of the extensions. Code // can take advantage of this to quickly locate the index extensions without // having to parse through all of the index entries. // // Because it must be able to be loaded before the variable length cache // entries and other index extensions, this extension must be written last. type EndOfIndexEntry struct { // Offset to the end of the index entries Offset uint32 // Hash is a SHA-1 over the extension types and their sizes (but not // their contents). Hash plumbing.Hash } ================================================ FILE: modules/plumbing/format/index/match.go ================================================ package index import ( "path/filepath" "runtime" "unicode/utf8" ) // match is filepath.Match with support to match fullpath and not only filenames // code from: // https://github.com/golang/go/blob/39852bf4cce6927e01d0136c7843f65a801738cb/src/path/filepath/match.go#L44-L224 func match(pattern, name string) (matched bool, err error) { Pattern: for len(pattern) > 0 { var star bool var chunk string star, chunk, pattern = scanChunk(pattern) // Look for match at current position. t, ok, err := matchChunk(chunk, name) // if we're the last chunk, make sure we've exhausted the name // otherwise we'll give a false result even if we could still match // using the star if ok && (len(t) == 0 || len(pattern) > 0) { name = t continue } if err != nil { return false, err } if star { // Look for match skipping i+1 bytes. // Cannot skip /. for i := 0; i < len(name); i++ { t, ok, err := matchChunk(chunk, name[i+1:]) if ok { // if we're the last chunk, make sure we exhausted the name if len(pattern) == 0 && len(t) > 0 { continue } name = t continue Pattern } if err != nil { return false, err } } } return false, nil } return len(name) == 0, nil } // scanChunk gets the next segment of pattern, which is a non-star string // possibly preceded by a star. func scanChunk(pattern string) (star bool, chunk, rest string) { for len(pattern) > 0 && pattern[0] == '*' { pattern = pattern[1:] star = true } inrange := false var i int Scan: for i = 0; i < len(pattern); i++ { switch pattern[i] { case '\\': if runtime.GOOS != "windows" { // error check handled in matchChunk: bad pattern. if i+1 < len(pattern) { i++ } } case '[': inrange = true case ']': inrange = false case '*': if !inrange { break Scan } } } return star, pattern[0:i], pattern[i:] } // matchChunk checks whether chunk matches the beginning of s. // If so, it returns the remainder of s (after the match). // Chunk is all single-character operators: literals, char classes, and ?. func matchChunk(chunk, s string) (rest string, ok bool, err error) { for len(chunk) > 0 { if len(s) == 0 { return } switch chunk[0] { case '[': // character class r, n := utf8.DecodeRuneInString(s) s = s[n:] chunk = chunk[1:] // We can't end right after '[', we're expecting at least // a closing bracket and possibly a caret. if len(chunk) == 0 { err = filepath.ErrBadPattern return } // possibly negated negated := chunk[0] == '^' if negated { chunk = chunk[1:] } // parse all ranges match := false nrange := 0 for { if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 { chunk = chunk[1:] break } var lo, hi rune if lo, chunk, err = getEsc(chunk); err != nil { return } hi = lo if chunk[0] == '-' { if hi, chunk, err = getEsc(chunk[1:]); err != nil { return } } if lo <= r && r <= hi { match = true } nrange++ } if match == negated { return } case '?': _, n := utf8.DecodeRuneInString(s) s = s[n:] chunk = chunk[1:] case '\\': if runtime.GOOS != "windows" { chunk = chunk[1:] if len(chunk) == 0 { err = filepath.ErrBadPattern return } } fallthrough default: if chunk[0] != s[0] { return } s = s[1:] chunk = chunk[1:] } } return s, true, nil } // getEsc gets a possibly-escaped character from chunk, for a character class. func getEsc(chunk string) (r rune, nchunk string, err error) { if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' { err = filepath.ErrBadPattern return } if chunk[0] == '\\' && runtime.GOOS != "windows" { chunk = chunk[1:] if len(chunk) == 0 { err = filepath.ErrBadPattern return } } r, n := utf8.DecodeRuneInString(chunk) if r == utf8.RuneError && n == 1 { err = filepath.ErrBadPattern } nchunk = chunk[n:] if len(nchunk) == 0 { err = filepath.ErrBadPattern } return } ================================================ FILE: modules/plumbing/format/pktline/encoder.go ================================================ // Package pktline implements reading payloads form pkt-lines and encoding // pkt-lines from payloads. package pktline import ( "bytes" "errors" "fmt" "io" ) // An Encoder writes pkt-lines to an output stream. type Encoder struct { w io.Writer } const ( // MaxPayloadSize is the maximum payload size of a pkt-line in bytes. MaxPayloadSize = 65516 // For compatibility with canonical Git implementation, accept longer pkt-lines OversizePayloadMax = 65520 ) var ( // FlushPkt are the contents of a flush-pkt pkt-line. FlushPkt = []byte{'0', '0', '0', '0'} // Flush is the payload to use with the Encode method to encode a flush-pkt. Flush = []byte{} // FlushString is the payload to use with the EncodeString method to encode a flush-pkt. FlushString = "" // ErrPayloadTooLong is returned by the Encode methods when any of the // provided payloads is bigger than MaxPayloadSize. ErrPayloadTooLong = errors.New("payload is too long") ) // NewEncoder returns a new encoder that writes to w. func NewEncoder(w io.Writer) *Encoder { return &Encoder{ w: w, } } // Flush encodes a flush-pkt to the output stream. func (e *Encoder) Flush() error { _, err := e.w.Write(FlushPkt) return err } // Encode encodes a pkt-line with the payload specified and write it to // the output stream. If several payloads are specified, each of them // will get streamed in their own pkt-lines. func (e *Encoder) Encode(payloads ...[]byte) error { for _, p := range payloads { if err := e.encodeLine(p); err != nil { return err } } return nil } func (e *Encoder) encodeLine(p []byte) error { if len(p) > MaxPayloadSize { return ErrPayloadTooLong } if bytes.Equal(p, Flush) { return e.Flush() } n := len(p) + 4 if _, err := e.w.Write(asciiHex16(n)); err != nil { return err } _, err := e.w.Write(p) return err } const ( hexChar = "0123456789abcdef" ) // Returns the hexadecimal ascii representation of the 16 less // significant bits of n. The length of the returned slice will always // be 4. Example: if n is 1234 (0x4d2), the return value will be // []byte{'0', '4', 'd', '2'}. func asciiHex16(n int) []byte { var ret [4]byte byteToASCIIHex := func(n byte) byte { return hexChar[n&15] } ret[0] = byteToASCIIHex(byte(n & 0xf000 >> 12)) ret[1] = byteToASCIIHex(byte(n & 0x0f00 >> 8)) ret[2] = byteToASCIIHex(byte(n & 0x00f0 >> 4)) ret[3] = byteToASCIIHex(byte(n & 0x000f)) return ret[:] } // EncodeString works similarly as Encode but payloads are specified as strings. func (e *Encoder) EncodeString(payloads ...string) error { for _, p := range payloads { if err := e.Encode([]byte(p)); err != nil { return err } } return nil } // Encodef encodes a single pkt-line with the payload formatted as // the format specifier. The rest of the arguments will be used in // the format string. func (e *Encoder) Encodef(format string, a ...any) error { return e.EncodeString( fmt.Sprintf(format, a...), ) } ================================================ FILE: modules/plumbing/format/pktline/encoder_test.go ================================================ package pktline import ( "fmt" "os" "testing" ) func TestEncodeLen(t *testing.T) { nums := []int{0, 1, 2, 3, 4, 7, 65535, 1000, 2000, 445, 7236} for _, n := range nums { fmt.Fprintf(os.Stderr, "%d %s %04x\n", n, asciiHex16(n), n) } } ================================================ FILE: modules/plumbing/format/pktline/scanner.go ================================================ package pktline import ( "errors" "io" ) const ( lenSize = 4 ) // ErrInvalidPktLen is returned by Err() when an invalid pkt-len is found. var ErrInvalidPktLen = errors.New("invalid pkt-len found") // Scanner provides a convenient interface for reading the payloads of a // series of pkt-lines. It takes an io.Reader providing the source, // which then can be tokenized through repeated calls to the Scan // method. // // After each Scan call, the Bytes method will return the payload of the // corresponding pkt-line on a shared buffer, which will be 65516 bytes // or smaller. Flush pkt-lines are represented by empty byte slices. // // Scanning stops at EOF or the first I/O error. type Scanner struct { r io.Reader // The reader provided by the client err error // Sticky error payload []byte // Last pkt-payload len [lenSize]byte // Last pkt-len } // NewScanner returns a new Scanner to read from r. func NewScanner(r io.Reader) *Scanner { return &Scanner{ r: r, } } // Err returns the first error encountered by the Scanner. func (s *Scanner) Err() error { return s.err } // Scan advances the Scanner to the next pkt-line, whose payload will // then be available through the Bytes method. Scanning stops at EOF // or the first I/O error. After Scan returns false, the Err method // will return any error that occurred during scanning, except that if // it was io.EOF, Err will return nil. func (s *Scanner) Scan() bool { var l int l, s.err = s.readPayloadLen() if errors.Is(s.err, io.EOF) { s.err = nil return false } if s.err != nil { return false } if cap(s.payload) < l { s.payload = make([]byte, 0, l) } if _, s.err = io.ReadFull(s.r, s.payload[:l]); s.err != nil { return false } s.payload = s.payload[:l] return true } // Bytes returns the most recent payload generated by a call to Scan. // The underlying array may point to data that will be overwritten by a // subsequent call to Scan. It does no allocation. func (s *Scanner) Bytes() []byte { return s.payload } // Method readPayloadLen returns the payload length by reading the // pkt-len and subtracting the pkt-len size. func (s *Scanner) readPayloadLen() (int, error) { if _, err := io.ReadFull(s.r, s.len[:]); err != nil { if errors.Is(err, io.ErrUnexpectedEOF) { return 0, ErrInvalidPktLen } return 0, err } n, err := hexDecode(s.len) if err != nil { return 0, err } switch { case n == 0: return 0, nil case n <= lenSize: return 0, ErrInvalidPktLen case n > OversizePayloadMax+lenSize: return 0, ErrInvalidPktLen default: return n - lenSize, nil } } const ( reverseHexTable = "" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ) func hexval(b byte) int { return int(reverseHexTable[b]) } // Turns the hexadecimal representation of a number in a byte slice into // a number. This function substitute strconv.ParseUint(string(buf), 16, // 16) and/or hex.Decode, to avoid generating new strings, thus helping the // GC. func hexDecode(lenBytes [lenSize]byte) (int, error) { a := hexval(lenBytes[0]) b := hexval(lenBytes[1]) c := hexval(lenBytes[2]) d := hexval(lenBytes[3]) if a > 0xf || b > 0xf || c > 0xf || d > 0xf { return 0, ErrInvalidPktLen } return (a << 12) | (b << 8) | (c << 4) | d, nil } ================================================ FILE: modules/plumbing/format/pktline/scanner_test.go ================================================ package pktline import ( "fmt" "os" "testing" ) func TestHexDecode(t *testing.T) { ss := []string{ "0014", "ffff", "abcd", "wwwww", "1186", "0000", } for _, s := range ss { var b [lenSize]byte copy(b[:], []byte(s)) v, err := hexDecode(b) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) continue } fmt.Fprintf(os.Stderr, "%s %d 0x%04x\n", s, v, v) } } ================================================ FILE: modules/plumbing/format/readme.md ================================================ # Keep ================================================ FILE: modules/plumbing/hash.go ================================================ package plumbing import ( "bytes" "encoding/hex" "encoding/json" "fmt" "hash" "sort" "github.com/antgroup/hugescm/modules/strengthen" "github.com/zeebo/blake3" ) const ( HASH_DIGEST_SIZE = 32 HASH_HEX_SIZE = 64 reverseHexTable = "" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ) const ( BLANK_BLOB = "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262" BLANK_TREE = "e448b21e70d321c1ee07c7b3ca6effa275aee59cdba662afb7152182a3706eb7" ZERO_OID = "0000000000000000000000000000000000000000000000000000000000000000" ) // Hash BLAKE3 hashed content type Hash [HASH_DIGEST_SIZE]byte func (h Hash) MarshalJSON() ([]byte, error) { return strengthen.BufferCat("\"", h.String(), "\""), nil } func (h *Hash) UnmarshalJSON(b []byte) error { var s string if err := json.Unmarshal(b, &s); err != nil { return err } hashBytes, _ := hex.DecodeString(s) copy(h[:], hashBytes) return nil } // TOML func (h Hash) MarshalText() ([]byte, error) { return []byte(h.String()), nil } func (h *Hash) UnmarshalText(text []byte) error { hashBytes, _ := hex.DecodeString(string(text)) copy(h[:], hashBytes) return nil } var ( // ZeroHash is Hash with value zero ZeroHash Hash // EmptyBlob is Hash with empty blob EmptyBlob = NewHash(BLANK_BLOB) // EmptyTree is Hash with empty tree EmptyTree = NewHash(BLANK_TREE) ) // NewHash return a new Hash from a hexadecimal hash representation func NewHash(s string) Hash { b, _ := hex.DecodeString(s) var h Hash copy(h[:], b) return h } func (h Hash) IsZero() bool { return h == ZeroHash } func (h Hash) String() string { return hex.EncodeToString(h[:]) } func (h Hash) Shorten() int { i := HASH_DIGEST_SIZE - 1 for ; i >= 4; i-- { if h[i] != 0 { return i + 1 } } return i + 1 } func (h Hash) Prefix() string { return hex.EncodeToString(h[:h.Shorten()]) } // HashesSort sorts a slice of Hashes in increasing order. func HashesSort(a []Hash) { sort.Sort(HashSlice(a)) } // HashSlice attaches the methods of sort.Interface to []Hash, sorting in // increasing order. type HashSlice []Hash func (p HashSlice) Len() int { return len(p) } func (p HashSlice) Less(i, j int) bool { return bytes.Compare(p[i][:], p[j][:]) < 0 } func (p HashSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } // ValidateHashHex returns true if the given string is a valid hash. func ValidateHashHex(s string) bool { if len(s) != HASH_HEX_SIZE { return false } bs := []byte(s) for _, b := range bs { if c := reverseHexTable[b]; c > 0x0f { return false } } return true } func NewHashEx(s string) (Hash, error) { if !ValidateHashHex(s) { return ZeroHash, fmt.Errorf("zeta: '%s' not a valid object name", s) } return NewHash(s), nil } func IsLooseDir(s string) bool { if len(s) != 2 { return false } bs := []byte(s) for _, b := range bs { if c := reverseHexTable[b]; c > 0x0f { return false } } return true } type Hasher struct { hash.Hash } func NewHasher() Hasher { return Hasher{Hash: blake3.New()} } func (h Hasher) Sum() (hash Hash) { copy(hash[:], h.Hash.Sum(nil)) return } ================================================ FILE: modules/plumbing/reference.go ================================================ package plumbing import ( "errors" "fmt" "strings" ) const ( ReferencePrefix = "refs/" refHeadPrefix = ReferencePrefix + "heads/" refTagPrefix = ReferencePrefix + "tags/" refRemotePrefix = ReferencePrefix + "remotes/" symrefPrefix = "ref: " ) const ( Origin = "origin" ) const ( RefRevParseRulesCount = 6 ) // RefRevParseRules are a set of rules to parse references into short names. // These are the same rules as used by git in shorten_unambiguous_ref. // See: https://github.com/git/git/blob/9857273be005833c71e2d16ba48e193113e12276/refs.c#L610 var RefRevParseRules = []string{ "%s", "refs/%s", "refs/tags/%s", "refs/heads/%s", "refs/remotes/%s", "refs/remotes/%s/HEAD", } var ( ErrReferenceNotFound = errors.New("reference does not exist") ) // ReferenceType reference type's type ReferenceType int8 const ( InvalidReference ReferenceType = 0 HashReference ReferenceType = 1 SymbolicReference ReferenceType = 2 ) func (r ReferenceType) String() string { switch r { case InvalidReference: return "invalid-reference" case HashReference: return "hash-reference" case SymbolicReference: return "symbolic-reference" } return "" } // ReferenceName reference name's type ReferenceName string // NewBranchReferenceName returns a reference name describing a branch based on // his short name. func NewBranchReferenceName(name string) ReferenceName { return ReferenceName(refHeadPrefix + name) } // NewRemoteReferenceName returns a reference name describing a remote branch // based on his short name and the remote name. func NewRemoteReferenceName(remote, name string) ReferenceName { return ReferenceName(refRemotePrefix + fmt.Sprintf("%s/%s", remote, name)) } // NewRemoteHEADReferenceName returns a reference name describing a the HEAD // branch of a remote. func NewRemoteHEADReferenceName(remote string) ReferenceName { return ReferenceName(refRemotePrefix + fmt.Sprintf("%s/%s", remote, HEAD)) } // NewTagReferenceName returns a reference name describing a tag based on short // his name. func NewTagReferenceName(name string) ReferenceName { return ReferenceName(refTagPrefix + name) } func (r ReferenceName) HasReferencePrefix() bool { return strings.HasPrefix(string(r), ReferencePrefix) } // IsBranch check if a reference is a branch func (r ReferenceName) IsBranch() bool { return strings.HasPrefix(string(r), refHeadPrefix) } func (r ReferenceName) BranchName() string { return strings.TrimPrefix(string(r), refHeadPrefix) } // IsRemote check if a reference is a remote func (r ReferenceName) IsRemote() bool { return strings.HasPrefix(string(r), refRemotePrefix) } // IsTag check if a reference is a tag func (r ReferenceName) IsTag() bool { return strings.HasPrefix(string(r), refTagPrefix) } func (r ReferenceName) TagName() string { return strings.TrimPrefix(string(r), refTagPrefix) } func (r ReferenceName) String() string { return string(r) } // Short returns the short name of a ReferenceName // // un strict, does not check whether the name is ambiguous func (r ReferenceName) Short() string { s := string(r) res := s // skip first for _, format := range RefRevParseRules[1:] { _, err := fmt.Sscanf(s, format, &res) if err == nil { continue } } return res } func (r ReferenceName) Prefix() string { if r.IsBranch() { return "refs/heads" } if r.IsTag() { return "refs/tags" } if r.IsRemote() { return "refs/remotes" } return string(r) } const ( HEAD ReferenceName = "HEAD" Mainline ReferenceName = "refs/heads/mainline" ) // Reference is a representation of git reference type Reference struct { t ReferenceType n ReferenceName h Hash target ReferenceName } // NewReferenceFromStrings creates a reference from name and target as string, // the resulting reference can be a SymbolicReference or a HashReference base // on the target provided func NewReferenceFromStrings(name, target string) *Reference { n := ReferenceName(name) if strings.HasPrefix(target, symrefPrefix) { target := ReferenceName(target[len(symrefPrefix):]) return NewSymbolicReference(n, target) } return NewHashReference(n, NewHash(target)) } // NewSymbolicReference creates a new SymbolicReference reference func NewSymbolicReference(n, target ReferenceName) *Reference { return &Reference{ t: SymbolicReference, n: n, target: target, } } // NewHashReference creates a new HashReference reference func NewHashReference(n ReferenceName, h Hash) *Reference { return &Reference{ t: HashReference, n: n, h: h, } } // Type returns the type of a reference func (r *Reference) Type() ReferenceType { return r.t } // Name returns the name of a reference func (r *Reference) Name() ReferenceName { return r.n } // Hash returns the hash of a hash reference func (r *Reference) Hash() Hash { return r.h } // Target returns the target of a symbolic reference func (r *Reference) Target() ReferenceName { return r.target } // Strings dump a reference as a [2]string func (r *Reference) Strings() [2]string { var o [2]string o[0] = r.Name().String() switch r.Type() { case HashReference: o[1] = r.h.String() case SymbolicReference: o[1] = symrefPrefix + r.Target().String() } return o } func (r *Reference) String() string { var ref string switch r.Type() { case HashReference: ref = r.h.String() case SymbolicReference: ref = symrefPrefix + r.Target().String() default: return "" } name := r.Name().String() var v strings.Builder v.Grow(len(ref) + len(name) + 1) v.WriteString(ref) v.WriteString(" ") v.WriteString(name) return v.String() } type ReferenceSlice []*Reference func (p ReferenceSlice) Len() int { return len(p) } func (p ReferenceSlice) Less(i, j int) bool { return p[i].Name() < p[j].Name() } func (p ReferenceSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } ================================================ FILE: modules/plumbing/validate.go ================================================ package plumbing import ( "bytes" "errors" "fmt" ) type ErrBadReferenceName struct { Name string } func (err ErrBadReferenceName) Error() string { return fmt.Sprintf("bad revision name: '%s'", err.Name) } func IsErrBadReferenceName(err error) bool { var e *ErrBadReferenceName return errors.As(err, &e) } // https://github.com/git/git/blob/ae73b2c8f1da39c39335ee76a0f95857712c22a7/refs.c#L41-L290 var ( // refnameDisposition table // // Here golang's logic is different from C's, golang's strings are not NULL-terminated, so byte(0) is a forbidden character. refnameDisposition = [256]byte{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4, } ) /* * How to handle various characters in refnames: * 0: An acceptable character for refs * 1: End-of-component * 2: ., look for a preceding . to reject .. in refs * 3: {, look for a preceding @ to reject @{ in refs * 4: A bad character: ASCII control characters, and * ":", "?", "[", "\", "^", "~", SP, or TAB * 5: *, reject unless REFNAME_REFSPEC_PATTERN is set */ func checkReferenceNameComponent(refname []byte) int { last := byte(0) var i int for ; i < len(refname); i++ { ch := refname[i] & 255 disp := refnameDisposition[ch] switch disp { case 1: goto OUT // Do not use range, which causes extra processing for goto statements. case 2: if last == '.' { return -1 } case 3: if last == '@' { return -1 } case 4: return -1 case 5: // we not use pattern mode return -1 } last = ch } OUT: if i == 0 { return 0 } if refname[0] == '.' { return -1 } if bytes.HasSuffix(refname, []byte(".lock")) { return -1 } return i } /* * Try to read one refname component from the front of refname. * Return the length of the component found, or -1 if the component is * not legal. It is legal if it is something reasonable to have under * ".zeta/refs/"; We do not like it if: * * - it begins with ".", or * - it has double dots "..", or * - it has ASCII control characters, or * - it has ":", "?", "[", "\", "^", "~", SP, or TAB anywhere, or * - it has "*" anywhere unless REFNAME_REFSPEC_PATTERN is set, or * - it ends with a "/", or * - it ends with ".lock", or * - it contains a "@{" portion * * When sanitized is not NULL, instead of rejecting the input refname * as an error, try to come up with a usable replacement for the input * refname in it. */ func ValidateReferenceName(refname []byte) bool { if bytes.Equal(refname, []byte("@")) { return false } var componentLen int for { /* We are at the start of a path component. */ if componentLen = checkReferenceNameComponent(refname); componentLen <= 0 { return false } if len(refname) == componentLen { break } refname = refname[componentLen+1:] } return refname[componentLen-1] != '.' } // ValidateBranchName: creating branches starting with - is not supported func ValidateBranchName(branch []byte) bool { if len(branch) == 0 || branch[0] == '-' { return false } return ValidateReferenceName(branch) } // ValidateTagName: creating tags starting with - is not supported func ValidateTagName(tag []byte) bool { if len(tag) == 0 || tag[0] == '-' { return false } return ValidateReferenceName(tag) } ================================================ FILE: modules/progressbar/LICENSE ================================================ MIT License Copyright (c) 2017 Zack Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/progressbar/VERSION ================================================ https://github.com/schollz/progressbar 03fc4e907750adc6f00a004986a63c80616923b8 ================================================ FILE: modules/progressbar/colorstring/LICENSE ================================================ The MIT License (MIT) Copyright (c) 2014 Mitchell Hashimoto Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/progressbar/colorstring/colorstring.go ================================================ // colorstring provides functions for colorizing strings for terminal // output. package colorstring import ( "bytes" "fmt" "io" "regexp" "strings" ) // Color colorizes your strings using the default settings. // // Strings given to Color should use the syntax `[color]` to specify the // color for text following. For example: `[blue]Hello` will return "Hello" // in blue. See DefaultColors for all the supported colors and attributes. // // If an unrecognized color is given, it is ignored and assumed to be part // of the string. For example: `[hi]world` will result in "[hi]world". // // A color reset is appended to the end of every string. This will reset // the color of following strings when you output this text to the same // terminal session. // // If you want to customize any of this behavior, use the Colorize struct. func Color(v string) string { return def.Color(v) } // ColorPrefix returns the color sequence that prefixes the given text. // // This is useful when wrapping text if you want to inherit the color // of the wrapped text. For example, "[green]foo" will return "[green]". // If there is no color sequence, then this will return "". func ColorPrefix(v string) string { return def.ColorPrefix(v) } // Colorize colorizes your strings, giving you the ability to customize // some of the colorization process. // // The options in Colorize can be set to customize colorization. If you're // only interested in the defaults, just use the top Color function directly, // which creates a default Colorize. type Colorize struct { // Colors maps a color string to the code for that color. The code // is a string so that you can use more complex colors to set foreground, // background, attributes, etc. For example, "boldblue" might be // "1;34" Colors map[string]string // If true, color attributes will be ignored. This is useful if you're // outputting to a location that doesn't support colors and you just // want the strings returned. Disable bool // Reset, if true, will reset the color after each colorization by // adding a reset code at the end. Reset bool } // Color colorizes a string according to the settings setup in the struct. // // For more details on the syntax, see the top-level Color function. func (c *Colorize) Color(v string) string { matches := parseRe.FindAllStringIndex(v, -1) if len(matches) == 0 { return v } result := new(bytes.Buffer) colored := false m := []int{0, 0} for _, nm := range matches { // Write the text in between this match and the last result.WriteString(v[m[1]:nm[0]]) m = nm var replace string if code, ok := c.Colors[v[m[0]+1:m[1]-1]]; ok { colored = true if !c.Disable { replace = fmt.Sprintf("\033[%sm", code) } } else { replace = v[m[0]:m[1]] } result.WriteString(replace) } result.WriteString(v[m[1]:]) if colored && c.Reset && !c.Disable { // Write the clear byte at the end result.WriteString("\033[0m") } return result.String() } // ColorPrefix returns the first color sequence that exists in this string. // // For example: "[green]foo" would return "[green]". If no color sequence // exists, then "" is returned. This is especially useful when wrapping // colored texts to inherit the color of the wrapped text. func (c *Colorize) ColorPrefix(v string) string { return prefixRe.FindString(strings.TrimSpace(v)) } // DefaultColors are the default colors used when colorizing. // // If the color is surrounded in underscores, such as "_blue_", then that // color will be used for the background color. var DefaultColors map[string]string func init() { DefaultColors = map[string]string{ // Default foreground/background colors "default": "39", "_default_": "49", // Foreground colors "black": "30", "red": "31", "green": "32", "yellow": "33", "blue": "34", "magenta": "35", "cyan": "36", "light_gray": "37", "dark_gray": "90", "light_red": "91", "light_green": "92", "light_yellow": "93", "light_blue": "94", "light_magenta": "95", "light_cyan": "96", "white": "97", // Background colors "_black_": "40", "_red_": "41", "_green_": "42", "_yellow_": "43", "_blue_": "44", "_magenta_": "45", "_cyan_": "46", "_light_gray_": "47", "_dark_gray_": "100", "_light_red_": "101", "_light_green_": "102", "_light_yellow_": "103", "_light_blue_": "104", "_light_magenta_": "105", "_light_cyan_": "106", "_white_": "107", // Attributes "bold": "1", "dim": "2", "underline": "4", "blink_slow": "5", "blink_fast": "6", "invert": "7", "hidden": "8", // Reset to reset everything to their defaults "reset": "0", "reset_bold": "21", } def = Colorize{ Colors: DefaultColors, Reset: true, } } var def Colorize var parseReRaw = `\[[a-z0-9_-]+\]` var parseRe = regexp.MustCompile(`(?i)` + parseReRaw) var prefixRe = regexp.MustCompile(`^(?i)(` + parseReRaw + `)+`) // Print is a convenience wrapper for fmt.Print with support for color codes. // // Print formats using the default formats for its operands and writes to // standard output with support for color codes. Spaces are added between // operands when neither is a string. It returns the number of bytes written // and any write error encountered. func Print(a string) (n int, err error) { return fmt.Print(Color(a)) } // Println is a convenience wrapper for fmt.Println with support for color // codes. // // Println formats using the default formats for its operands and writes to // standard output with support for color codes. Spaces are always added // between operands and a newline is appended. It returns the number of bytes // written and any write error encountered. func Println(a string) (n int, err error) { return fmt.Println(Color(a)) } // Printf is a convenience wrapper for fmt.Printf with support for color codes. // // Printf formats according to a format specifier and writes to standard output // with support for color codes. It returns the number of bytes written and any // write error encountered. func Printf(format string, a ...any) (n int, err error) { return fmt.Printf(Color(format), a...) } // Fprint is a convenience wrapper for fmt.Fprint with support for color codes. // // Fprint formats using the default formats for its operands and writes to w // with support for color codes. Spaces are added between operands when neither // is a string. It returns the number of bytes written and any write error // encountered. func Fprint(w io.Writer, a string) (n int, err error) { return fmt.Fprint(w, Color(a)) } // Fprintln is a convenience wrapper for fmt.Fprintln with support for color // codes. // // Fprintln formats using the default formats for its operands and writes to w // with support for color codes. Spaces are always added between operands and a // newline is appended. It returns the number of bytes written and any write // error encountered. func Fprintln(w io.Writer, a string) (n int, err error) { return fmt.Fprintln(w, Color(a)) } // Fprintf is a convenience wrapper for fmt.Fprintf with support for color // codes. // // Fprintf formats according to a format specifier and writes to w with support // for color codes. It returns the number of bytes written and any write error // encountered. func Fprintf(w io.Writer, format string, a ...any) (n int, err error) { return fmt.Fprintf(w, Color(format), a...) } ================================================ FILE: modules/progressbar/progressbar.go ================================================ package progressbar import ( "bytes" "errors" "fmt" "io" "math" "os" "strings" "sync" "time" "github.com/antgroup/hugescm/modules/progressbar/colorstring" "github.com/charmbracelet/x/ansi" "golang.org/x/term" ) // ProgressBar is a thread-safe, simple // progress bar type ProgressBar struct { state state config config lock sync.Mutex } // State is the basic properties of the bar type State struct { Max int64 CurrentNum int64 CurrentPercent float64 CurrentBytes float64 SecondsSince float64 SecondsLeft float64 KBsPerSecond float64 Description string } type state struct { currentNum int64 currentPercent int lastPercent int currentSaucerSize int isAltSaucerHead bool lastShown time.Time startTime time.Time // time when the progress bar start working counterTime time.Time counterNumSinceLast int64 counterLastTenRates []float64 spinnerIdx int // the index of spinner maxLineWidth int currentBytes float64 finished bool exit bool // Progress bar exit halfway details []string // details to show,only used when detail row is set to more than 0 rendered string } type config struct { max int64 // max number of the counter maxHumanized string maxHumanizedSuffix string width int writer io.Writer theme Theme renderWithBlankState bool description string iterationString string ignoreLength bool // ignoreLength if max bytes not known // whether the output is expected to contain color codes colorCodes bool // show rate of change in kB/sec or MB/sec showBytes bool // show the iterations per second showIterationsPerSecond bool showIterationsCount bool // whether the progress bar should show the total bytes (e.g. 23/24 or 23/-, vs. just 23). showTotalBytes bool // whether the progress bar should show elapsed time. // always enabled if predictTime is true. elapsedTime bool showElapsedTimeOnFinish bool // whether the progress bar should attempt to predict the finishing // time of the progress based on the start time and the average // number of seconds between increments. predictTime bool // minimum time to wait in between updates throttleDuration time.Duration // clear bar once finished clearOnFinish bool // spinnerType should be a number between 0-75 spinnerType int // spinnerTypeOptionUsed remembers if the spinnerType was changed manually spinnerTypeOptionUsed bool // spinnerChangeInterval the change interval of spinner // if set this attribute to 0, the spinner only change when renderProgressBar was called // for example, each time when Add() was called,which will call renderProgressBar function spinnerChangeInterval time.Duration // spinner represents the spinner as a slice of string spinner []string // fullWidth specifies whether to measure and set the bar to a specific width fullWidth bool // invisible doesn't render the bar at all, useful for debugging invisible bool onCompletion func() // whether the render function should make use of ANSI codes to reduce console I/O useANSICodes bool // whether to use the IEC units (e.g. MiB) instead of the default SI units (e.g. MB) useIECUnits bool // showDescriptionAtLineEnd specifies whether description should be written at line end instead of line start showDescriptionAtLineEnd bool // specifies how many rows of details to show,default value is 0 and no details will be shown maxDetailRow int stdBuffer bytes.Buffer } // Theme defines the elements of the bar type Theme struct { Saucer string AltSaucerHead string SaucerHead string SaucerPadding string BarStart string BarEnd string // BarStartFilled is used after the Bar starts filling, if set. Otherwise, it defaults to BarStart. BarStartFilled string // BarEndFilled is used once the Bar finishes, if set. Otherwise, it defaults to BarEnd. BarEndFilled string } var ( // ThemeDefault is given by default (if not changed with OptionSetTheme), and it looks like "|████ |". ThemeDefault = Theme{Saucer: "█", SaucerPadding: " ", BarStart: "|", BarEnd: "|"} // ThemeASCII is a predefined Theme that uses ASCII symbols. It looks like "[===>...]". // Configure it with OptionSetTheme(ThemeASCII). ThemeASCII = Theme{ Saucer: "=", SaucerHead: ">", SaucerPadding: ".", BarStart: "[", BarEnd: "]", } // ThemeUnicode is a predefined Theme that uses Unicode characters, displaying a graphic bar. // It looks like "" (rendering will depend on font being used). // It requires special symbols usually found in "nerd fonts" [2], or in Fira Code [1], and other sources. // Configure it with OptionSetTheme(ThemeUnicode). // // [1] https://github.com/tonsky/FiraCode // [2] https://www.nerdfonts.com/ ThemeUnicode = Theme{ Saucer: "\uEE04", //  SaucerHead: "\uEE04", //  SaucerPadding: "\uEE01", //  BarStart: "\uEE00", //  BarStartFilled: "\uEE03", //  BarEnd: "\uEE02", //  BarEndFilled: "\uEE05", //  } ) // Option is the type all options need to adhere to type Option func(p *ProgressBar) // OptionSetWidth sets the width of the bar func OptionSetWidth(s int) Option { return func(p *ProgressBar) { p.config.width = s } } // OptionSetSpinnerChangeInterval sets the spinner change interval // the spinner will change according to this value. // By default, this value is 100 * time.Millisecond // If you don't want to let this progressbar update by specified time interval // you can set this value to zero, then the spinner will change each time rendered, // such as when Add() or Describe() was called func OptionSetSpinnerChangeInterval(interval time.Duration) Option { return func(p *ProgressBar) { p.config.spinnerChangeInterval = interval } } // OptionSpinnerType sets the type of spinner used for indeterminate bars func OptionSpinnerType(spinnerType int) Option { return func(p *ProgressBar) { p.config.spinnerTypeOptionUsed = true p.config.spinnerType = spinnerType } } // OptionSpinnerCustom sets the spinner used for indeterminate bars to the passed // slice of string func OptionSpinnerCustom(spinner []string) Option { return func(p *ProgressBar) { p.config.spinner = spinner } } // OptionSetTheme sets the elements the bar is constructed with. // There are two pre-defined themes you can use: ThemeASCII and ThemeUnicode. func OptionSetTheme(t Theme) Option { return func(p *ProgressBar) { p.config.theme = t } } // OptionSetVisibility sets the visibility func OptionSetVisibility(visibility bool) Option { return func(p *ProgressBar) { p.config.invisible = !visibility } } // OptionFullWidth sets the bar to be full width func OptionFullWidth() Option { return func(p *ProgressBar) { p.config.fullWidth = true } } // OptionSetWriter sets the output writer (defaults to os.StdOut) func OptionSetWriter(w io.Writer) Option { return func(p *ProgressBar) { p.config.writer = w } } // OptionSetRenderBlankState sets whether or not to render a 0% bar on construction func OptionSetRenderBlankState(r bool) Option { return func(p *ProgressBar) { p.config.renderWithBlankState = r } } // OptionSetDescription sets the description of the bar to render in front of it func OptionSetDescription(description string) Option { return func(p *ProgressBar) { p.config.description = description } } // OptionEnableColorCodes enables or disables support for color codes // using mitchellh/colorstring func OptionEnableColorCodes(colorCodes bool) Option { return func(p *ProgressBar) { p.config.colorCodes = colorCodes } } // OptionSetElapsedTime will enable elapsed time. Always enabled if OptionSetPredictTime is true. func OptionSetElapsedTime(elapsedTime bool) Option { return func(p *ProgressBar) { p.config.elapsedTime = elapsedTime } } // OptionSetPredictTime will also attempt to predict the time remaining. func OptionSetPredictTime(predictTime bool) Option { return func(p *ProgressBar) { p.config.predictTime = predictTime } } // OptionShowCount will also print current count out of total func OptionShowCount() Option { return func(p *ProgressBar) { p.config.showIterationsCount = true } } // OptionShowIts will also print the iterations/second func OptionShowIts() Option { return func(p *ProgressBar) { p.config.showIterationsPerSecond = true } } // OptionShowElapsedTimeOnFinish will keep the display of elapsed time on finish. func OptionShowElapsedTimeOnFinish() Option { return func(p *ProgressBar) { p.config.showElapsedTimeOnFinish = true } } // OptionShowTotalBytes will keep the display of total bytes. func OptionShowTotalBytes(flag bool) Option { return func(p *ProgressBar) { p.config.showTotalBytes = flag } } // OptionSetItsString sets what's displayed for iterations a second. The default is "it" which would display: "it/s" func OptionSetItsString(iterationString string) Option { return func(p *ProgressBar) { p.config.iterationString = iterationString } } // OptionThrottle will wait the specified duration before updating again. The default // duration is 0 seconds. func OptionThrottle(duration time.Duration) Option { return func(p *ProgressBar) { p.config.throttleDuration = duration } } // OptionClearOnFinish will clear the bar once its finished. func OptionClearOnFinish() Option { return func(p *ProgressBar) { p.config.clearOnFinish = true } } // OptionOnCompletion will invoke cmpl function once its finished func OptionOnCompletion(cmpl func()) Option { return func(p *ProgressBar) { p.config.onCompletion = cmpl } } // OptionShowBytes will update the progress bar // configuration settings to display/hide kBytes/Sec func OptionShowBytes(val bool) Option { return func(p *ProgressBar) { p.config.showBytes = val } } // OptionUseANSICodes will use more optimized terminal i/o. // // Only useful in environments with support for ANSI escape sequences. func OptionUseANSICodes(val bool) Option { return func(p *ProgressBar) { p.config.useANSICodes = val } } // OptionUseIECUnits will enable IEC units (e.g. MiB) instead of the default // SI units (e.g. MB). func OptionUseIECUnits(val bool) Option { return func(p *ProgressBar) { p.config.useIECUnits = val } } // OptionShowDescriptionAtLineEnd defines whether description should be written at line end instead of line start func OptionShowDescriptionAtLineEnd() Option { return func(p *ProgressBar) { p.config.showDescriptionAtLineEnd = true } } // OptionSetMaxDetailRow sets the max row of details // the row count should be less than the terminal height, otherwise it will not give you the output you want func OptionSetMaxDetailRow(row int) Option { return func(p *ProgressBar) { p.config.maxDetailRow = row } } // OptionSeekTo seek to offset func OptionSeekTo(offset int64) Option { return func(p *ProgressBar) { p.state.currentNum = offset } } // NewOptions constructs a new instance of ProgressBar, with any options you specify func NewOptions(maxVal int, options ...Option) *ProgressBar { return NewOptions64(int64(maxVal), options...) } // NewOptions64 constructs a new instance of ProgressBar, with any options you specify func NewOptions64(maxVal int64, options ...Option) *ProgressBar { b := ProgressBar{ state: state{ startTime: time.Time{}, lastShown: time.Time{}, counterTime: time.Time{}, }, config: config{ writer: os.Stdout, theme: ThemeDefault, iterationString: "it", width: 40, max: maxVal, throttleDuration: 0 * time.Nanosecond, elapsedTime: maxVal == -1, predictTime: true, spinnerType: 9, invisible: false, spinnerChangeInterval: 100 * time.Millisecond, showTotalBytes: true, }, } for _, o := range options { o(&b) } if b.config.spinnerType < 0 || b.config.spinnerType > 75 { panic("invalid spinner type, must be between 0 and 75") } if b.config.maxDetailRow < 0 { panic("invalid max detail row, must be greater than 0") } // ignoreLength if max bytes not known if b.config.max == -1 { b.lengthUnknown() } b.config.maxHumanized, b.config.maxHumanizedSuffix = humanizeBytes(float64(b.config.max), b.config.useIECUnits) if b.config.renderWithBlankState { _ = b.RenderBlank() } // if the render time interval attribute is set if b.config.spinnerChangeInterval != 0 && !b.config.invisible && b.config.ignoreLength { go func() { ticker := time.NewTicker(b.config.spinnerChangeInterval) defer ticker.Stop() for range ticker.C { if b.IsFinished() { return } if b.IsStarted() { b.lock.Lock() _ = b.render() b.lock.Unlock() } } }() } return &b } func getBasicState() state { now := time.Now() return state{ startTime: now, lastShown: now, counterTime: now, } } // New returns a new ProgressBar // with the specified maximum func New(maxVal int) *ProgressBar { return NewOptions(maxVal) } // DefaultBytes provides a progressbar to measure byte // throughput with recommended defaults. // Set maxBytes to -1 to use as a spinner. func DefaultBytes(maxBytes int64, description ...string) *ProgressBar { desc := "" if len(description) > 0 { desc = description[0] } return NewOptions64( maxBytes, OptionSetDescription(desc), OptionSetWriter(os.Stderr), OptionShowBytes(true), OptionShowTotalBytes(true), OptionSetWidth(10), OptionThrottle(65*time.Millisecond), OptionShowCount(), OptionOnCompletion(func() { fmt.Fprint(os.Stderr, "\n") }), OptionSpinnerType(14), OptionFullWidth(), OptionSetRenderBlankState(true), ) } // DefaultBytesSilent is the same as DefaultBytes, but does not output anywhere. // String() can be used to get the output instead. func DefaultBytesSilent(maxBytes int64, description ...string) *ProgressBar { // Mostly the same bar as DefaultBytes desc := "" if len(description) > 0 { desc = description[0] } return NewOptions64( maxBytes, OptionSetDescription(desc), OptionSetWriter(io.Discard), OptionShowBytes(true), OptionShowTotalBytes(true), OptionSetWidth(10), OptionThrottle(65*time.Millisecond), OptionShowCount(), OptionSpinnerType(14), OptionFullWidth(), ) } // Default provides a progressbar with recommended defaults. // Set max to -1 to use as a spinner. func Default(maxVal int64, description ...string) *ProgressBar { desc := "" if len(description) > 0 { desc = description[0] } return NewOptions64( maxVal, OptionSetDescription(desc), OptionSetWriter(os.Stderr), OptionSetWidth(10), OptionShowTotalBytes(true), OptionThrottle(65*time.Millisecond), OptionShowCount(), OptionShowIts(), OptionOnCompletion(func() { fmt.Fprint(os.Stderr, "\n") }), OptionSpinnerType(14), OptionFullWidth(), OptionSetRenderBlankState(true), ) } // DefaultSilent is the same as Default, but does not output anywhere. // String() can be used to get the output instead. func DefaultSilent(maxVal int64, description ...string) *ProgressBar { // Mostly the same bar as Default desc := "" if len(description) > 0 { desc = description[0] } return NewOptions64( maxVal, OptionSetDescription(desc), OptionSetWriter(io.Discard), OptionSetWidth(10), OptionShowTotalBytes(true), OptionThrottle(65*time.Millisecond), OptionShowCount(), OptionShowIts(), OptionSpinnerType(14), OptionFullWidth(), ) } // String returns the current rendered version of the progress bar. // It will never return an empty string while the progress bar is running. func (p *ProgressBar) String() string { return p.state.rendered } // RenderBlank renders the current bar state, you can use this to render a 0% state func (p *ProgressBar) RenderBlank() error { p.lock.Lock() defer p.lock.Unlock() if p.config.invisible { return nil } if p.state.currentNum == 0 { p.state.lastShown = time.Time{} } return p.render() } // StartWithoutRender will start the progress bar without rendering it // this method is created for the use case where you want to start the progress // but don't want to render it immediately. // If you want to start the progress and render it immediately, use RenderBlank instead, // or maybe you can use Add to start it automatically, but it will make the time calculation less precise. func (p *ProgressBar) StartWithoutRender() { p.lock.Lock() defer p.lock.Unlock() if p.IsStarted() { return } p.state.startTime = time.Now() // the counterTime should be set to the current time p.state.counterTime = time.Now() } // Reset will reset the clock that is used // to calculate current time and the time left. func (p *ProgressBar) Reset() { p.lock.Lock() defer p.lock.Unlock() p.state = getBasicState() } // Finish will fill the bar to full func (p *ProgressBar) Finish() error { p.lock.Lock() p.state.currentNum = p.config.max if !p.config.ignoreLength { p.state.currentBytes = float64(p.config.max) } p.lock.Unlock() return p.Add(0) } // Exit will exit the bar to keep current state func (p *ProgressBar) Exit() error { p.lock.Lock() defer p.lock.Unlock() p.state.exit = true if p.config.onCompletion != nil { p.config.onCompletion() } return nil } // Add will add the specified amount to the progressbar func (p *ProgressBar) Add(num int) error { return p.Add64(int64(num)) } // Set will set the bar to a current number func (p *ProgressBar) Set(num int) error { return p.Set64(int64(num)) } // Set64 will set the bar to a current number func (p *ProgressBar) Set64(num int64) error { p.lock.Lock() toAdd := num - int64(p.state.currentBytes) p.lock.Unlock() return p.Add64(toAdd) } // Add64 will add the specified amount to the progressbar func (p *ProgressBar) Add64(num int64) error { if p.config.invisible { return nil } p.lock.Lock() defer p.lock.Unlock() if p.state.exit { return nil } // error out since OptionSpinnerCustom will always override a manually set spinnerType if p.config.spinnerTypeOptionUsed && len(p.config.spinner) > 0 { return errors.New("OptionSpinnerType and OptionSpinnerCustom cannot be used together") } if p.config.max == 0 { return errors.New("max must be greater than 0") } if p.state.currentNum < p.config.max { if p.config.ignoreLength { p.state.currentNum = (p.state.currentNum + num) % p.config.max } else { p.state.currentNum += num } } p.state.currentBytes += float64(num) if p.state.counterTime.IsZero() { p.state.counterTime = time.Now() } // reset the countdown timer every second to take rolling average p.state.counterNumSinceLast += num if time.Since(p.state.counterTime).Seconds() > 0.5 { p.state.counterLastTenRates = append(p.state.counterLastTenRates, float64(p.state.counterNumSinceLast)/time.Since(p.state.counterTime).Seconds()) if len(p.state.counterLastTenRates) > 10 { p.state.counterLastTenRates = p.state.counterLastTenRates[1:] } p.state.counterTime = time.Now() p.state.counterNumSinceLast = 0 } percent := float64(p.state.currentNum) / float64(p.config.max) p.state.currentSaucerSize = int(percent * float64(p.config.width)) p.state.currentPercent = int(percent * 100) updateBar := p.state.currentPercent != p.state.lastPercent && p.state.currentPercent > 0 p.state.lastPercent = p.state.currentPercent if p.state.currentNum > p.config.max { return errors.New("current number exceeds max") } // always update if show bytes/second or its/second if updateBar || p.config.showIterationsPerSecond || p.config.showIterationsCount { return p.render() } return nil } // AddDetail adds a detail to the progress bar. Only used when maxDetailRow is set to a value greater than 0 func (p *ProgressBar) AddDetail(detail string) error { if p.config.maxDetailRow == 0 { return errors.New("maxDetailRow is set to 0, cannot add detail") } if p.IsFinished() { return errors.New("cannot add detail to a finished progress bar") } p.lock.Lock() defer p.lock.Unlock() if p.state.details == nil { // if we add a detail before the first add, it will be weird that we have detail but don't have the progress bar in the top. // so when we add the first detail, we will render the progress bar first. if err := p.render(); err != nil { return err } } p.state.details = append(p.state.details, detail) if len(p.state.details) > p.config.maxDetailRow { p.state.details = p.state.details[1:] } if err := p.renderDetails(); err != nil { return err } return nil } // renderDetails renders the details of the progress bar func (p *ProgressBar) renderDetails() error { if p.config.invisible { return nil } if p.state.finished { return nil } if p.config.maxDetailRow == 0 { return nil } b := strings.Builder{} b.WriteString("\n") // render the details row for _, detail := range p.state.details { fmt.Fprintf(&b, "\u001B[K\r%s\n", detail) } // add empty lines to fill the maxDetailRow for i := len(p.state.details); i < p.config.maxDetailRow; i++ { b.WriteString("\u001B[K\n") } // move the cursor up to the start of the details row fmt.Fprintf(&b, "\u001B[%dF", p.config.maxDetailRow+1) _ = writeString(p.config, b.String()) return nil } // Clear erases the progress bar from the current line func (p *ProgressBar) Clear() error { return clearProgressBar(p.config, p.state) } // Describe will change the description shown before the progress, which // can be changed on the fly (as for a slow running process). func (p *ProgressBar) Describe(description string) { p.lock.Lock() defer p.lock.Unlock() p.config.description = description if p.config.invisible { return } _ = p.render() } // New64 returns a new ProgressBar // with the specified maximum func New64(maxVal int64) *ProgressBar { return NewOptions64(maxVal) } // GetMax returns the max of a bar func (p *ProgressBar) GetMax() int { p.lock.Lock() defer p.lock.Unlock() return int(p.config.max) } // GetMax64 returns the current max func (p *ProgressBar) GetMax64() int64 { p.lock.Lock() defer p.lock.Unlock() return p.config.max } // ChangeMax takes in a int // and changes the max value // of the progress bar func (p *ProgressBar) ChangeMax(newMax int) { p.ChangeMax64(int64(newMax)) } // ChangeMax64 is basically // the same as ChangeMax, // but takes in a int64 // to avoid casting func (p *ProgressBar) ChangeMax64(newMax int64) { p.lock.Lock() p.config.max = newMax if p.config.showBytes { p.config.maxHumanized, p.config.maxHumanizedSuffix = humanizeBytes(float64(p.config.max), p.config.useIECUnits) } if newMax == -1 { p.lengthUnknown() } else { p.lengthKnown(newMax) } p.lock.Unlock() // so p.Add can lock _ = p.Add(0) // re-render } // AddMax takes in a int // and adds it to the max // value of the progress bar func (p *ProgressBar) AddMax(added int) { p.AddMax64(int64(added)) } // AddMax64 is basically // the same as AddMax, // but takes in a int64 // to avoid casting func (p *ProgressBar) AddMax64(added int64) { p.lock.Lock() p.config.max += added if p.config.showBytes { p.config.maxHumanized, p.config.maxHumanizedSuffix = humanizeBytes(float64(p.config.max), p.config.useIECUnits) } if p.config.max == -1 { p.lengthUnknown() } else { p.lengthKnown(p.config.max) } p.lock.Unlock() // so p.Add can lock _ = p.Add(0) // re-render } // IsFinished returns true if progress bar is completed func (p *ProgressBar) IsFinished() bool { p.lock.Lock() defer p.lock.Unlock() return p.state.finished } // IsStarted returns true if progress bar is started func (p *ProgressBar) IsStarted() bool { return !p.state.startTime.IsZero() } // render renders the progress bar, updating the maximum // rendered line width. this function is not thread-safe, // so it must be called with an acquired lock. func (p *ProgressBar) render() error { // make sure that the rendering is not happening too quickly // but always show if the currentNum reaches the max if !p.IsStarted() { p.state.startTime = time.Now() } else if time.Since(p.state.lastShown).Nanoseconds() < p.config.throttleDuration.Nanoseconds() && p.state.currentNum < p.config.max { return nil } if !p.config.useANSICodes { // first, clear the existing progress bar, if not yet finished. if !p.state.finished { err := clearProgressBar(p.config, p.state) if err != nil { return err } } } // check if the progress bar is finished if !p.state.finished && p.state.currentNum >= p.config.max { p.state.finished = true if !p.config.clearOnFinish { _, _ = io.Copy(p.config.writer, &p.config.stdBuffer) _, _ = renderProgressBar(p.config, &p.state) } if p.config.maxDetailRow > 0 { _ = p.renderDetails() // put the cursor back to the last line of the details _ = writeString(p.config, fmt.Sprintf("\u001B[%dB\r\u001B[%dC", p.config.maxDetailRow, len(p.state.details[len(p.state.details)-1]))) } if p.config.onCompletion != nil { p.config.onCompletion() } } if p.state.finished { // when using ANSI codes we don't pre-clean the current line if p.config.useANSICodes && p.config.clearOnFinish { err := clearProgressBar(p.config, p.state) if err != nil { return err } } return nil } // then, re-render the current progress bar _, _ = io.Copy(p.config.writer, &p.config.stdBuffer) w, err := renderProgressBar(p.config, &p.state) if err != nil { return err } if w > p.state.maxLineWidth { p.state.maxLineWidth = w } p.state.lastShown = time.Now() return nil } // lengthUnknown sets the progress bar to ignore the length func (p *ProgressBar) lengthUnknown() { p.config.ignoreLength = true p.config.max = int64(p.config.width) p.config.predictTime = false } // lengthKnown sets the progress bar to do not ignore the length func (p *ProgressBar) lengthKnown(maxVal int64) { p.config.ignoreLength = false p.config.max = maxVal p.config.predictTime = true } // State returns the current state func (p *ProgressBar) State() State { p.lock.Lock() defer p.lock.Unlock() s := State{} s.CurrentNum = p.state.currentNum s.Max = p.config.max if p.config.ignoreLength { s.Max = -1 } s.CurrentPercent = float64(p.state.currentNum) / float64(p.config.max) s.CurrentBytes = p.state.currentBytes if p.IsStarted() { s.SecondsSince = time.Since(p.state.startTime).Seconds() } else { s.SecondsSince = 0 } if p.state.currentNum > 0 { s.SecondsLeft = s.SecondsSince / float64(p.state.currentNum) * (float64(p.config.max) - float64(p.state.currentNum)) } s.KBsPerSecond = float64(p.state.currentBytes) / 1024.0 / s.SecondsSince s.Description = p.config.description return s } // getStringWidth returns the display width of a string, accounting for color codes. func getStringWidth(c config, str string) int { if c.colorCodes { // convert any color codes in the progress bar into the respective ANSI codes str = colorstring.Color(str) } // the width of the string, if printed to the console // does not include the carriage return character cleanString := strings.ReplaceAll(str, "\r", "") // ansi.StringWidth returns the width of a string in cells. // It automatically ignores ANSI escape codes and accounts for wide characters. return ansi.StringWidth(cleanString) } func renderProgressBar(c config, s *state) (int, error) { averageRate := calculateAverageRate(s) // build statistics string (iterations count, rate, etc.) statsStr := buildStatsString(c, s, averageRate) // calculate time brackets leftBrac, rightBrac := calculateTimeBrackets(c, s, averageRate) // calculate bar width for full width mode calculateBarWidth(c, s, statsStr, leftBrac, rightBrac) // calculate bar visual elements barStart, barEnd, saucer, saucerHead := calculateBarElements(c, s) // build the final progress bar string repeatAmount := max(c.width-s.currentSaucerSize, 0) str := buildProgressBarString(c, s, statsStr, leftBrac, rightBrac, barStart, barEnd, saucer, saucerHead, repeatAmount, averageRate) if c.colorCodes { str = colorstring.Color(str) } s.rendered = str return getStringWidth(c, str), writeString(c, str) } // calculateAverageRate calculates the average rate of progress func calculateAverageRate(s *state) float64 { averageRate := average(s.counterLastTenRates) if len(s.counterLastTenRates) == 0 || s.finished { if t := time.Since(s.startTime).Seconds(); t > 0 { averageRate = s.currentBytes / t } else { averageRate = 0 } } return averageRate } // buildStatsString builds the statistics string (iterations count, rate, etc.) func buildStatsString(c config, s *state, averageRate float64) string { var sb strings.Builder appendIterationCount(&sb, c, s) appendByteRate(&sb, c, averageRate) appendIterationsRate(&sb, c, averageRate) if sb.Len() > 0 { sb.WriteString(")") } return sb.String() } // appendIterationCount appends iteration count to the string builder func appendIterationCount(sb *strings.Builder, c config, s *state) { if !c.showIterationsCount { return } appendSeparator(sb) if !c.ignoreLength { appendIterationCountWithTotal(sb, c, s) } else { appendIterationCountWithoutTotal(sb, c, s) } } // appendSeparator appends opening parenthesis or comma separator func appendSeparator(sb *strings.Builder) { if sb.Len() == 0 { sb.WriteString("(") } else { sb.WriteString(", ") } } // appendIterationCountWithTotal appends iteration count when total is known func appendIterationCountWithTotal(sb *strings.Builder, c config, s *state) { if c.showBytes { appendBytesCountWithTotal(sb, c, s) return } appendNumericCountWithTotal(sb, c, s) } // appendBytesCountWithTotal appends bytes count when total is known func appendBytesCountWithTotal(sb *strings.Builder, c config, s *state) { currentHumanize, currentSuffix := humanizeBytes(s.currentBytes, c.useIECUnits) // same unit suffix - use compact format if currentSuffix == c.maxHumanizedSuffix { appendSameUnitBytesCount(sb, c, currentHumanize) return } // different unit suffix - show both suffixes appendDifferentUnitBytesCount(sb, c, currentHumanize, currentSuffix) } // appendSameUnitBytesCount appends bytes count with same unit suffix func appendSameUnitBytesCount(sb *strings.Builder, c config, currentHumanize string) { if c.showTotalBytes { fmt.Fprintf(sb, "%s/%s%s", currentHumanize, c.maxHumanized, c.maxHumanizedSuffix) } else { fmt.Fprintf(sb, "%s%s", currentHumanize, c.maxHumanizedSuffix) } } // appendDifferentUnitBytesCount appends bytes count with different unit suffixes func appendDifferentUnitBytesCount(sb *strings.Builder, c config, currentHumanize, currentSuffix string) { if c.showTotalBytes { fmt.Fprintf(sb, "%s%s/%s%s", currentHumanize, currentSuffix, c.maxHumanized, c.maxHumanizedSuffix) } else { fmt.Fprintf(sb, "%s%s", currentHumanize, currentSuffix) } } // appendNumericCountWithTotal appends numeric count when total is known func appendNumericCountWithTotal(sb *strings.Builder, c config, s *state) { if c.showTotalBytes { fmt.Fprintf(sb, "%.0f/%d", s.currentBytes, c.max) } else { fmt.Fprintf(sb, "%.0f", s.currentBytes) } } // appendIterationCountWithoutTotal appends iteration count when total is unknown func appendIterationCountWithoutTotal(sb *strings.Builder, c config, s *state) { if c.showBytes { currentHumanize, currentSuffix := humanizeBytes(s.currentBytes, c.useIECUnits) fmt.Fprintf(sb, "%s%s", currentHumanize, currentSuffix) } else if c.showTotalBytes { fmt.Fprintf(sb, "%.0f/%s", s.currentBytes, "-") } else { fmt.Fprintf(sb, "%.0f", s.currentBytes) } } // appendByteRate appends byte rate to the string builder func appendByteRate(sb *strings.Builder, c config, averageRate float64) { if !c.showBytes || averageRate <= 0 || math.IsInf(averageRate, 1) { return } appendSeparator(sb) currentHumanize, currentSuffix := humanizeBytes(averageRate, c.useIECUnits) fmt.Fprintf(sb, "%s%s/s", currentHumanize, currentSuffix) } // appendIterationsRate appends iterations rate to the string builder func appendIterationsRate(sb *strings.Builder, c config, averageRate float64) { if !c.showIterationsPerSecond { return } appendSeparator(sb) switch { case averageRate > 1: fmt.Fprintf(sb, "%0.0f %s/s", averageRate, c.iterationString) case averageRate*60 > 1: fmt.Fprintf(sb, "%0.0f %s/min", 60*averageRate, c.iterationString) default: fmt.Fprintf(sb, "%0.0f %s/hr", 3600*averageRate, c.iterationString) } } // calculateTimeBrackets calculates left and right time brackets func calculateTimeBrackets(c config, s *state, averageRate float64) (string, string) { leftBrac, rightBrac := "", "" switch { case c.predictTime: rightBracNum := time.Duration((1/averageRate)*(float64(c.max)-float64(s.currentNum))) * time.Second if rightBracNum.Seconds() < 0 { rightBracNum = 0 * time.Second } rightBrac = rightBracNum.String() fallthrough case c.elapsedTime || c.showElapsedTimeOnFinish: leftBrac = (time.Duration(time.Since(s.startTime).Seconds()) * time.Second).String() } return leftBrac, rightBrac } // calculateBarWidth calculates the bar width for full width mode func calculateBarWidth(c config, s *state, statsStr, leftBrac, rightBrac string) { if !c.fullWidth || c.ignoreLength { return } width, err := termWidth() if err != nil { width = 80 } if width > 120 { width = 120 } amend := calculateAmend(leftBrac, rightBrac, c.showDescriptionAtLineEnd) // Use getStringWidth to properly handle ANSI codes and multi-byte characters c.width = width - getStringWidth(c, c.description) - 10 - amend - getStringWidth(c, statsStr) - ansi.StringWidth(leftBrac) - ansi.StringWidth(rightBrac) s.currentSaucerSize = int(float64(s.currentPercent) / 100.0 * float64(c.width)) } // calculateAmend calculates the amend value for bar width calculation func calculateAmend(leftBrac, rightBrac string, showDescriptionAtLineEnd bool) int { amend := 1 // an extra space at eol switch { case leftBrac != "" && rightBrac != "": amend = 4 // space, square brackets and colon case leftBrac != "" && rightBrac == "": amend = 4 // space and square brackets and another space case leftBrac == "" && rightBrac != "": amend = 3 // space and square brackets } if showDescriptionAtLineEnd { amend += 1 // another space } return amend } // calculateBarElements calculates bar visual elements func calculateBarElements(c config, s *state) (string, string, string, string) { barStart, barEnd := c.theme.BarStart, c.theme.BarEnd if s.finished && c.theme.BarEndFilled != "" { barEnd = c.theme.BarEndFilled } if (s.currentSaucerSize > 0 || s.currentPercent > 0) && c.theme.BarStartFilled != "" { barStart = c.theme.BarStartFilled } saucer, saucerHead := calculateSaucer(c, s) return barStart, barEnd, saucer, saucerHead } // calculateSaucer calculates saucer and saucer head func calculateSaucer(c config, s *state) (string, string) { if s.currentSaucerSize <= 0 { return "", "" } saucer := calculateSaucerBody(c, s) saucerHead := calculateSaucerHead(c, s) return saucer, saucerHead } // calculateSaucerBody calculates the saucer body func calculateSaucerBody(c config, s *state) string { if c.ignoreLength { return strings.Repeat(c.theme.SaucerPadding, s.currentSaucerSize-1) } return strings.Repeat(c.theme.Saucer, s.currentSaucerSize-1) } // calculateSaucerHead calculates the saucer head character func calculateSaucerHead(c config, s *state) string { if c.theme.AltSaucerHead != "" && s.isAltSaucerHead { s.isAltSaucerHead = false return c.theme.AltSaucerHead } if c.theme.SaucerHead == "" || s.currentSaucerSize == c.width { s.isAltSaucerHead = false return c.theme.Saucer } s.isAltSaucerHead = true return c.theme.SaucerHead } // buildProgressBarString builds the final progress bar string func buildProgressBarString(c config, s *state, statsStr, leftBrac, rightBrac, barStart, barEnd, saucer, saucerHead string, repeatAmount int, averageRate float64) string { if c.ignoreLength { return buildSpinnerString(c, s, statsStr, leftBrac) } if rightBrac == "" { return buildProgressBarWithoutTimePredict(c, s, statsStr, leftBrac, barStart, barEnd, saucer, saucerHead, repeatAmount) } return buildProgressBarWithTimePredict(c, s, statsStr, leftBrac, rightBrac, barStart, barEnd, saucer, saucerHead, repeatAmount) } // buildSpinnerString builds spinner string for ignoreLength mode func buildSpinnerString(c config, s *state, statsStr, leftBrac string) string { selectedSpinner := spinners[c.spinnerType] if len(c.spinner) > 0 { selectedSpinner = c.spinner } spinner := getSpinnerChar(c, s, selectedSpinner) if c.elapsedTime { if c.showDescriptionAtLineEnd { return fmt.Sprintf("\r%s %s [%s] %s ", spinner, statsStr, leftBrac, c.description) } return fmt.Sprintf("\r%s %s %s [%s] ", spinner, c.description, statsStr, leftBrac) } if c.showDescriptionAtLineEnd { return fmt.Sprintf("\r%s %s %s ", spinner, statsStr, c.description) } return fmt.Sprintf("\r%s %s %s ", spinner, c.description, statsStr) } // getSpinnerChar gets the current spinner character func getSpinnerChar(c config, s *state, selectedSpinner []string) string { if c.spinnerChangeInterval != 0 { idx := int(math.Round(math.Mod(float64(time.Since(s.startTime).Nanoseconds()/c.spinnerChangeInterval.Nanoseconds()), float64(len(selectedSpinner))))) return selectedSpinner[idx] } spinner := selectedSpinner[s.spinnerIdx] s.spinnerIdx = (s.spinnerIdx + 1) % len(selectedSpinner) return spinner } // buildProgressBarWithoutTimePredict builds progress bar without time prediction func buildProgressBarWithoutTimePredict(c config, s *state, statsStr, leftBrac, barStart, barEnd, saucer, saucerHead string, repeatAmount int) string { str := fmt.Sprintf("%4d%% %s%s%s%s%s %s", s.currentPercent, barStart, saucer, saucerHead, strings.Repeat(c.theme.SaucerPadding, repeatAmount), barEnd, statsStr) if (s.currentPercent == 100 && c.showElapsedTimeOnFinish) || c.elapsedTime { str = fmt.Sprintf("%s [%s]", str, leftBrac) } return addDescription(c, str) } // buildProgressBarWithTimePredict builds progress bar with time prediction func buildProgressBarWithTimePredict(c config, s *state, statsStr, leftBrac, rightBrac, barStart, barEnd, saucer, saucerHead string, repeatAmount int) string { if s.currentPercent == 100 { return buildFinishedProgressBar(c, s, statsStr, leftBrac, barStart, barEnd, saucer, saucerHead, repeatAmount) } str := fmt.Sprintf("%4d%% %s%s%s%s%s %s [%s:%s]", s.currentPercent, barStart, saucer, saucerHead, strings.Repeat(c.theme.SaucerPadding, repeatAmount), barEnd, statsStr, leftBrac, rightBrac) return addDescription(c, str) } // buildFinishedProgressBar builds progress bar when finished (100%) func buildFinishedProgressBar(c config, s *state, statsStr, leftBrac, barStart, barEnd, saucer, saucerHead string, repeatAmount int) string { str := fmt.Sprintf("%4d%% %s%s%s%s%s %s", s.currentPercent, barStart, saucer, saucerHead, strings.Repeat(c.theme.SaucerPadding, repeatAmount), barEnd, statsStr) if c.showElapsedTimeOnFinish { str = fmt.Sprintf("%s [%s]", str, leftBrac) } return addDescription(c, str) } // addDescription adds description to the progress bar string func addDescription(c config, str string) string { if c.showDescriptionAtLineEnd { return fmt.Sprintf("\r%s %s", str, c.description) } return fmt.Sprintf("\r%s%s", c.description, str) } func clearProgressBar(c config, s state) error { if s.maxLineWidth == 0 { return nil } if c.useANSICodes { // write the "clear current line" ANSI escape sequence return writeString(c, "\033[2K\r") } // fill the empty content // to overwrite the progress bar and jump // back to the beginning of the line str := fmt.Sprintf("\r%s\r", strings.Repeat(" ", s.maxLineWidth)) return writeString(c, str) // the following does not show correctly if the previous line is longer than subsequent line // return writeString(c, "\r") } func writeString(c config, str string) error { if _, err := io.WriteString(c.writer, str); err != nil { return err } if f, ok := c.writer.(*os.File); ok { // ignore any errors in Sync(), as stdout // can't be synced on some operating systems // like Debian 9 (Stretch) _ = f.Sync() } return nil } // Reader is the progressbar io.Reader struct type Reader struct { io.Reader bar *ProgressBar } // NewReader return a new Reader with a given progress bar. func NewReader(r io.Reader, bar *ProgressBar) Reader { return Reader{ Reader: r, bar: bar, } } // Read will read the data and add the number of bytes to the progressbar func (r *Reader) Read(p []byte) (n int, err error) { n, err = r.Reader.Read(p) _ = r.bar.Add(n) return } // Close the reader when it implements io.Closer func (r *Reader) Close() (err error) { if closer, ok := r.Reader.(io.Closer); ok { return closer.Close() } _ = r.bar.Finish() return } // Write implement io.Writer func (p *ProgressBar) Write(b []byte) (n int, err error) { n = len(b) err = p.Add(n) return } // Read implement io.Reader func (p *ProgressBar) Read(b []byte) (n int, err error) { n = len(b) err = p.Add(n) return } func (p *ProgressBar) Close() (err error) { err = p.Finish() return } func average(xs []float64) float64 { total := 0.0 for _, v := range xs { total += v } return total / float64(len(xs)) } func humanizeBytes(s float64, iec bool) (string, string) { sizes := []string{" B", " kB", " MB", " GB", " TB", " PB", " EB"} base := 1000.0 if iec { sizes = []string{" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB"} base = 1024.0 } if s < 10 { return fmt.Sprintf("%2.0f", s), sizes[0] } e := math.Floor(logn(float64(s), base)) suffix := sizes[int(e)] val := math.Floor(float64(s)/math.Pow(base, e)*10+0.5) / 10 f := "%.0f" if val < 10 { f = "%.1f" } return fmt.Sprintf(f, val), suffix } func logn(n, b float64) float64 { return math.Log(n) / math.Log(b) } // termWidth function returns the visible width of the current terminal // and can be redefined for testing var termWidth = func() (width int, err error) { width, _, err = term.GetSize(int(os.Stdout.Fd())) if err == nil { return width, nil } return 0, err } func shouldCacheOutput(pb *ProgressBar) bool { return !pb.state.finished && !pb.state.exit && !pb.config.invisible } func Bprintln(pb *ProgressBar, a ...any) (int, error) { pb.lock.Lock() defer pb.lock.Unlock() if !shouldCacheOutput(pb) { return fmt.Fprintln(pb.config.writer, a...) } return fmt.Fprintln(&pb.config.stdBuffer, a...) } func Bprintf(pb *ProgressBar, format string, a ...any) (int, error) { pb.lock.Lock() defer pb.lock.Unlock() if !shouldCacheOutput(pb) { return fmt.Fprintf(pb.config.writer, format, a...) } return fmt.Fprintf(&pb.config.stdBuffer, format, a...) } ================================================ FILE: modules/progressbar/spinners.go ================================================ package progressbar var spinners = map[int][]string{ 0: {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"}, 1: {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"}, 2: {"▖", "▘", "▝", "▗"}, 3: {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"}, 4: {"◢", "◣", "◤", "◥"}, 5: {"◰", "◳", "◲", "◱"}, 6: {"◴", "◷", "◶", "◵"}, 7: {"◐", "◓", "◑", "◒"}, 8: {".", "o", "O", "@", "*"}, 9: {"|", "/", "-", "\\"}, 10: {"◡◡", "⊙⊙", "◠◠"}, 11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"}, 12: {">))'>", " >))'>", " >))'>", " >))'>", " >))'>", " <'((<", " <'((<", " <'((<"}, 13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"}, 14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}, 15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}, 16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"}, 17: {"■", "□", "▪", "▫"}, 18: {"←", "↑", "→", "↓"}, 19: {"╫", "╪"}, 20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"}, 21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"}, 22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"}, 23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"}, 24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"}, 25: {"ヲ", "ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", "ア", "イ", "ウ", "エ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ン"}, 26: {".", "..", "..."}, 27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"}, 28: {".", "o", "O", "°", "O", "o", "."}, 29: {"+", "x"}, 30: {"v", "<", "^", ">"}, 31: {">>--->", " >>--->", " >>--->", " >>--->", " >>--->", " <---<<", " <---<<", " <---<<", " <---<<", "<---<<"}, 32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"}, 33: {"[ ]", "[= ]", "[== ]", "[=== ]", "[==== ]", "[===== ]", "[====== ]", "[======= ]", "[======== ]", "[========= ]", "[==========]"}, 34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"}, 35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"}, 36: {"[ ]", "[=> ]", "[===> ]", "[=====> ]", "[======> ]", "[========> ]", "[==========> ]", "[============> ]", "[==============> ]", "[================> ]", "[==================> ]", "[===================>]"}, 37: {"ဝ", "၀"}, 38: {"▌", "▀", "▐▄"}, 39: {"🌍", "🌎", "🌏"}, 40: {"◜", "◝", "◞", "◟"}, 41: {"⬒", "⬔", "⬓", "⬕"}, 42: {"⬖", "⬘", "⬗", "⬙"}, 43: {"[>>> >]", "[]>>>> []", "[] >>>> []", "[] >>>> []", "[] >>>> []", "[] >>>>[]", "[>> >>]"}, 44: {"♠", "♣", "♥", "♦"}, 45: {"➞", "➟", "➠", "➡", "➠", "➟"}, 46: {" | ", ` \ `, "_ ", ` \ `, " | ", " / ", " _", " / "}, 47: {" . . . .", ". . . .", ". . . .", ". . . .", ". . . . ", ". . . . ."}, 48: {" | ", " / ", " _ ", ` \ `, " | ", ` \ `, " _ ", " / "}, 49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"}, 50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"}, 51: {"[ ]", "[ =]", "[ ==]", "[ ===]", "[====]", "[=== ]", "[== ]", "[= ]"}, 52: {"( ● )", "( ● )", "( ● )", "( ● )", "( ●)", "( ● )", "( ● )", "( ● )", "( ● )"}, 53: {"✶", "✸", "✹", "✺", "✹", "✷"}, 54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"}, 55: {"▐⠂ ▌", "▐⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂▌", "▐ ⠠▌", "▐ ⡀▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐⠠ ▌"}, 56: {"¿", "?"}, 57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"}, 58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"}, 59: {". ", ".. ", "...", " ..", " .", " "}, 60: {".", "o", "O", "°", "O", "o", "."}, 61: {"▓", "▒", "░"}, 62: {"▌", "▀", "▐", "▄"}, 63: {"⊶", "⊷"}, 64: {"▪", "▫"}, 65: {"□", "■"}, 66: {"▮", "▯"}, 67: {"-", "=", "≡"}, 68: {"d", "q", "p", "b"}, 69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"}, 70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "}, 71: {"☗", "☖"}, 72: {"⧇", "⧆"}, 73: {"◉", "◎"}, 74: {"㊂", "㊀", "㊁"}, 75: {"⦾", "⦿"}, } ================================================ FILE: modules/securejoin/LICENSE ================================================ Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. Copyright (C) 2017 SUSE LLC. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: modules/securejoin/README.md ================================================ # filepath-securejoin From: https://github.com/cyphar/filepath-securejoin/tree/b69b737a2dcadcbf888a1f32389acdb26b73a2b5 ================================================ FILE: modules/securejoin/join.go ================================================ // Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. // Copyright (C) 2017 SUSE LLC. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package securejoin is an implementation of the hopefully-soon-to-be-included // SecureJoin helper that is meant to be part of the "path/filepath" package. // The purpose of this project is to provide a PoC implementation to make the // SecureJoin proposal (https://github.com/golang/go/issues/20126) more // tangible. package securejoin import ( "bytes" "errors" "os" "path/filepath" "strings" "syscall" ) // IsNotExist tells you if err is an error that implies that either the path // accessed does not exist (or path components don't exist). This is // effectively a more broad version of os.IsNotExist. func IsNotExist(err error) bool { // Check that it's not actually an ENOTDIR, which in some cases is a more // convoluted case of ENOENT (usually involving weird paths). return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT) } // SecureJoinVFS joins the two given path components (similar to Join) except // that the returned path is guaranteed to be scoped inside the provided root // path (when evaluated). Any symbolic links in the path are evaluated with the // given root treated as the root of the filesystem, similar to a chroot. The // filesystem state is evaluated through the given VFS interface (if nil, the // standard os.* family of functions are used). // // Note that the guarantees provided by this function only apply if the path // components in the returned string are not modified (in other words are not // replaced with symlinks on the filesystem) after this function has returned. // Such a symlink race is necessarily out-of-scope of SecureJoin. // // Volume names in unsafePath are always discarded, regardless if they are // provided via direct input or when evaluating symlinks. Therefore: // // "C:\Temp" + "D:\path\to\file.txt" results in "C:\Temp\path\to\file.txt" func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { // Use the os.* VFS implementation if none was specified. if vfs == nil { vfs = osVFS{} } unsafePath = filepath.FromSlash(unsafePath) var path bytes.Buffer n := 0 for unsafePath != "" { if n > 255 { return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP} } if v := filepath.VolumeName(unsafePath); v != "" { unsafePath = unsafePath[len(v):] } // Next path component, p. i := strings.IndexRune(unsafePath, filepath.Separator) var p string if i == -1 { p, unsafePath = unsafePath, "" } else { p, unsafePath = unsafePath[:i], unsafePath[i+1:] } // Create a cleaned path, using the lexical semantics of /../a, to // create a "scoped" path component which can safely be joined to fullP // for evaluation. At this point, path.String() doesn't contain any // symlink components. cleanP := filepath.Clean(string(filepath.Separator) + path.String() + p) if cleanP == string(filepath.Separator) { path.Reset() continue } fullP := filepath.Clean(root + cleanP) // Figure out whether the path is a symlink. fi, err := vfs.Lstat(fullP) if err != nil && !IsNotExist(err) { return "", err } // Treat non-existent path components the same as non-symlinks (we // can't do any better here). if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 { path.WriteString(p) path.WriteRune(filepath.Separator) continue } // Only increment when we actually dereference a link. n++ // It's a symlink, expand it by prepending it to the yet-unparsed path. dest, err := vfs.Readlink(fullP) if err != nil { return "", err } // Absolute symlinks reset any work we've already done. if filepath.IsAbs(dest) { path.Reset() } unsafePath = dest + string(filepath.Separator) + unsafePath } // We have to clean path.String() here because it may contain '..' // components that are entirely lexical, but would be misleading otherwise. // And finally do a final clean to ensure that root is also lexically // clean. fullP := filepath.Clean(string(filepath.Separator) + path.String()) return filepath.Clean(root + fullP), nil } // SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library // of functions as the VFS. If in doubt, use this function over SecureJoinVFS. func SecureJoin(root, unsafePath string) (string, error) { return SecureJoinVFS(root, unsafePath, nil) } ================================================ FILE: modules/securejoin/vfs.go ================================================ // Copyright (C) 2017 SUSE LLC. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package securejoin import "os" // In future this should be moved into a separate package, because now there // are several projects (umoci and go-mtree) that are using this sort of // interface. // VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is // equivalent to using the standard os.* family of functions. This is mainly // used for the purposes of mock testing, but also can be used to otherwise use // SecureJoin with VFS-like system. type VFS interface { // Lstat returns a FileInfo describing the named file. If the file is a // symbolic link, the returned FileInfo describes the symbolic link. Lstat // makes no attempt to follow the link. These semantics are identical to // os.Lstat. Lstat(name string) (os.FileInfo, error) // Readlink returns the destination of the named symbolic link. These // semantics are identical to os.Readlink. Readlink(name string) (string, error) } // osVFS is the "nil" VFS, in that it just passes everything through to the os // module. type osVFS struct{} // Lstat returns a FileInfo describing the named file. If the file is a // symbolic link, the returned FileInfo describes the symbolic link. Lstat // makes no attempt to follow the link. These semantics are identical to // os.Lstat. func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } // Readlink returns the destination of the named symbolic link. These // semantics are identical to os.Readlink. func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) } ================================================ FILE: modules/shlex/LICENSE ================================================ Copyright (c) anmitsu Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/shlex/shlex.go ================================================ // Package shlex provides a simple lexical analysis like Unix shell. package shlex import ( "bufio" "errors" "io" "strings" "unicode" ) var ( ErrNoClosing = errors.New("no closing quotation") ErrNoEscaped = errors.New("no escaped character") ) // Tokenizer is the interface that classifies a token according to // words, whitespaces, quotations, escapes and escaped quotations. type Tokenizer interface { IsWord(rune) bool IsWhitespace(rune) bool IsQuote(rune) bool IsEscape(rune) bool IsEscapedQuote(rune) bool } // DefaultTokenizer implements a simple tokenizer like Unix shell. type DefaultTokenizer struct{} func (t *DefaultTokenizer) IsWord(r rune) bool { return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r) } func (t *DefaultTokenizer) IsQuote(r rune) bool { switch r { case '\'', '"': return true default: return false } } func (t *DefaultTokenizer) IsWhitespace(r rune) bool { return unicode.IsSpace(r) } func (t *DefaultTokenizer) IsEscape(r rune) bool { return r == '\\' } func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool { return r == '"' } // Lexer represents a lexical analyzer. type Lexer struct { reader *bufio.Reader tokenizer Tokenizer posix bool whitespacesplit bool } // NewLexer creates a new Lexer reading from io.Reader. This Lexer // has a DefaultTokenizer according to posix and whitespacesplit // rules. func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer { return &Lexer{ reader: bufio.NewReader(r), tokenizer: &DefaultTokenizer{}, posix: posix, whitespacesplit: whitespacesplit, } } // NewLexerString creates a new Lexer reading from a string. This // Lexer has a DefaultTokenizer according to posix and whitespacesplit // rules. func NewLexerString(s string, posix, whitespacesplit bool) *Lexer { return NewLexer(strings.NewReader(s), posix, whitespacesplit) } // Split splits a string according to posix or non-posix rules. func Split(s string, posix bool) ([]string, error) { return NewLexerString(s, posix, true).Split() } // SetTokenizer sets a Tokenizer. func (l *Lexer) SetTokenizer(t Tokenizer) { l.tokenizer = t } func (l *Lexer) Split() ([]string, error) { result := make([]string, 0) for { token, err := l.readToken() if token != "" { result = append(result, token) } if errors.Is(err, io.EOF) { break } if err != nil { return result, err } } return result, nil } func (l *Lexer) readToken() (string, error) { t := l.tokenizer token := "" quoted := false state := ' ' escapedstate := ' ' scanning: for { next, _, err := l.reader.ReadRune() if err != nil { if t.IsQuote(state) { return token, ErrNoClosing } else if t.IsEscape(state) { return token, ErrNoEscaped } return token, err } switch { case t.IsWhitespace(state): switch { case t.IsWhitespace(next): break scanning case l.posix && t.IsEscape(next): escapedstate = 'a' state = next case t.IsWord(next): token += string(next) state = 'a' case t.IsQuote(next): if !l.posix { token += string(next) } state = next default: token = string(next) if l.whitespacesplit { state = 'a' } else if token != "" || (l.posix && quoted) { break scanning } } case t.IsQuote(state): quoted = true switch { case next == state: if !l.posix { token += string(next) break scanning } else { state = 'a' } case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state): escapedstate = state state = next default: token += string(next) } case t.IsEscape(state): if t.IsQuote(escapedstate) && next != state && next != escapedstate { token += string(state) } token += string(next) state = escapedstate case t.IsWord(state): switch { case t.IsWhitespace(next): if token != "" || (l.posix && quoted) { break scanning } case l.posix && t.IsQuote(next): state = next case l.posix && t.IsEscape(next): escapedstate = 'a' state = next case t.IsWord(next) || t.IsQuote(next): token += string(next) default: if l.whitespacesplit { token += string(next) } else if token != "" { _ = l.reader.UnreadRune() break scanning } } } } return token, nil } ================================================ FILE: modules/streamio/bytes.go ================================================ package streamio import ( "bytes" "io" "sync" ) var ( byteSlice = sync.Pool{ New: func() any { b := make([]byte, 32*1024) return &b }, } bytesBuffer = sync.Pool{ New: func() any { return bytes.NewBuffer(nil) }, } ) // GetByteSlice returns a *[]byte that is managed by a sync.Pool. // The initial slice length will be 16384 (16kb). // // After use, the *[]byte should be put back into the sync.Pool // by calling PutByteSlice. func GetByteSlice() *[]byte { buf := byteSlice.Get().(*[]byte) return buf } // PutByteSlice puts buf back into its sync.Pool. func PutByteSlice(buf *[]byte) { byteSlice.Put(buf) } // GetBytesBuffer returns a *bytes.Buffer that is managed by a sync.Pool. // Returns a buffer that is reset and ready for use. // // After use, the *bytes.Buffer should be put back into the sync.Pool // by calling PutBytesBuffer. func GetBytesBuffer() *bytes.Buffer { buf := bytesBuffer.Get().(*bytes.Buffer) buf.Reset() return buf } // PutBytesBuffer puts buf back into its sync.Pool. func PutBytesBuffer(buf *bytes.Buffer) { bytesBuffer.Put(buf) } // Copy copy reader to writer func Copy(dst io.Writer, src io.Reader) (written int64, err error) { buf := GetByteSlice() defer PutByteSlice(buf) return io.CopyBuffer(dst, src, *buf) } ================================================ FILE: modules/streamio/io.go ================================================ package streamio import ( "bytes" "io" ) func ReadMax(r io.Reader, n int64) ([]byte, error) { var buf bytes.Buffer buf.Grow(int(n)) if _, err := buf.ReadFrom(io.LimitReader(r, n)); err != nil { return nil, err } return buf.Bytes(), nil } func GrowReadMax(r io.Reader, n int64, grow int) ([]byte, error) { var buf bytes.Buffer if grow <= 0 { grow = int(n) } buf.Grow(grow) if _, err := buf.ReadFrom(io.LimitReader(r, n)); err != nil { return nil, err } return buf.Bytes(), nil } ================================================ FILE: modules/streamio/io_test.go ================================================ package streamio import ( "fmt" "os" "strings" "testing" ) func TestReadMax(t *testing.T) { text := `XZXdewdieded3oifdjfrf4frewfrfreferwfgrewfreferferfdedoidqjwqdjqedo3qjhd3hqdiwqehdro3eidhewdiehdbweqdgewdgewdedewgdbe` b, err := ReadMax(strings.NewReader(text), 10) if err != nil { fmt.Fprintf(os.Stderr, "read error: %v", err) return } fmt.Fprintf(os.Stderr, "length: %d\n", len(b)) } func TestGrowReadMax(t *testing.T) { text := `XZXdewdieded3oifdjfrf4frewfrfreferwfgrewfreferferfdedoidqjwqdjqedo3qjhd3hqdiwqehdro3eidhewdiehdbweqdgewdgewdedewgdbe` b, err := GrowReadMax(strings.NewReader(text), 50, 10) if err != nil { fmt.Fprintf(os.Stderr, "read error: %v", err) return } fmt.Fprintf(os.Stderr, "length: %d\n", len(b)) } ================================================ FILE: modules/streamio/sync.go ================================================ package streamio import ( "bufio" "io" "sync" ) var bufioReader = sync.Pool{ New: func() any { return bufio.NewReader(nil) }, } // GetBufioReader returns a *bufio.Reader that is managed by a sync.Pool. // Returns a bufio.Reader that is reset with reader and ready for use. // // After use, the *bufio.Reader should be put back into the sync.Pool // by calling PutBufioReader. func GetBufioReader(reader io.Reader) *bufio.Reader { r := bufioReader.Get().(*bufio.Reader) r.Reset(reader) return r } // PutBufioReader puts reader back into its sync.Pool. func PutBufioReader(reader *bufio.Reader) { bufioReader.Put(reader) } const ( largePacketSize = 64 * 1024 ) var bufferWriter = sync.Pool{ New: func() any { return bufio.NewWriterSize(nil, largePacketSize) }, } // GetBufferWriter returns a *bufio.Writer that is managed by a sync.Pool. // Returns a bufio.Writer that is reset with writer and ready for use. // // After use, the *bufio.Writer should be put back into the sync.Pool // by calling PutBufferWriter. func GetBufferWriter(writer io.Writer) *bufio.Writer { w := bufferWriter.Get().(*bufio.Writer) w.Reset(writer) return w } // PutBufferWriter puts reader back into its sync.Pool. func PutBufferWriter(writer *bufio.Writer) { bufferWriter.Put(writer) } func LargeCopy(dst io.Writer, src io.Reader) (written int64, err error) { w := GetBufferWriter(dst) defer PutBufferWriter(w) if written, err = io.Copy(w, src); err != nil { return } err = w.Flush() return } ================================================ FILE: modules/streamio/zlib.go ================================================ package streamio import ( "bytes" "compress/zlib" "io" "sync" ) var ( zlibInitBytes = []byte{0x78, 0x9c, 0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01} zlibReader = sync.Pool{ New: func() any { r, _ := zlib.NewReader(bytes.NewReader(zlibInitBytes)) return &ZLibReader{ Reader: r.(zlibReadCloser), } }, } zlibWriter = sync.Pool{ New: func() any { return zlib.NewWriter(nil) }, } ) type zlibReadCloser interface { io.ReadCloser zlib.Resetter } type ZLibReader struct { dict *[]byte Reader zlibReadCloser } // GetZlibReader returns a ZLibReader that is managed by a sync.Pool. // Returns a ZLibReader that is reset using a dictionary that is // also managed by a sync.Pool. // // After use, the ZLibReader should be put back into the sync.Pool // by calling PutZlibReader. func GetZlibReader(r io.Reader) (*ZLibReader, error) { z := zlibReader.Get().(*ZLibReader) z.dict = GetByteSlice() err := z.Reader.Reset(r, *z.dict) return z, err } // PutZlibReader puts z back into its sync.Pool, first closing the reader. // The Byte slice dictionary is also put back into its sync.Pool. func PutZlibReader(z *ZLibReader) { _ = z.Reader.Close() PutByteSlice(z.dict) zlibReader.Put(z) } // GetZlibWriter returns a *zlib.Writer that is managed by a sync.Pool. // Returns a writer that is reset with w and ready for use. // // After use, the *zlib.Writer should be put back into the sync.Pool // by calling PutZlibWriter. func GetZlibWriter(w io.Writer) *zlib.Writer { z := zlibWriter.Get().(*zlib.Writer) z.Reset(w) return z } // PutZlibWriter puts w back into its sync.Pool. func PutZlibWriter(w *zlib.Writer) { zlibWriter.Put(w) } ================================================ FILE: modules/streamio/zlib_test.go ================================================ package streamio import ( "bytes" "fmt" "io" "os" "strings" "testing" ) func TestZlibEncode(t *testing.T) { content := `Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ` for range 100 { var buf bytes.Buffer z := GetZlibWriter(&buf) if _, err := io.Copy(z, strings.NewReader(content)); err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) } PutZlibWriter(z) } } func TestZlibDecode(t *testing.T) { content := `Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ` var buf bytes.Buffer z := GetZlibWriter(&buf) _, _ = io.Copy(z, strings.NewReader(content)) PutZlibWriter(z) for i := range 100 { z, err := GetZlibReader(bytes.NewReader(buf.Bytes())) if err != nil { fmt.Fprintf(os.Stderr, "decode error: %v\n", err) PutZlibReader(z) continue } _, _ = io.Copy(io.Discard, z.Reader) fmt.Fprintf(os.Stderr, "%d\n", i) PutZlibReader(z) } } func TestZlibEncodeDecode(t *testing.T) { testCases := []string{ "", "hello world", "Hello, 世界!", strings.Repeat("a", 1000), strings.Repeat("hello ", 1000), } for _, content := range testCases { t.Run(fmt.Sprintf("len=%d", len(content)), func(t *testing.T) { // Encode var compressed bytes.Buffer writer := GetZlibWriter(&compressed) _, err := io.Copy(writer, strings.NewReader(content)) if err != nil { t.Fatalf("encode error: %v", err) } err = writer.Close() if err != nil { t.Fatalf("writer close error: %v", err) } PutZlibWriter(writer) // Decode reader, err := GetZlibReader(bytes.NewReader(compressed.Bytes())) if err != nil { t.Fatalf("get reader error: %v", err) } var decompressed bytes.Buffer _, err = io.Copy(&decompressed, reader.Reader) if err != nil { t.Fatalf("decode error: %v", err) } PutZlibReader(reader) // Verify if decompressed.String() != content { t.Errorf("decompressed content mismatch:\ngot: %q\nwant: %q", decompressed.String(), content) } }) } } func TestZlibInvalidData(t *testing.T) { invalidData := []byte{0x00, 0x01, 0x02, 0x03} _, err := GetZlibReader(bytes.NewReader(invalidData)) if err == nil { t.Error("expected error for invalid zlib data, got nil") } } func TestZlibConcurrent(t *testing.T) { content := strings.Repeat("concurrent test data ", 1000) var compressed bytes.Buffer writer := GetZlibWriter(&compressed) _, _ = io.Copy(writer, strings.NewReader(content)) _ = writer.Close() PutZlibWriter(writer) done := make(chan bool, 10) for range 10 { go func() { for range 100 { reader, err := GetZlibReader(bytes.NewReader(compressed.Bytes())) if err != nil { fmt.Fprintf(os.Stderr, "concurrent decode error: %v\n", err) continue } var decompressed bytes.Buffer _, _ = io.Copy(&decompressed, reader.Reader) PutZlibReader(reader) if decompressed.String() != content { fmt.Fprintf(os.Stderr, "concurrent data mismatch\n") } } done <- true }() } for range 10 { <-done } } func TestZlibEmptyInput(t *testing.T) { // Test with empty input var buf bytes.Buffer writer := GetZlibWriter(&buf) _, err := writer.Write([]byte{}) if err != nil { t.Fatalf("write empty error: %v", err) } err = writer.Close() if err != nil { t.Fatalf("close error: %v", err) } PutZlibWriter(writer) // Should be able to decompress reader, err := GetZlibReader(bytes.NewReader(buf.Bytes())) if err != nil { t.Fatalf("get reader error: %v", err) } var decompressed bytes.Buffer _, err = io.Copy(&decompressed, reader.Reader) if err != nil { t.Fatalf("decode error: %v", err) } PutZlibReader(reader) if decompressed.Len() != 0 { t.Errorf("expected empty decompressed data, got %d bytes", decompressed.Len()) } } func TestZlibMultipleWrite(t *testing.T) { content := "hello world" var buf bytes.Buffer writer := GetZlibWriter(&buf) _, err := writer.Write([]byte(content[:5])) if err != nil { t.Fatalf("first write error: %v", err) } _, err = writer.Write([]byte(content[5:])) if err != nil { t.Fatalf("second write error: %v", err) } err = writer.Close() if err != nil { t.Fatalf("close error: %v", err) } PutZlibWriter(writer) // Decompress and verify reader, err := GetZlibReader(bytes.NewReader(buf.Bytes())) if err != nil { t.Fatalf("get reader error: %v", err) } var decompressed bytes.Buffer _, err = io.Copy(&decompressed, reader.Reader) if err != nil { t.Fatalf("decode error: %v", err) } PutZlibReader(reader) if decompressed.String() != content { t.Errorf("decompressed content mismatch:\ngot: %q\nwant: %q", decompressed.String(), content) } } func TestZlibPoolReuse(t *testing.T) { content := "test content for pool reuse" for i := range 100 { // Compress var compressed bytes.Buffer writer := GetZlibWriter(&compressed) _, err := io.Copy(writer, strings.NewReader(content)) if err != nil { t.Fatalf("encode error: %v", err) } err = writer.Close() if err != nil { t.Fatalf("writer close error: %v", err) } PutZlibWriter(writer) // Decompress reader, err := GetZlibReader(bytes.NewReader(compressed.Bytes())) if err != nil { t.Fatalf("get reader error: %v", err) } var decompressed bytes.Buffer _, err = io.Copy(&decompressed, reader.Reader) if err != nil { t.Fatalf("decode error: %v", err) } PutZlibReader(reader) if decompressed.String() != content { t.Errorf("iteration %d: decompressed content mismatch", i) } } } ================================================ FILE: modules/streamio/zstd.go ================================================ package streamio import ( "io" "sync" "github.com/klauspost/compress/zstd" ) var ( zstdReader = sync.Pool{ New: func() any { d, _ := zstd.NewReader(nil) return &ZstdDecoder{ Decoder: d, } }, } zstdWriter = sync.Pool{ New: func() any { e, _ := zstd.NewWriter(nil) return &ZstdEncoder{ Encoder: e, } }, } ) type ZstdDecoder struct { *zstd.Decoder } // GetZstdReader returns a ZstdDecoder that is managed by a sync.Pool. // Returns a ZLibReader that is reset using a dictionary that is // also managed by a sync.Pool. // // After use, the ZstdDecoder should be put back into the sync.Pool // by calling PutZstdReader. func GetZstdReader(r io.Reader) (*ZstdDecoder, error) { z := zstdReader.Get().(*ZstdDecoder) err := z.Reset(r) return z, err } // PutZstdReader puts z back into its sync.Pool, first closing the reader. // The Byte slice dictionary is also put back into its sync.Pool. func PutZstdReader(z *ZstdDecoder) { zstdReader.Put(z) } type ZstdEncoder struct { *zstd.Encoder } // GetZstdWriter returns a *ztsd.Encoder that is managed by a sync.Pool. // Returns a writer that is reset with w and ready for use. // // After use, the *ztsd.Encoder should be put back into the sync.Pool // by calling PutZstdWriter. func GetZstdWriter(w io.Writer) *ZstdEncoder { z := zstdWriter.Get().(*ZstdEncoder) z.Reset(w) return z } // PutZstdWriter puts w back into its sync.Pool. func PutZstdWriter(w *ZstdEncoder) { _ = w.Close() // close flush writer zstdWriter.Put(w) } ================================================ FILE: modules/streamio/zstd_test.go ================================================ package streamio import ( "bytes" "fmt" "io" "os" "strings" "testing" ) func TestZstdEncode(t *testing.T) { content := `Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ` for range 100 { var buf bytes.Buffer z := GetZstdWriter(&buf) if _, err := io.Copy(z, strings.NewReader(content)); err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) } PutZstdWriter(z) } } func TestZstdDecode(t *testing.T) { content := `Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ` var buf bytes.Buffer z := GetZstdWriter(&buf) _, _ = io.Copy(z, strings.NewReader(content)) PutZstdWriter(z) for i := range 100 { z, err := GetZstdReader(bytes.NewReader(buf.Bytes())) if err != nil { fmt.Fprintf(os.Stderr, "decode error: %v\n", err) PutZstdReader(z) continue } _, _ = io.Copy(io.Discard, z) fmt.Fprintf(os.Stderr, "%d\n", i) PutZstdReader(z) } } ================================================ FILE: modules/strengthen/du.go ================================================ //go:build !windows package strengthen import ( "os" "path/filepath" "runtime" "syscall" "golang.org/x/sys/unix" ) const ( SystemBlockSize int64 = 512 ) type duWalker struct { size int64 dirSize int64 ignoreErr bool } func isReg(si *unix.Stat_t) bool { return si.Mode&unix.S_IFMT == syscall.S_IFREG } func isDir(si *unix.Stat_t) bool { return si.Mode&unix.S_IFMT == syscall.S_IFDIR } func (d *duWalker) unixStat(p string) error { var si unix.Stat_t if err := unix.Stat(p, &si); err != nil { if !d.ignoreErr { return err } return nil } if !isReg(&si) { return nil } // number of 512B blocks allocated d.size += si.Blocks * SystemBlockSize return nil } func (d *duWalker) du(path string) error { d.size += d.dirSize dirs, err := os.ReadDir(path) if err != nil { return err } for _, it := range dirs { if !it.IsDir() { if err := d.unixStat(filepath.Join(path, it.Name())); err != nil { return err } continue } if err := d.du(filepath.Join(path, it.Name())); err != nil { if !d.ignoreErr { return err } } } return nil } func Du(path string) (int64, error) { var si unix.Stat_t if err := unix.Stat(path, &si); err != nil { return 0, err } if !isDir(&si) { if !isReg(&si) { return 0, nil } return si.Blocks * SystemBlockSize, nil } dw := &duWalker{ignoreErr: true} // skip broken symlink // Windows and macOS directory self size is zero not like Linux. Linux 4K (blocks) if runtime.GOOS != "darwin" { dw.dirSize = si.Blocks } if err := dw.du(path); err != nil { return dw.size, err } return dw.size, nil } ================================================ FILE: modules/strengthen/du_test.go ================================================ //go:build !windows package strengthen import ( "fmt" "os" "syscall" "testing" ) func TestDu(t *testing.T) { sz, err := Du("/tmp/repositories") if err != nil { fmt.Fprintf(os.Stderr, "unable du %v\n", err) return } var si syscall.Stat_t if err := syscall.Stat("/tmp/repositories", &si); err != nil { fmt.Fprintf(os.Stderr, "unable du %v\n", err) return } fmt.Fprintf(os.Stderr, "/tmp/repositories %0.2f\n", float64(sz)/1024) } ================================================ FILE: modules/strengthen/du_windows.go ================================================ //go:build windows package strengthen import ( "os" "path/filepath" ) func Du(path string) (int64, error) { dirs, err := os.ReadDir(path) if err != nil { return 0, err } var size int64 for _, d := range dirs { di, err := d.Info() if err != nil { return size, nil } size += di.Size() if !d.IsDir() { continue } dirPath := filepath.Join(path, d.Name()) if sz, err := Du(dirPath); err == nil { size += sz } } return size, nil } ================================================ FILE: modules/strengthen/duration.go ================================================ package strengthen import ( "errors" "time" ) var unitMap = map[string]uint64{ "ns": uint64(time.Nanosecond), "us": uint64(time.Microsecond), "µs": uint64(time.Microsecond), // U+00B5 = micro symbol "μs": uint64(time.Microsecond), // U+03BC = Greek letter mu "ms": uint64(time.Millisecond), "s": uint64(time.Second), "m": uint64(time.Minute), "h": uint64(time.Hour), "d": uint64(time.Hour) * 24, "w": uint64(time.Hour) * 168, } var ( validDurationByte = [...]byte{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } ) // ParseDuration parses a duration string. // A duration string is a possibly signed sequence of // decimal numbers, each with optional fraction and a unit suffix, // such as "300ms", "-1.5h" or "2h45m". // Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". func ParseDuration(s string) (time.Duration, error) { // [-+]?([0-9]*(\.[0-9]*)?[a-z]+)+ orig := s var d uint64 neg := false // Consume [-+]? if s != "" { c := s[0] if c == '-' || c == '+' { neg = c == '-' s = s[1:] } } // Special case: if all that is left is "0", this is zero. if s == "0" { return 0, nil } if s == "" { return 0, errors.New("time: invalid duration " + quote(orig)) } for s != "" { var ( v, f uint64 // integers before, after decimal point scale float64 = 1 // value = v + f/scale ) var err error // The next character must be [0-9.] if validDurationByte[s[0]] != 1 { return 0, errors.New("time: invalid duration " + quote(orig)) } // Consume [0-9]* pl := len(s) v, s, err = leadingInt(s) if err != nil { return 0, errors.New("time: invalid duration " + quote(orig)) } pre := pl != len(s) // whether we consumed anything before a period // Consume (\.[0-9]*)? post := false if s != "" && s[0] == '.' { s = s[1:] pl := len(s) f, scale, s = leadingFraction(s) post = pl != len(s) } if !pre && !post { // no digits (e.g. ".s" or "-.s") return 0, errors.New("time: invalid duration " + quote(orig)) } // Consume unit. i := 0 for ; i < len(s); i++ { c := s[i] if c == '.' || '0' <= c && c <= '9' { break } } if i == 0 { return 0, errors.New("time: missing unit in duration " + quote(orig)) } u := s[:i] s = s[i:] unit, ok := unitMap[u] if !ok { return 0, errors.New("time: unknown unit " + quote(u) + " in duration " + quote(orig)) } if v > 1<<63/unit { // overflow return 0, errors.New("time: invalid duration " + quote(orig)) } v *= unit if f > 0 { // float64 is needed to be nanosecond accurate for fractions of hours. // v >= 0 && (f*unit/scale) <= 3.6e+12 (ns/h, h is the largest unit) v += uint64(float64(f) * (float64(unit) / scale)) if v > 1<<63 { // overflow return 0, errors.New("time: invalid duration " + quote(orig)) } } d += v if d > 1<<63 { return 0, errors.New("time: invalid duration " + quote(orig)) } } if neg { return -time.Duration(d), nil } if d > 1<<63-1 { return 0, errors.New("time: invalid duration " + quote(orig)) } return time.Duration(d), nil } func quote(s string) string { return "\"" + s + "\"" } var errLeadingInt = errors.New("time: bad [0-9]*") // never printed // leadingInt consumes the leading [0-9]* from s. func leadingInt[bytes []byte | string](s bytes) (x uint64, rem bytes, err error) { i := 0 for ; i < len(s); i++ { c := s[i] if c < '0' || c > '9' { break } if x > 1<<63/10 { // overflow return 0, rem, errLeadingInt } x = x*10 + uint64(c) - '0' if x > 1<<63 { // overflow return 0, rem, errLeadingInt } } return x, s[i:], nil } // leadingFraction consumes the leading [0-9]* from s. // It is used only for fractions, so does not return an error on overflow, // it just stops accumulating precision. func leadingFraction(s string) (x uint64, scale float64, rem string) { i := 0 scale = 1 overflow := false for ; i < len(s); i++ { c := s[i] if c < '0' || c > '9' { break } if overflow { continue } if x > (1<<63-1)/10 { // It's possible for overflow to give a positive number, so take care. overflow = true continue } y := x*10 + uint64(c) - '0' if y > 1<<63 { overflow = true continue } x = y scale *= 10 } return x, scale, s[i:] } ================================================ FILE: modules/strengthen/duration_test.go ================================================ package strengthen import ( "fmt" "os" "testing" ) func TestDurationByte(t *testing.T) { for i := range 256 { if validDurationByte[i] == 1 { fmt.Fprintf(os.Stderr, "GOOD: %c\n", i) } } } func TestParseDuration(t *testing.T) { ss := []string{ "-1.5h", "300ms", "2h45m", "uuuu8h", } for _, s := range ss { d, err := ParseDuration(s) if err != nil { fmt.Fprintf(os.Stderr, "BAD: %s err: %v\n", s, err) continue } fmt.Fprintf(os.Stderr, "GOOD: %v\n", d) } } ================================================ FILE: modules/strengthen/formatsize.go ================================================ package strengthen /* Copyright The containerd Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // Port from: https://github.com/docker/go-units/blob/master/size.go import ( "fmt" ) const ( sizeByteBase = 1024.0 ) var ( sizeLists = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"} ) func getSizeAndUnit(size float64) (float64, string) { i := 0 unitsLimit := len(sizeLists) - 1 for size >= sizeByteBase && i < unitsLimit { size /= sizeByteBase i++ } return size, sizeLists[i] } func formatBytes(size float64) string { size, unit := getSizeAndUnit(size) return fmt.Sprintf("%.4g %s", size, unit) } func FormatSize(s int64) string { return formatBytes(float64(s)) } func FormatSizeU(s uint64) string { return formatBytes(float64(s)) } ================================================ FILE: modules/strengthen/fs_unix.go ================================================ //go:build !windows package strengthen import ( "os" ) func FinalizeObject(oldpath string, newpath string) (err error) { if err = os.Link(oldpath, newpath); err == nil { _ = os.Remove(oldpath) return } return os.Rename(oldpath, newpath) } ================================================ FILE: modules/strengthen/fs_windows.go ================================================ //go:build windows package strengthen import ( "errors" "os" "runtime" "syscall" "time" "unsafe" "golang.org/x/sys/windows" ) type FILE_BASIC_INFO struct { CreationTime int64 LastAccessTime int64 LastWriteTime int64 ChangedTime int64 FileAttributes uint32 // Pad out to 8-byte alignment. // // Without this padding, TestChmod fails due to an argument validation error // in SetFileInformationByHandle on windows/386. // // https://learn.microsoft.com/en-us/cpp/build/reference/zp-struct-member-alignment?view=msvc-170 // says that “The C/C++ headers in the Windows SDK assume the platform's // default alignment is used.” What we see here is padding rather than // alignment, but maybe it is related. _ uint32 } type FILE_DISPOSITION_INFO struct { Flags uint32 } type FILE_DISPOSITION_INFO_EX struct { Flags uint32 } type FILE_RENAME_INFO struct { ReplaceIfExists uint32 RootDirectory windows.Handle FileNameLength uint32 FileName [1]uint16 } var ( errUnsupported = map[error]bool{ windows.ERROR_INVALID_PARAMETER: true, windows.ERROR_INVALID_FUNCTION: true, windows.ERROR_NOT_SUPPORTED: true, } ) func posixSemanticsRename(oldpath, newpath string) error { oldPathUTF16, err := windows.UTF16PtrFromString(oldpath) if err != nil { return err } newPathUTF16, err := windows.UTF16FromString(newpath) if err != nil { return err } fd, err := windows.CreateFile(oldPathUTF16, windows.DELETE|windows.FILE_WRITE_ATTRIBUTES, windows.FILE_SHARE_WRITE|windows.FILE_SHARE_READ|windows.FILE_SHARE_DELETE, nil, windows.OPEN_EXISTING, windows.FILE_FLAG_BACKUP_SEMANTICS|windows.FILE_FLAG_OPEN_REPARSE_POINT, 0) if err != nil { return err } defer windows.CloseHandle(fd) // nolint fileNameLen := len(newPathUTF16)*2 - 2 var info FILE_RENAME_INFO bufferSize := int(unsafe.Offsetof(info.FileName)) + fileNameLen buffer := make([]byte, bufferSize) infoPtr := (*FILE_RENAME_INFO)(unsafe.Pointer(&buffer[0])) infoPtr.ReplaceIfExists = windows.FILE_RENAME_REPLACE_IF_EXISTS | windows.FILE_RENAME_POSIX_SEMANTICS | windows.FILE_RENAME_IGNORE_READONLY_ATTRIBUTE infoPtr.FileNameLength = uint32(fileNameLen) copy((*[windows.MAX_LONG_PATH]uint16)(unsafe.Pointer(&infoPtr.FileName[0]))[:fileNameLen/2:fileNameLen/2], newPathUTF16) // https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/ns-ntifs-_file_rename_information // https://learn.microsoft.com/en-us/windows/win32/api/winbase/ns-winbase-file_rename_info return windows.SetFileInformationByHandle(fd, windows.FileRenameInfoEx, &buffer[0], uint32(bufferSize)) } // rename: posix rename semantics func rename(oldpath, newpath string) error { err := posixSemanticsRename(oldpath, newpath) if errUnsupported[err] { return os.Rename(oldpath, newpath) } return err } func removeHideAttributes(fd windows.Handle) error { var du FILE_BASIC_INFO if err := windows.GetFileInformationByHandleEx(fd, windows.FileBasicInfo, (*byte)(unsafe.Pointer(&du)), uint32(unsafe.Sizeof(du))); err != nil { return err } du.FileAttributes &^= (windows.FILE_ATTRIBUTE_HIDDEN | windows.FILE_ATTRIBUTE_READONLY) return windows.SetFileInformationByHandle(fd, windows.FileBasicInfo, (*byte)(unsafe.Pointer(&du)), uint32(unsafe.Sizeof(du))) } func posixSemanticsRemove(fd windows.Handle) error { infoEx := FILE_DISPOSITION_INFO_EX{ Flags: windows.FILE_DISPOSITION_DELETE | windows.FILE_DISPOSITION_POSIX_SEMANTICS, } var err error if err = windows.SetFileInformationByHandle(fd, windows.FileDispositionInfoEx, (*byte)(unsafe.Pointer(&infoEx)), uint32(unsafe.Sizeof(infoEx))); err == nil { return nil } if err == windows.ERROR_ACCESS_DENIED { if err := removeHideAttributes(fd); err != nil { return err } if err = windows.SetFileInformationByHandle(fd, windows.FileDispositionInfoEx, (*byte)(unsafe.Pointer(&infoEx)), uint32(unsafe.Sizeof(infoEx))); err == nil { return nil } } if err != windows.ERROR_INVALID_PARAMETER && err != windows.ERROR_INVALID_FUNCTION && err != windows.ERROR_NOT_SUBSTED { return err } info := FILE_DISPOSITION_INFO{ Flags: 0x13, // DELETE } if err = windows.SetFileInformationByHandle(fd, windows.FileDispositionInfo, (*byte)(unsafe.Pointer(&info)), uint32(unsafe.Sizeof(info))); err == nil { return nil } if err != windows.ERROR_ACCESS_DENIED { return err } if err := removeHideAttributes(fd); err != nil { return err } return windows.SetFileInformationByHandle(fd, windows.FileDispositionInfo, (*byte)(unsafe.Pointer(&info)), uint32(unsafe.Sizeof(info))) } func Remove(name string) error { nameUTF16, err := windows.UTF16PtrFromString(name) if err != nil { return err } fd, err := windows.CreateFile(nameUTF16, windows.FILE_READ_ATTRIBUTES|windows.FILE_WRITE_ATTRIBUTES|windows.DELETE, windows.FILE_SHARE_READ|windows.FILE_SHARE_WRITE|windows.FILE_SHARE_DELETE, nil, windows.OPEN_EXISTING, windows.FILE_FLAG_BACKUP_SEMANTICS|windows.FILE_FLAG_OPEN_REPARSE_POINT, 0, ) if err == syscall.ERROR_NOT_FOUND { return nil } if err != nil { return err } defer windows.CloseHandle(fd) // nolint return posixSemanticsRemove(fd) } var ( delay = []time.Duration{0, 1, 10, 20, 40} isWindows = func() bool { return runtime.GOOS == "windows" }() ) const ( ERROR_ACCESS_DENIED syscall.Errno = 5 ERROR_SHARING_VIOLATION syscall.Errno = 32 ERROR_LOCK_VIOLATION syscall.Errno = 33 ) func isRetryErr(err error) bool { if !isWindows { return false } if os.IsPermission(err) { return true } if errno, ok := errors.AsType[syscall.Errno](err); ok { switch errno { case ERROR_ACCESS_DENIED, ERROR_SHARING_VIOLATION, ERROR_LOCK_VIOLATION: return true } } return false } func windowsLink(oldpath, newpath string) (err error) { for range 2 { if err = os.Link(oldpath, newpath); err == nil { _ = os.Remove(oldpath) return nil } if !errors.Is(err, windows.ERROR_ALREADY_EXISTS) { break } if removeErr := os.Remove(newpath); removeErr != nil { break } } return err } func FinalizeObject(oldpath string, newpath string) (err error) { if err = windowsLink(oldpath, newpath); err == nil { return err } // no retry rename if err = rename(oldpath, newpath); err == nil { return } // on Windows and if !isRetryErr(err) { return } for tries := range delay { /* * We assume that some other process had the source or * destination file open at the wrong moment and retry. * In order to give the other process a higher chance to * complete its operation, we give up our time slice now. * If we have to retry again, we do sleep a bit. */ time.Sleep(delay[tries] * time.Millisecond) _ = os.Chmod(newpath, 0644) // & ~FILE_ATTRIBUTE_READONLY // retry run if err = rename(oldpath, newpath); err == nil { return } // Only windows retry if !isRetryErr(err) { return } } // FIXME: Windows platform security software can cause some bizarre phenomena, such as star points. if os.IsPermission(err) { _, err = os.Stat(newpath) return } return } ================================================ FILE: modules/strengthen/limitwriter.go ================================================ package strengthen import ( "io" ) type LimitWriter struct { dst io.Writer limit int } // NewLimitWriter create a new LimitWriter that accepts at most 'limit' bytes. func NewLimitWriter(dst io.Writer, limit int) *LimitWriter { return &LimitWriter{ dst: dst, limit: limit, } } func (w *LimitWriter) Write(p []byte) (int, error) { n := len(p) var err error if w.limit > 0 { if n > w.limit { p = p[:w.limit] } w.limit -= len(p) _, err = w.dst.Write(p) } return n, err } ================================================ FILE: modules/strengthen/measure.go ================================================ package strengthen import ( "fmt" "os" "path/filepath" "runtime/pprof" ) type Measurer struct { closeFn func() } func NewMeasurer(name string, debugMode bool) *Measurer { m := &Measurer{} if !debugMode { return m } pprofName := filepath.Join(os.TempDir(), fmt.Sprintf("%s-%d.pprof", name, os.Getpid())) fd, err := os.Create(pprofName) if err != nil { return m } if err = pprof.StartCPUProfile(fd); err != nil { _ = fd.Close() return m } m.closeFn = func() { pprof.StopCPUProfile() _ = fd.Close() fmt.Fprintf(os.Stderr, "Task operation completed\ngo tool pprof -http=\":8080\" %s\n", pprofName) } return m } func (d *Measurer) Close() { if d.closeFn != nil { d.closeFn() } } ================================================ FILE: modules/strengthen/net.go ================================================ package strengthen import ( "context" "errors" "fmt" "net" "net/netip" "slices" ) var ( ErrNoAddress = errors.New("no ip address") ) func parseCidr(network string, comment string) *net.IPNet { _, net, err := net.ParseCIDR(network) if err != nil { panic(fmt.Sprintf("error parsing %s (%s): %s", network, comment, err)) } return net } var ( // Private CIDRs to ignore privateNetworks = []*net.IPNet{ // RFC1918 // 10.0.0.0/8 { IP: []byte{10, 0, 0, 0}, Mask: []byte{255, 0, 0, 0}, }, // 172.16.0.0/12 { IP: []byte{172, 16, 0, 0}, Mask: []byte{255, 240, 0, 0}, }, // 192.168.0.0/16 { IP: []byte{192, 168, 0, 0}, Mask: []byte{255, 255, 0, 0}, }, // RFC5735 // 127.0.0.0/8 { IP: []byte{127, 0, 0, 0}, Mask: []byte{255, 0, 0, 0}, }, // RFC1122 Section 3.2.1.3 // 0.0.0.0/8 { IP: []byte{0, 0, 0, 0}, Mask: []byte{255, 0, 0, 0}, }, // RFC3927 // 169.254.0.0/16 { IP: []byte{169, 254, 0, 0}, Mask: []byte{255, 255, 0, 0}, }, // RFC 5736 // 192.0.0.0/24 { IP: []byte{192, 0, 0, 0}, Mask: []byte{255, 255, 255, 0}, }, // RFC 5737 // 192.0.2.0/24 { IP: []byte{192, 0, 2, 0}, Mask: []byte{255, 255, 255, 0}, }, // 198.51.100.0/24 { IP: []byte{198, 51, 100, 0}, Mask: []byte{255, 255, 255, 0}, }, // 203.0.113.0/24 { IP: []byte{203, 0, 113, 0}, Mask: []byte{255, 255, 255, 0}, }, // RFC 3068 // 192.88.99.0/24 { IP: []byte{192, 88, 99, 0}, Mask: []byte{255, 255, 255, 0}, }, // RFC 2544 // 192.18.0.0/15 { IP: []byte{192, 18, 0, 0}, Mask: []byte{255, 254, 0, 0}, }, // RFC 3171 // 224.0.0.0/4 { IP: []byte{224, 0, 0, 0}, Mask: []byte{240, 0, 0, 0}, }, // RFC 1112 // 240.0.0.0/4 { IP: []byte{240, 0, 0, 0}, Mask: []byte{240, 0, 0, 0}, }, // RFC 919 Section 7 // 255.255.255.255/32 { IP: []byte{255, 255, 255, 255}, Mask: []byte{255, 255, 255, 255}, }, // // RFC 6598 // // 100.64.0.0./10 // { // IP: []byte{100, 64, 0, 0}, // Mask: []byte{255, 192, 0, 0}, // }, } // Sourced from https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml // where Global, Source, or Destination is False privateV6Networks = []*net.IPNet{ parseCidr("::/128", "RFC 4291: Unspecified Address"), parseCidr("::1/128", "RFC 4291: Loopback Address"), parseCidr("::ffff:0:0/96", "RFC 4291: IPv4-mapped Address"), parseCidr("100::/64", "RFC 6666: Discard Address Block"), parseCidr("2001::/23", "RFC 2928: IETF Protocol Assignments"), parseCidr("2001:2::/48", "RFC 5180: Benchmarking"), parseCidr("2001:db8::/32", "RFC 3849: Documentation"), parseCidr("2001::/32", "RFC 4380: TEREDO"), parseCidr("fc00::/7", "RFC 4193: Unique-Local"), parseCidr("fe80::/10", "RFC 4291: Section 2.5.6 Link-Scoped Unicast"), parseCidr("ff00::/8", "RFC 4291: Section 2.7"), // We disable validations to IPs under the 6to4 anycase prefix because // there's too much risk of a malicious actor advertising the prefix and // answering validations for a 6to4 host they do not control. // https://community.letsencrypt.org/t/problems-validating-ipv6-against-host-running-6to4/18312/9 parseCidr("2002::/16", "RFC 7526: 6to4 anycast prefix deprecated"), } ) func LookupExternalAddr(ctx context.Context, host string) (bool, error) { ns, err := net.DefaultResolver.LookupHost(ctx, host) if err != nil { return false, err } addr, err := netip.ParseAddr(ns[0]) if err != nil { return false, err } switch { case addr.Is4(): i := net.IP(addr.AsSlice()) return !slices.ContainsFunc(privateNetworks, func(n *net.IPNet) bool { return n.Contains(i) }), nil case addr.Is6(): i := net.IP(addr.AsSlice()) return !slices.ContainsFunc(privateV6Networks, func(n *net.IPNet) bool { return n.Contains(i) }), nil default: } return false, nil } func ExternalAddr() ([]string, error) { ifaces, err := net.Interfaces() if err != nil { return nil, err } exAddrs := make([]string, 0, 4) for _, iface := range ifaces { //interface down || loopback interface if iface.Flags&net.FlagUp == 0 || (iface.Flags&net.FlagLoopback != 0) { continue } addrs, err := iface.Addrs() if err != nil { return nil, err } for _, addr := range addrs { var ip net.IP switch v := addr.(type) { case *net.IPNet: ip = v.IP case *net.IPAddr: ip = v.IP } if ip == nil || ip.IsLoopback() || ip.IsLinkLocalUnicast() { continue } exAddrs = append(exAddrs, ip.String()) } } return exAddrs, nil } ================================================ FILE: modules/strengthen/os_unix.go ================================================ //go:build !windows /* Copyright The containerd Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package strengthen import ( "os" "path/filepath" ) // ResolveSymbolicLink will follow any symbolic links func ResolveSymbolicLink(path string) (string, error) { info, err := os.Lstat(path) if err != nil { return "", err } if info.Mode()&os.ModeSymlink != os.ModeSymlink { return path, nil } return filepath.EvalSymlinks(path) } ================================================ FILE: modules/strengthen/os_windows.go ================================================ //go:build windows /* Copyright The containerd Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package strengthen import ( "os" "strings" "sync" "unicode/utf16" "golang.org/x/sys/windows" ) // openPath takes a path, opens it, and returns the resulting handle. // It works for both file and directory paths. // // We are not able to use builtin Go functionality for opening a directory path: // - os.Open on a directory returns a os.File where Fd() is a search handle from FindFirstFile. // - syscall.Open does not provide a way to specify FILE_FLAG_BACKUP_SEMANTICS, which is needed to // open a directory. // // We could use os.Open if the path is a file, but it's easier to just use the same code for both. // Therefore, we call windows.CreateFile directly. func openPath(path string) (windows.Handle, error) { u16, err := windows.UTF16PtrFromString(path) if err != nil { return 0, err } h, err := windows.CreateFile( u16, 0, windows.FILE_SHARE_READ|windows.FILE_SHARE_WRITE|windows.FILE_SHARE_DELETE, nil, windows.OPEN_EXISTING, windows.FILE_FLAG_BACKUP_SEMANTICS, // Needed to open a directory handle. 0) if err != nil { return 0, &os.PathError{ Op: "CreateFile", Path: path, Err: err, } } return h, nil } // GetFinalPathNameByHandle flags. // //nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API. const ( cFILE_NAME_OPENED = 0x8 cVOLUME_NAME_DOS = 0x0 cVOLUME_NAME_GUID = 0x1 ) var pool = sync.Pool{ New: func() any { // Size of buffer chosen somewhat arbitrarily to accommodate a large number of path strings. // MAX_PATH (260) + size of volume GUID prefix (49) + null terminator = 310. b := make([]uint16, 310) return &b }, } // getFinalPathNameByHandle facilitates calling the Windows API GetFinalPathNameByHandle // with the given handle and flags. It transparently takes care of creating a buffer of the // correct size for the call. func getFinalPathNameByHandle(h windows.Handle, flags uint32) (string, error) { b := *(pool.Get().(*[]uint16)) defer func() { pool.Put(&b) }() for { n, err := windows.GetFinalPathNameByHandle(h, &b[0], uint32(len(b)), flags) if err != nil { return "", err } // If the buffer wasn't large enough, n will be the total size needed (including null terminator). // Resize and try again. if n > uint32(len(b)) { b = make([]uint16, n) continue } // If the buffer is large enough, n will be the size not including the null terminator. // Convert to a Go string and return. return string(utf16.Decode(b[:n])), nil } } // resolvePath implements path resolution for Windows. It attempts to return the "real" path to the // file or directory represented by the given path. // The resolution works by using the Windows API GetFinalPathNameByHandle, which takes a handle and // returns the final path to that file. func resolvePath(path string) (string, error) { h, err := openPath(path) if err != nil { return "", err } defer windows.CloseHandle(h) //nolint // We use the Windows API GetFinalPathNameByHandle to handle path resolution. GetFinalPathNameByHandle // returns a resolved path name for a file or directory. The returned path can be in several different // formats, based on the flags passed. There are several goals behind the design here: // - Do as little manual path manipulation as possible. Since Windows path formatting can be quite // complex, we try to just let the Windows APIs handle that for us. // - Retain as much compatibility with existing Go path functions as we can. In particular, we try to // ensure paths returned from resolvePath can be passed to EvalSymlinks. // // First, we query for the VOLUME_NAME_GUID path of the file. This will return a path in the form // "\\?\Volume{8a25748f-cf34-4ac6-9ee2-c89400e886db}\dir\file.txt". If the path is a UNC share // (e.g. "\\server\share\dir\file.txt"), then the VOLUME_NAME_GUID query will fail with ERROR_PATH_NOT_FOUND. // In this case, we will next try a VOLUME_NAME_DOS query. This query will return a path for a UNC share // in the form "\\?\UNC\server\share\dir\file.txt". This path will work with most functions, but EvalSymlinks // fails on it. Therefore, we rewrite the path to the form "\\server\share\dir\file.txt" before returning it. // This path rewrite may not be valid in all cases (see the notes in the next paragraph), but those should // be very rare edge cases, and this case wouldn't have worked with EvalSymlinks anyways. // // The "\\?\" prefix indicates that no path parsing or normalization should be performed by Windows. // Instead the path is passed directly to the object manager. The lack of parsing means that "." and ".." are // interpreted literally and "\"" must be used as a path separator. Additionally, because normalization is // not done, certain paths can only be represented in this format. For instance, "\\?\C:\foo." (with a trailing .) // cannot be written as "C:\foo.", because path normalization will remove the trailing ".". // // We use FILE_NAME_OPENED instead of FILE_NAME_NORMALIZED, as FILE_NAME_NORMALIZED can fail on some // UNC paths based on access restrictions. The additional normalization done is also quite minimal in // most cases. // // Querying for VOLUME_NAME_DOS first instead of VOLUME_NAME_GUID would yield a "nicer looking" path in some cases. // For instance, it could return "\\?\C:\dir\file.txt" instead of "\\?\Volume{8a25748f-cf34-4ac6-9ee2-c89400e886db}\dir\file.txt". // However, we query for VOLUME_NAME_GUID first for two reasons: // - The volume GUID path is more stable. A volume's mount point can change when it is remounted, but its // volume GUID should not change. // - If the volume is mounted at a non-drive letter path (e.g. mounted to "C:\mnt"), then VOLUME_NAME_DOS // will return the mount path. EvalSymlinks fails on a path like this due to a bug. // // References: // - GetFinalPathNameByHandle: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfinalpathnamebyhandlea // - Naming Files, Paths, and Namespaces: https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file // - Naming a Volume: https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-volume rPath, err := getFinalPathNameByHandle(h, cFILE_NAME_OPENED|cVOLUME_NAME_GUID) if err == windows.ERROR_PATH_NOT_FOUND { // ERROR_PATH_NOT_FOUND is returned from the VOLUME_NAME_GUID query if the path is a // network share (UNC path). In this case, query for the DOS name instead, then translate // the returned path to make it more palatable to other path functions. rPath, err = getFinalPathNameByHandle(h, cFILE_NAME_OPENED|cVOLUME_NAME_DOS) if err != nil { return "", err } if strings.HasPrefix(rPath, `\\?\UNC\`) { // Convert \\?\UNC\server\share -> \\server\share. The \\?\UNC syntax does not work with // some Go filepath functions such as EvalSymlinks. In the future if other components // move away from EvalSymlinks and use GetFinalPathNameByHandle instead, we could remove // this path munging. rPath = `\\` + rPath[len(`\\?\UNC\`):] } } else if err != nil { return "", err } return rPath, nil } // ResolveSymbolicLink will follow any symbolic links func ResolveSymbolicLink(path string) (string, error) { // filepath.EvalSymlinks does not work very well on Windows, so instead we resolve the path // via resolvePath which uses GetFinalPathNameByHandle. This returns either a path prefixed with `\\?\`, // or a remote share path in the form \\server\share. These should work with most Go and Windows APIs. return resolvePath(path) } ================================================ FILE: modules/strengthen/path.go ================================================ package strengthen import ( "errors" "os" "os/user" "path/filepath" "strings" ) var ( ErrDangerousRepoPath = errors.New("dangerous or unreachable repository path") ) // ExpandPath is a helper function to expand a relative or home-relative path to an absolute path. // // eg. // // ~/.someconf -> /home/alec/.someconf // ~alec/.someconf -> /home/alec/.someconf func ExpandPath(path string) string { if filepath.IsAbs(path) { return path } if strings.HasPrefix(path, "~") { // For Windows systems, please replace the path separator first pos := strings.IndexByte(path, '/') switch { case pos == 1: if homeDir, err := os.UserHomeDir(); err == nil { return filepath.Join(homeDir, path[2:]) } case pos > 1: // https://github.com/golang/go/issues/24383 // macOS may not produce correct results username := path[1:pos] if userAccount, err := user.Lookup(username); err == nil { return filepath.Join(userAccount.HomeDir, path[pos+1:]) } default: } } abspath, err := filepath.Abs(path) if err != nil { return path } return abspath } func splitPathInternal(p string) []string { sv := make([]string, 0, 8) var first, i int for ; i < len(p); i++ { if p[i] != '/' && p[i] != '\\' { continue } if first != i { sv = append(sv, p[first:i]) } first = i + 1 } if first < len(p) { sv = append(sv, p[first:]) } return sv } // SplitPath skip empty string func SplitPath(p string) []string { if len(p) == 0 { return nil } svv := splitPathInternal(p) sv := make([]string, 0, len(svv)) for _, s := range svv { if s == "." { continue } if s == ".." { if len(sv) == 0 { return sv } sv = sv[0 : len(sv)-1] continue } sv = append(sv, s) } return sv } ================================================ FILE: modules/strengthen/path_test.go ================================================ package strengthen import ( "fmt" "os" "os/user" "testing" ) func TestExpandPath(t *testing.T) { u, err := user.Current() if err != nil { return } dirs := []string{ "~/.zetaignore", "~" + u.Username + "/jacksone", "/tmp/jock", "~root/downloads", } for _, d := range dirs { fmt.Fprintf(os.Stderr, "%s --> %s\n", d, ExpandPath(d)) } } ================================================ FILE: modules/strengthen/rid.go ================================================ package strengthen // Thanks: https://github.com/zincium/zinc/blob/mainline/modules/shadow/rid.go import ( "crypto/rand" "encoding/base64" "encoding/hex" "fmt" "io" "time" "github.com/antgroup/hugescm/modules/base58" ) // RID type type RID [16]byte type Token [18]byte // var s var ( ZeroRID RID // empty RID, all zeros ZeroToken Token ) var rd = rand.Reader // random function // NewRandom random func NewRandom() (RID, error) { return NewRandomFromReader(rd) } // NewRandomFromReader returns a UUID based on bytes read from a given io.Reader. func NewRandomFromReader(r io.Reader) (RID, error) { var rid RID _, err := io.ReadFull(r, rid[:]) if err != nil { return ZeroRID, err } rid[6] = (rid[6] & 0x0f) | 0x40 // Version 4 rid[8] = (rid[8] & 0x3f) | 0x80 // Variant is 10 return rid, nil } // String returns the string form of uuid, xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx // , or "" if uuid is invalid. func (rid RID) String() string { var buf [36]byte encodeHex(buf[:], rid) return string(buf[:]) } func encodeHex(dst []byte, rid RID) { hex.Encode(dst, rid[:4]) dst[8] = '-' hex.Encode(dst[9:13], rid[4:6]) dst[13] = '-' hex.Encode(dst[14:18], rid[6:8]) dst[18] = '-' hex.Encode(dst[19:23], rid[8:10]) dst[23] = '-' hex.Encode(dst[24:], rid[10:]) } // NewRID return RequestID func NewRID() string { rid, _ := NewRandom() return rid.String() } func NewToken() string { var token Token _, err := io.ReadFull(rd, token[:]) if err != nil { return base58.Encode(ZeroToken[:]) } return base58.Encode(token[:]) } func NewRandomString(length int) string { buf := make([]byte, length) _, _ = io.ReadFull(rd, buf) return base64.URLEncoding.EncodeToString(buf)[0:length] } const ( DateOnly = "20060102" ) func NewSessionID() string { now := time.Now() buf := make([]byte, 16) _, _ = io.ReadFull(rd, buf) return fmt.Sprintf("%s-%s", now.Format(DateOnly), base58.Encode(buf)) } ================================================ FILE: modules/strengthen/rid_test.go ================================================ package strengthen import ( "fmt" "os" "testing" ) func TestNewRID(t *testing.T) { fmt.Fprintf(os.Stderr, "new uuid: {%s}\n", NewRID()) fmt.Fprintf(os.Stderr, "new fingerprint: {%s}\n", NewRandomString(16)) } func TestNewToken(t *testing.T) { fmt.Fprintf(os.Stderr, "new token: {%s}\n", NewToken()) } ================================================ FILE: modules/strengthen/statfs.go ================================================ package strengthen type DiskFreeSpace struct { Total uint64 Used uint64 Free uint64 Avail uint64 FS string } const UnknownFS = "unknown" ================================================ FILE: modules/strengthen/statfs_linux.go ================================================ //go:build linux package strengthen import "golang.org/x/sys/unix" const ( FilesystemSuperMagicTmpfs = 0x01021994 FilesystemSuperMagicExt4 = 0xEF53 FilesystemSuperMagicXfs = 0x58465342 FilesystemSuperMagicNfs = 0x6969 FilesystemSuperMagicZfs = 0x2fc12fc1 // FilesystemSuperMagicBtrfs is the 64bit magic for Btrfs // we not support 32bit system FilesystemSuperMagicBtrfs = 0x9123683E FilesystemSuperMagicCGroup = 0x27e0eb FilesystemSuperMagicCGroup2 = 0x63677270 FilesystemSuperMagicNTFS = 0x5346544e FilesystemSuperMagicEXFAT = 0x2011BAB0 FilesystemSuperMagicCEPH = 0x00c36400 FilesystemSuperMagicOverlayFS = 0x794c7630 // https://developer.apple.com/support/downloads/Apple-File-System-Reference.pdf FilesystemSuperMagicAPFS = 0x42535041 // BSPA ) // This map has been collected from `man 2 statfs` and is non-exhaustive // The values of EXT2, EXT3, and EXT4 have been renamed to a generic EXT as their // key values were duplicate. This value is now called EXT_2_3_4 // https://github.com/torvalds/linux/blob/master/include/uapi/linux/magic.h var ( magicMap = map[int64]string{ 0xadf5: "adfs", 0xadff: "affs", 0x5346414f: "afs", 0x0187: "autofs", 0x00c36400: "ceph", 0x73757245: "coda", 0x28cd3d45: "cramfs", // 0x453dcd28 wroing endianess 0x64626720: "debugfs", 0x73636673: "securityfs", 0xf97cff8c: "selinux", 0x43415d53: "smack", 0x858458f6: "ramfs", 0x01021994: "tmpfs", 0x958458f6: "hugetlbfs", 0x73717368: "squashfs", 0xf15f: "ecryptfs", 0x00414a53: "efs", 0xE0F5E1E2: "erofs", 0xef53: "ext_2_3_4", 0xabba1974: "xenfs", 0x9123683e: "btrfs", 0x3434: "nilfs", 0xf2f52010: "f2fs", 0xf995e849: "hpfs", 0x9660: "isofs", 0x72b6: "jffs2", 0x58465342: "xfs", 0x6165676c: "pstorefs", 0xde5e81e4: "efivarfs", 0x00c0ffee: "hostfs", 0x794c7630: "overlayfs", 0x65735546: "fuse", 0xca451a4e: "bcachefs", // MINIX fs 0x137f: "minix", 0x138f: "minix2", 0x2468: "minix2", 0x2478: "minix22", 0x4d5a: "minix3", // Others 0x4d44: "msdos", 0x2011bab0: "exFAT", 0x564c: "ncp", 0x6969: "nfs", 0x7461636f: "ocfs2", 0x9fa1: "openprom", 0x002f: "qnx4", 0x68191122: "qnx6", 0x6B414653: "afs", // used by gcc 0x52654973: "reiserfs", // SMB 0x517b: "smb", 0xff534d42: "smd2", /* the first four bytes of SMB PDUs or SMB2 */ // CGroup 0x27e0eb: "cgroup", 0x63677270: "cgroup2", // tracefs 0x74726163: "tracefs", // next 0x01021997: "v9fs", 0x64646178: "daxfs", 0x42494e4d: "binfmtfs", 0x1cd1: "devpts", 0x6c6f6f70: "binderfs", 0xbad1dea: "futexfs", 0x50495045: "pipefs", 0x9fa0: "proc", 0x534f434b: "sockfs", 0x62656572: "sysfs", 0x9fa2: "usbdevice", 0x11307854: "mtd_inode_fs", 0x09041934: "anon_inode_fs", 0x73727279: "btrfs_test", 0x6e736673: "nsfs", 0xcafe4a11: "bpf_fs", 0x5a3c69f0: "aafs", 0x5a4f4653: "zonefs", 0x15013346: "udf", 0x444d4142: "DMAB", 0x454d444d: "DMEM", 0x5345434d: "SECM", 0x50494446: "PIDF", // PID fs // no include 0x00011954: "ufs", 0x62646576: "bdevfs", 0x42465331: "befs", 0x1badface: "bfs", 0x012ff7b7: "coh", 0x1373: "devfs", 0x137d: "ext", 0xef51: "ext2_old", 0x4244: "hfs", 0x3153464a: "jfs", 0x19800202: "mqueue", 0x7275: "romfs", 0x012ff7b6: "sysv2", 0x012ff7b5: "sysv4", 0xa501fcf5: "vxfs", 0x012ff7b4: "xenix", // APFS_MAGIC https://github.com/linux-apfs/linux-apfs-rw/blob/master/apfs_raw.h#L1045 0x42535041: "apfs", // NTFS magic 0x5346544e: "ntfs", // ZFS_SUPER_MAGIC https://github.com/openzfs/zfs/blob/6c82951d111bb4c8a426e5f58a87ac80a4996fc1/include/sys/fs/zfs.h#L1374 0x2fc12fc1: "zfs", } ) func detectFileSystem(stat *unix.Statfs_t) string { // This explicit cast to int64 is required for systems where the syscall // returns an int32 instead. fsType, found := magicMap[int64(stat.Type)] //nolint:unconvert if !found { return UnknownFS } return fsType } func GetDiskFreeSpaceEx(mountPath string) (*DiskFreeSpace, error) { var st unix.Statfs_t if err := unix.Statfs(mountPath, &st); err != nil { return nil, err } ds := &DiskFreeSpace{ Total: st.Blocks * uint64(st.Bsize), Avail: uint64(st.Bavail) * uint64(st.Bsize), Free: st.Bfree * uint64(st.Bsize), } ds.Used = ds.Total - ds.Free ds.FS = detectFileSystem(&st) return ds, nil } ================================================ FILE: modules/strengthen/statfs_openbsd.go ================================================ //go:build openbsd && !386 package strengthen import "golang.org/x/sys/unix" func detectFileSystem(stat *unix.Statfs_t) string { var buf []byte for _, c := range stat.F_fstypename { if c == 0 { break } buf = append(buf, byte(c)) } if len(buf) == 0 { return UnknownFS } return string(buf) } func GetDiskFreeSpaceEx(mountPath string) (*DiskFreeSpace, error) { var st unix.Statfs_t if err := unix.Statfs(mountPath, &st); err != nil { return nil, err } ds := &DiskFreeSpace{ Total: st.F_blocks * uint64(st.F_bsize), Avail: uint64(st.F_favail) * uint64(st.F_bsize), Free: st.F_ffree * uint64(st.F_bsize), } ds.Used = ds.Total - ds.Free ds.FS = detectFileSystem(&st) return ds, nil } ================================================ FILE: modules/strengthen/statfs_test.go ================================================ package strengthen import ( "fmt" "os" "testing" ) func TestGetDiskFreeSpaceEx(t *testing.T) { gb := float64(1024 * 1024 * 1024) cwd, err := os.Getwd() if err != nil { return } ds, err := GetDiskFreeSpaceEx(cwd) if err != nil { fmt.Fprintf(os.Stderr, "usage: %v\n", err) return } fmt.Fprintf(os.Stderr, "disk space total: %0.2f GB. used: %0.2f GB. available: %0.2f GB FS: %s\n", float64(ds.Total)/gb, float64(ds.Used)/gb, float64(ds.Avail)/gb, ds.FS) } func TestGetDiskFreeSpaceExTemp(t *testing.T) { gb := float64(1024 * 1024 * 1024) ds, err := GetDiskFreeSpaceEx(os.TempDir()) if err != nil { fmt.Fprintf(os.Stderr, "usage: %v\n", err) return } fmt.Fprintf(os.Stderr, "disk space total: %0.2f GB. used: %0.2f GB. available: %0.2f GB FS: %s\n", float64(ds.Total)/gb, float64(ds.Used)/gb, float64(ds.Avail)/gb, ds.FS) } ================================================ FILE: modules/strengthen/statfs_unix.go ================================================ //go:build darwin || dragonfly || freebsd package strengthen import "golang.org/x/sys/unix" func detectFileSystem(stat *unix.Statfs_t) string { var buf []byte for _, c := range stat.Fstypename { if c == 0 { break } buf = append(buf, c) } if len(buf) == 0 { return UnknownFS } return string(buf) } func GetDiskFreeSpaceEx(mountPath string) (*DiskFreeSpace, error) { var st unix.Statfs_t if err := unix.Statfs(mountPath, &st); err != nil { return nil, err } ds := &DiskFreeSpace{ Total: uint64(st.Blocks) * uint64(st.Bsize), //nolint:unconvert // uint32 -> uint64 for disk size calculation Avail: uint64(st.Bavail) * uint64(st.Bsize), //nolint:unconvert // uint32 -> uint64 for disk size calculation Free: uint64(st.Bfree) * uint64(st.Bsize), //nolint:unconvert // uint32 -> uint64 for disk size calculation } ds.Used = ds.Total - ds.Free ds.FS = detectFileSystem(&st) return ds, nil } ================================================ FILE: modules/strengthen/statfs_windows.go ================================================ //go:build windows package strengthen import ( "path/filepath" "golang.org/x/sys/windows" ) const ( pathLength = windows.MAX_PATH + 1 ) func GetDiskFreeSpaceEx(mountPath string) (*DiskFreeSpace, error) { absPath, err := filepath.Abs(mountPath) if err != nil { return nil, err } windowsPath, err := windows.UTF16PtrFromString(absPath) if err != nil { return nil, err } var freeBytesAvailableToCaller, totalNumberOfBytes, totalNumberOfFreeBytes uint64 if err = windows.GetDiskFreeSpaceEx(windowsPath, &freeBytesAvailableToCaller, &totalNumberOfBytes, &totalNumberOfFreeBytes); err != nil { return nil, err } root := filepath.VolumeName(absPath) + "\\" driveUTF16, err := windows.UTF16PtrFromString(root) if err != nil { return nil, err } volumeNameBuffer := make([]uint16, pathLength) fileSystemNameBuffer := make([]uint16, pathLength) di := &DiskFreeSpace{ Total: totalNumberOfBytes, Free: totalNumberOfFreeBytes, Used: totalNumberOfBytes - totalNumberOfFreeBytes, Avail: totalNumberOfFreeBytes, } if err = windows.GetVolumeInformation(driveUTF16, &volumeNameBuffer[0], pathLength, nil, nil, nil, &fileSystemNameBuffer[0], pathLength); err == nil { di.FS = windows.UTF16PtrToString(&fileSystemNameBuffer[0]) } return di, nil } ================================================ FILE: modules/strengthen/strings.go ================================================ package strengthen import ( "bytes" "errors" "strconv" "strings" ) // StrSplitSkipEmpty skip empty string func StrSplitSkipEmpty(s string, sep byte, capacity int) []string { sv := make([]string, 0, capacity) var first, i int for ; i < len(s); i++ { if s[i] != sep { continue } if first != i { sv = append(sv, s[first:i]) } first = i + 1 } if first < len(s) { sv = append(sv, s[first:]) } return sv } // StrCat cat strings: // You should know that StrCat gradually builds advantages // only when the number of parameters is> 2. func StrCat(sv ...string) string { var sb strings.Builder var size int for _, s := range sv { size += len(s) } sb.Grow(size) for _, s := range sv { _, _ = sb.WriteString(s) } return sb.String() } // ByteCat cat strings: // You should know that StrCat gradually builds advantages // only when the number of parameters is> 2. func ByteCat(sv ...[]byte) string { var b strings.Builder var size int for _, s := range sv { size += len(s) } b.Grow(size) for _, s := range sv { _, _ = b.Write(s) } return b.String() } // BufferCat todo func BufferCat(sv ...string) []byte { var buf bytes.Buffer var size int for _, s := range sv { size += len(s) } buf.Grow(size) for _, s := range sv { _, _ = buf.WriteString(s) } return buf.Bytes() } // ErrorCat todo func ErrorCat(sv ...string) error { return errors.New(StrCat(sv...)) } func SimpleAtob(s string, dv bool) bool { switch strings.ToLower(s) { case "true", "yes", "on", "1": return true case "false", "no", "off", "0": return false } return dv } const ( Byte = 1 << (iota * 10) // Byte KiByte MiByte GiByte TiByte PiByte EiByte ) var ( sizeRatio = map[string]int64{ "k": KiByte, "m": MiByte, "g": GiByte, "t": TiByte, "p": PiByte, "e": EiByte, } ) var ( ErrSyntaxSize = errors.New("size syntax error") ) func ParseSize(text string) (int64, error) { text = strings.TrimSuffix(strings.ToLower(text), "b") for rs, ratio := range sizeRatio { if prefix, ok := strings.CutSuffix(text, rs); ok { v, err := strconv.ParseInt(strings.TrimSpace(prefix), 10, 64) if err != nil { return 0, ErrSyntaxSize } return v * ratio, nil } } v, err := strconv.ParseInt(strings.TrimSpace(text), 10, 64) if err != nil { return 0, ErrSyntaxSize } return v, nil } ================================================ FILE: modules/symlink/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: modules/symlink/LICENSE.APACHE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2014-2018 Docker, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: modules/symlink/LICENSE.BSD ================================================ Copyright (c) 2014-2018 The Docker & Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: modules/symlink/doc.go ================================================ // Package symlink implements [FollowSymlinkInScope] which is an extension // of [path/filepath.EvalSymlinks], as well as a Windows long-path aware // version of [path/filepath.EvalSymlinks] from the Go standard library. // // The code from [path/filepath.EvalSymlinks] has been adapted in fs.go. // Read the [LICENSE.BSD] file that governs fs.go and [LICENSE.APACHE] for // fs_unix_test.go. // // [LICENSE.APACHE]: https://github.com/moby/sys/blob/symlink/v0.2.0/symlink/LICENSE.APACHE // [LICENSE.BSD]: https://github.com/moby/sys/blob/symlink/v0.2.0/symlink/LICENSE.APACHE package symlink ================================================ FILE: modules/symlink/fs.go ================================================ // Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE.BSD file. // This code is a modified version of path/filepath/symlink.go from the Go // standard library in [docker@fa3ec89], which was based on [go1.3.3], // with Windows implementatinos being added in [docker@9b648df]. // // [docker@fa3ec89]: https://github.com/moby/moby/commit/fa3ec89515431ce425f924c8a9a804d5cb18382f // [go1.3.3]: https://github.com/golang/go/blob/go1.3.3/src/pkg/path/filepath/symlink.go // [docker@9b648df]: https://github.com/moby/moby/commit/9b648dfac6453de5944ee4bb749115d85a253a05 package symlink import ( "bytes" "errors" "os" "path/filepath" "strings" ) // FollowSymlinkInScope evaluates symbolic links in "path" within a scope "root" // and returns a result guaranteed to be contained within the scope "root" at // the time of the call. It returns an error of either "path" or "root" cannot // be converted to an absolute path. // // Symbolic links in "root" are not evaluated and left as-is. Errors encountered // while attempting to evaluate symlinks in path are returned, but non-existing // paths are valid and do not constitute an error. "path" must contain "root" // as a prefix, or else an error is returned. Trying to break out from "root" // does not constitute an error, instead resolves the path within "root". // // Example: // // // If "/foo/bar" is a symbolic link to "/outside": // FollowSymlinkInScope("/foo/bar", "/foo") // Returns "/foo/outside" instead of "/outside" // // IMPORTANT: It is the caller's responsibility to call FollowSymlinkInScope // after relevant symbolic links are created to avoid Time-of-check Time-of-use // (TOCTOU) race conditions ([CWE-367]). No additional symbolic links must be // created after evaluating, as those could potentially make a previously-safe // path unsafe. // // For example, if "/foo/bar" does not exist, FollowSymlinkInScope("/foo/bar", "/foo") // evaluates the path to "/foo/bar". If one makes "/foo/bar" a symbolic link to // "/baz" subsequently, then "/foo/bar" should no longer be considered safely // contained in "/foo". // // [CWE-367]: https://cwe.mitre.org/data/definitions/367.html func FollowSymlinkInScope(path, root string) (string, error) { path, err := filepath.Abs(filepath.FromSlash(path)) if err != nil { return "", err } root, err = filepath.Abs(filepath.FromSlash(root)) if err != nil { return "", err } return evalSymlinksInScope(path, root) } // evalSymlinksInScope evaluates symbolic links in "path" within a scope "root" // and returns a result guaranteed to be contained within the scope "root" at // the time of the call. Refer to [FollowSymlinkInScope] for details. func evalSymlinksInScope(path, root string) (string, error) { root = filepath.Clean(root) if path == root { return path, nil } if !strings.HasPrefix(path, root) { return "", errors.New("evalSymlinksInScope: " + path + " is not in " + root) } const maxIter = 255 originalPath := path // given root of "/a" and path of "/a/b/../../c" we want path to be "/b/../../c" path = path[len(root):] if root == string(filepath.Separator) { path = string(filepath.Separator) + path } if !strings.HasPrefix(path, string(filepath.Separator)) { return "", errors.New("evalSymlinksInScope: " + path + " is not in " + root) } path = filepath.Clean(path) // consume path by taking each frontmost path element, // expanding it if it's a symlink, and appending it to b var b bytes.Buffer // b here will always be considered to be the "current absolute path inside // root" when we append paths to it, we also append a slash and use // filepath.Clean after the loop to trim the trailing slash for n := 0; path != ""; n++ { if n > maxIter { return "", errors.New("evalSymlinksInScope: too many links in " + originalPath) } // find next path component, p i := strings.IndexRune(path, filepath.Separator) var p string if i == -1 { p, path = path, "" } else { p, path = path[:i], path[i+1:] } if p == "" { continue } // this takes a b.String() like "b/../" and a p like "c" and turns it // into "/b/../c" which then gets filepath.Cleaned into "/c" and then // root gets prepended and we Clean again (to remove any trailing slash // if the first Clean gave us just "/") cleanP := filepath.Clean(string(filepath.Separator) + b.String() + p) if isDriveOrRoot(cleanP) { // never Lstat "/" itself, or drive letters on Windows b.Reset() continue } fullP := filepath.Clean(root + cleanP) fi, err := os.Lstat(fullP) if os.IsNotExist(err) { // if p does not exist, accept it b.WriteString(p) b.WriteRune(filepath.Separator) continue } if err != nil { return "", err } if fi.Mode()&os.ModeSymlink == 0 { b.WriteString(p) b.WriteRune(filepath.Separator) continue } // it's a symlink, put it at the front of path dest, err := os.Readlink(fullP) if err != nil { return "", err } if isAbs(dest) { b.Reset() } path = dest + string(filepath.Separator) + path } // see note above on "fullP := ..." for why this is double-cleaned and // what's happening here return filepath.Clean(root + filepath.Clean(string(filepath.Separator)+b.String())), nil } // EvalSymlinks is a modified version of [path/filepath.EvalSymlinks] from // the Go standard library with support for Windows long paths (paths prepended // with "\\?\"). On non-Windows platforms, it's an alias for [path/filepath.EvalSymlinks]. // // EvalSymlinks returns the path name after the evaluation of any symbolic // links. If path is relative, the result will be relative to the current // directory, unless one of the components is an absolute symbolic link. // // EvalSymlinks calls [path/filepath.Clean] on the result. func EvalSymlinks(path string) (string, error) { return evalSymlinks(path) } ================================================ FILE: modules/symlink/fs_unix.go ================================================ //go:build !windows package symlink import ( "path/filepath" ) func evalSymlinks(path string) (string, error) { return filepath.EvalSymlinks(path) } func isDriveOrRoot(p string) bool { return p == string(filepath.Separator) } var isAbs = filepath.IsAbs ================================================ FILE: modules/symlink/fs_windows.go ================================================ // Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE.BSD file. // This code is a modified version of [path/filepath/symlink_windows.go] // and [path/filepath/symlink.go] from the Go 1.4.2 standard library, and // added in [docker@9b648df]. // // [path/filepath/symlink_windows.go]: https://github.com/golang/go/blob/go1.4.2/src/path/filepath/symlink_windows.go // [path/filepath/symlink.go]: https://github.com/golang/go/blob/go1.4.2/src/path/filepath/symlink.go // [docker@9b648df]: https://github.com/moby/moby/commit/9b648dfac6453de5944ee4bb749115d85a253a05 package symlink import ( "bytes" "errors" "os" "path/filepath" "strings" "golang.org/x/sys/windows" ) func toShort(path string) (string, error) { p, err := windows.UTF16FromString(path) if err != nil { return "", err } b := p // GetShortPathName says we can reuse buffer n, err := windows.GetShortPathName(&p[0], &b[0], uint32(len(b))) if err != nil { return "", err } if n > uint32(len(b)) { b = make([]uint16, n) if _, err = windows.GetShortPathName(&p[0], &b[0], uint32(len(b))); err != nil { return "", err } } return windows.UTF16ToString(b), nil } func toLong(path string) (string, error) { p, err := windows.UTF16FromString(path) if err != nil { return "", err } b := p // GetLongPathName says we can reuse buffer n, err := windows.GetLongPathName(&p[0], &b[0], uint32(len(b))) if err != nil { return "", err } if n > uint32(len(b)) { b = make([]uint16, n) n, err = windows.GetLongPathName(&p[0], &b[0], uint32(len(b))) if err != nil { return "", err } } b = b[:n] return windows.UTF16ToString(b), nil } func evalSymlinks(path string) (string, error) { path, err := walkSymlinks(path) if err != nil { return "", err } p, err := toShort(path) if err != nil { return "", err } p, err = toLong(p) if err != nil { return "", err } // windows.GetLongPathName does not change the case of the drive letter, // but the result of EvalSymlinks must be unique, so we have // EvalSymlinks(`c:\a`) == EvalSymlinks(`C:\a`). // Make drive letter upper case. if len(p) >= 2 && p[1] == ':' && 'a' <= p[0] && p[0] <= 'z' { p = string(p[0]+'A'-'a') + p[1:] } else if len(p) >= 6 && p[5] == ':' && 'a' <= p[4] && p[4] <= 'z' { p = p[:3] + string(p[4]+'A'-'a') + p[5:] } return filepath.Clean(p), nil } const ( utf8RuneSelf = 0x80 longPathPrefix = `\\?\` ) func walkSymlinks(path string) (string, error) { const maxIter = 255 originalPath := path // consume path by taking each frontmost path element, // expanding it if it's a symlink, and appending it to b var b bytes.Buffer for n := 0; path != ""; n++ { if n > maxIter { return "", errors.New("too many links in " + originalPath) } // A path beginning with `\\?\` represents the root, so automatically // skip that part and begin processing the next segment. if strings.HasPrefix(path, longPathPrefix) { b.WriteString(longPathPrefix) path = path[4:] continue } // find next path component, p i := -1 for j, c := range path { if c < utf8RuneSelf && os.IsPathSeparator(uint8(c)) { i = j break } } var p string if i == -1 { p, path = path, "" } else { p, path = path[:i], path[i+1:] } if p == "" { if b.Len() == 0 { // must be absolute path b.WriteRune(filepath.Separator) } continue } // If this is the first segment after the long path prefix, accept the // current segment as a volume root or UNC share and move on to the next. if b.String() == longPathPrefix { b.WriteString(p) b.WriteRune(filepath.Separator) continue } fi, err := os.Lstat(b.String() + p) if err != nil { return "", err } if fi.Mode()&os.ModeSymlink == 0 { b.WriteString(p) if path != "" || (b.Len() == 2 && len(p) == 2 && p[1] == ':') { b.WriteRune(filepath.Separator) } continue } // it's a symlink, put it at the front of path dest, err := os.Readlink(b.String() + p) if err != nil { return "", err } if isAbs(dest) { b.Reset() } path = dest + string(filepath.Separator) + path } return filepath.Clean(b.String()), nil } func isDriveOrRoot(p string) bool { if p == string(filepath.Separator) { return true } length := len(p) if length >= 2 { if p[length-1] == ':' && (('a' <= p[length-2] && p[length-2] <= 'z') || ('A' <= p[length-2] && p[length-2] <= 'Z')) { return true } } return false } // isAbs is a platform-specific wrapper for filepath.IsAbs. On Windows, // golang filepath.IsAbs does not consider a path \windows\system32 as absolute // as it doesn't start with a drive-letter/colon combination. However, in // docker we need to verify things such as WORKDIR /windows/system32 in // a Dockerfile (which gets translated to \windows\system32 when being processed // by the daemon. This SHOULD be treated as absolute from a docker processing // perspective. func isAbs(path string) bool { if filepath.IsAbs(path) || strings.HasPrefix(path, string(os.PathSeparator)) { return true } return false } ================================================ FILE: modules/systemproxy/dialer.go ================================================ package systemproxy import ( "net" "net/url" "strings" ) func newDialer(proxyURL *url.URL, forward *net.Dialer, noProxy string) Dialer { p, err := NewDialerFromURL(proxyURL, forward) if err != nil { return forward } perHost := NewPerHost(p, forward) perHost.AddFromString(noProxy) return perHost } func newDialerForHosts(proxyURL *url.URL, forward *net.Dialer, hosts []string, bypassSimpleHostnames bool) Dialer { pd, err := NewDialerFromURL(proxyURL, forward) if err != nil { return forward } p := NewPerHost(pd, forward) p.SetBypassSimpleHostnames(bypassSimpleHostnames) for _, host := range hosts { host = strings.TrimSpace(host) if host == "" { continue } if strings.Contains(host, "/") { // We assume that it's a CIDR address like 127.0.0.0/8 if _, net, err := net.ParseCIDR(host); err == nil { p.AddNetwork(net) } continue } if ip := net.ParseIP(host); ip != nil { p.AddIP(ip) continue } if strings.HasPrefix(host, "*.") { p.AddZone(host[1:]) continue } p.AddHost(host) } return p } ================================================ FILE: modules/systemproxy/env.go ================================================ package systemproxy import ( "os" ) // https://about.gitlab.com/blog/2021/01/27/we-need-to-talk-no-proxy/ func getEnvAny(names ...string) string { for _, n := range names { if val, ok := os.LookupEnv(n); ok && val != "" { return val } } return "" } ================================================ FILE: modules/systemproxy/http.go ================================================ package systemproxy import ( "bufio" "context" "crypto/tls" "encoding/base64" "fmt" "net" "net/http" "net/url" ) type coordDialer struct { proxyURL *url.URL forward *net.Dialer } func (d *coordDialer) DialContext(ctx context.Context, network string, address string) (net.Conn, error) { return DialServerViaCONNECT(ctx, address, d.proxyURL, d.forward) } // DialServerViaCONNECT: SSH protocol should use socks5 protocol as much as possible func DialServerViaCONNECT(ctx context.Context, addr string, proxy *url.URL, forward *net.Dialer) (net.Conn, error) { proxyAddr := proxy.Host var c net.Conn var err error switch proxy.Scheme { case "http": if proxy.Port() == "" { proxyAddr = net.JoinHostPort(proxyAddr, "80") } if c, err = forward.DialContext(ctx, "tcp", proxyAddr); err != nil { return nil, err } case "https": if proxy.Port() == "" { proxyAddr = net.JoinHostPort(proxyAddr, "443") } d := &tls.Dialer{NetDialer: forward} if c, err = d.DialContext(ctx, "tcp", proxyAddr); err != nil { return nil, err } } h := make(http.Header) if u := proxy.User; u != nil { h.Set("Proxy-Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(u.String()))) } h.Set("Proxy-Connection", "Keep-Alive") connect := &http.Request{ Method: "CONNECT", URL: &url.URL{Opaque: addr}, Host: addr, Header: h, } if err := connect.Write(c); err != nil { _ = c.Close() return nil, err } br := bufio.NewReader(c) res, err := http.ReadResponse(br, nil) if err != nil { _ = c.Close() return nil, fmt.Errorf("reading HTTP response from CONNECT to %s via proxy %s failed: %w", addr, proxyAddr, err) } if res.StatusCode != 200 { _ = c.Close() return nil, fmt.Errorf("proxy error from %s while dialing %s: %v", proxyAddr, addr, res.Status) } // It's safe to discard the bufio.Reader here and return the // original TCP conn directly because we only use this for // TLS, and in TLS the client speaks first, so we know there's // no unbuffered data. But we can double-check. if br.Buffered() > 0 { _ = c.Close() return nil, fmt.Errorf("unexpected %d bytes of buffered data from CONNECT proxy %q", br.Buffered(), proxyAddr) } return c, nil } ================================================ FILE: modules/systemproxy/http_test.go ================================================ package systemproxy import ( "bufio" "fmt" "net" "net/url" "os" "strings" "testing" ) func TestDialGithub(t *testing.T) { var d net.Dialer proxyURL, err := url.Parse("http://127.0.0.1:8080") if err != nil { return } conn, err := DialServerViaCONNECT(t.Context(), "github.com:22", proxyURL, &d) if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) return } defer conn.Close() // nolint if _, err := conn.Write([]byte("SSH-2.0-Jack-7.9\n")); err != nil { fmt.Fprintf(os.Stderr, "write error: %v\n", err) return } br := bufio.NewReader(conn) line, _, err := br.ReadLine() if err != nil { fmt.Fprintf(os.Stderr, "ReadLine error: %v\n", err) return } fmt.Fprintf(os.Stderr, "line: %s\n", strings.TrimSpace(string(line))) } ================================================ FILE: modules/systemproxy/internal/readme.md ================================================ # placeholder ================================================ FILE: modules/systemproxy/internal/socks/client.go ================================================ // Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package socks import ( "cmp" "context" "errors" "io" "net" "strconv" "time" ) var ( noDeadline = time.Time{} aLongTimeAgo = time.Unix(1, 0) ) func (d *Dialer) connect(ctx context.Context, c net.Conn, address string) (_ net.Addr, ctxErr error) { host, port, err := splitHostPort(address) if err != nil { return nil, err } if deadline, ok := ctx.Deadline(); ok && !deadline.IsZero() { _ = c.SetDeadline(deadline) defer c.SetDeadline(noDeadline) // nolint } if ctx != context.Background() { errCh := make(chan error, 1) done := make(chan struct{}) defer func() { close(done) if ctxErr == nil { ctxErr = <-errCh } }() go func() { select { case <-ctx.Done(): _ = c.SetDeadline(aLongTimeAgo) errCh <- ctx.Err() case <-done: errCh <- nil } }() } b := make([]byte, 0, 6+len(host)) // the size here is just an estimate b = append(b, Version5) if len(d.AuthMethods) == 0 || d.Authenticate == nil { b = append(b, 1, byte(AuthMethodNotRequired)) } else { ams := d.AuthMethods if len(ams) > 255 { return nil, errors.New("too many authentication methods") } b = append(b, byte(len(ams))) for _, am := range ams { b = append(b, byte(am)) } } if _, ctxErr = c.Write(b); ctxErr != nil { return } if _, ctxErr = io.ReadFull(c, b[:2]); ctxErr != nil { return } if b[0] != Version5 { return nil, errors.New("unexpected protocol version " + strconv.Itoa(int(b[0]))) } am := AuthMethod(b[1]) if am == AuthMethodNoAcceptableMethods { return nil, errors.New("no acceptable authentication methods") } if d.Authenticate != nil { if ctxErr = d.Authenticate(ctx, c, am); ctxErr != nil { return } } b = b[:0] b = append(b, Version5, byte(d.cmd), 0) if ip := net.ParseIP(host); ip != nil { if ip4 := ip.To4(); ip4 != nil { b = append(b, AddrTypeIPv4) b = append(b, ip4...) } else if ip6 := ip.To16(); ip6 != nil { b = append(b, AddrTypeIPv6) b = append(b, ip6...) } else { return nil, errors.New("unknown address type") } } else { if len(host) > 255 { return nil, errors.New("FQDN too long") } b = append(b, AddrTypeFQDN) b = append(b, byte(len(host))) b = append(b, host...) } b = append(b, byte(port>>8), byte(port)) if _, ctxErr = c.Write(b); ctxErr != nil { return } if _, ctxErr = io.ReadFull(c, b[:4]); ctxErr != nil { return } if b[0] != Version5 { return nil, errors.New("unexpected protocol version " + strconv.Itoa(int(b[0]))) } if cmdErr := Reply(b[1]); cmdErr != StatusSucceeded { return nil, errors.New("unknown error " + cmdErr.String()) } if b[2] != 0 { return nil, errors.New("non-zero reserved field") } l := 2 var a Addr switch b[3] { case AddrTypeIPv4: l += net.IPv4len a.IP = make(net.IP, net.IPv4len) case AddrTypeIPv6: l += net.IPv6len a.IP = make(net.IP, net.IPv6len) case AddrTypeFQDN: if _, err := io.ReadFull(c, b[:1]); err != nil { return nil, err } l += int(b[0]) default: return nil, errors.New("unknown address type " + strconv.Itoa(int(b[3]))) } if cap(b) < l { b = make([]byte, l) } else { b = b[:l] } if _, ctxErr = io.ReadFull(c, b); ctxErr != nil { return } if a.IP != nil { copy(a.IP, b) } else { a.Name = string(b[:len(b)-2]) } a.Port = int(b[len(b)-2])<<8 | int(b[len(b)-1]) return &a, nil } func splitHostPort(address string) (string, int, error) { host, port, err := net.SplitHostPort(address) if err != nil { return "", 0, err } portnum, err := strconv.Atoi(port) if err != nil { return "", 0, err } if cmp.Compare(portnum, 1) < 0 || cmp.Compare(portnum, 0xffff) > 0 { return "", 0, errors.New("port number out of range " + port) } return host, portnum, nil } ================================================ FILE: modules/systemproxy/internal/socks/socks.go ================================================ // Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package socks provides a SOCKS version 5 client implementation. // // SOCKS protocol version 5 is defined in RFC 1928. // Username/Password authentication for SOCKS version 5 is defined in // RFC 1929. package socks import ( "context" "errors" "io" "net" "strconv" ) // A Command represents a SOCKS command. type Command int func (cmd Command) String() string { switch cmd { case CmdConnect: return "socks connect" case cmdBind: return "socks bind" default: return "socks " + strconv.Itoa(int(cmd)) } } // An AuthMethod represents a SOCKS authentication method. type AuthMethod int // A Reply represents a SOCKS command reply code. type Reply int func (code Reply) String() string { switch code { case StatusSucceeded: return "succeeded" case 0x01: return "general SOCKS server failure" case 0x02: return "connection not allowed by ruleset" case 0x03: return "network unreachable" case 0x04: return "host unreachable" case 0x05: return "connection refused" case 0x06: return "TTL expired" case 0x07: return "command not supported" case 0x08: return "address type not supported" default: return "unknown code: " + strconv.Itoa(int(code)) } } // Wire protocol constants. const ( Version5 = 0x05 AddrTypeIPv4 = 0x01 AddrTypeFQDN = 0x03 AddrTypeIPv6 = 0x04 CmdConnect Command = 0x01 // establishes an active-open forward proxy connection cmdBind Command = 0x02 // establishes a passive-open forward proxy connection AuthMethodNotRequired AuthMethod = 0x00 // no authentication required AuthMethodUsernamePassword AuthMethod = 0x02 // use username/password AuthMethodNoAcceptableMethods AuthMethod = 0xff // no acceptable authentication methods StatusSucceeded Reply = 0x00 ) // An Addr represents a SOCKS-specific address. // Either Name or IP is used exclusively. type Addr struct { Name string // fully-qualified domain name IP net.IP Port int } func (a *Addr) Network() string { return "socks" } func (a *Addr) String() string { if a == nil { return "" } port := strconv.Itoa(a.Port) if a.IP == nil { return net.JoinHostPort(a.Name, port) } return net.JoinHostPort(a.IP.String(), port) } // A Conn represents a forward proxy connection. type Conn struct { net.Conn boundAddr net.Addr } // BoundAddr returns the address assigned by the proxy server for // connecting to the command target address from the proxy server. func (c *Conn) BoundAddr() net.Addr { if c == nil { return nil } return c.boundAddr } // A Dialer holds SOCKS-specific options. type Dialer struct { cmd Command // either CmdConnect or cmdBind proxyNetwork string // network between a proxy server and a client proxyAddress string // proxy server address // ProxyDial specifies the optional dial function for // establishing the transport connection. ProxyDial func(context.Context, string, string) (net.Conn, error) // AuthMethods specifies the list of request authentication // methods. // If empty, SOCKS client requests only AuthMethodNotRequired. AuthMethods []AuthMethod // Authenticate specifies the optional authentication // function. It must be non-nil when AuthMethods is not empty. // It must return an error when the authentication is failed. Authenticate func(context.Context, io.ReadWriter, AuthMethod) error } // DialContext connects to the provided address on the provided // network. // // The returned error value may be a net.OpError. When the Op field of // net.OpError contains "socks", the Source field contains a proxy // server address and the Addr field contains a command target // address. // // See func Dial of the net package of standard library for a // description of the network and address parameters. func (d *Dialer) DialContext(ctx context.Context, network, address string) (net.Conn, error) { if err := d.validateTarget(network, address); err != nil { proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: err} } if ctx == nil { proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: errors.New("nil context")} } var err error var c net.Conn if d.ProxyDial != nil { c, err = d.ProxyDial(ctx, d.proxyNetwork, d.proxyAddress) } else { var dd net.Dialer c, err = dd.DialContext(ctx, d.proxyNetwork, d.proxyAddress) } if err != nil { proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: err} } a, err := d.connect(ctx, c, address) if err != nil { _ = c.Close() proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: err} } return &Conn{Conn: c, boundAddr: a}, nil } // DialWithConn initiates a connection from SOCKS server to the target // network and address using the connection c that is already // connected to the SOCKS server. // // It returns the connection's local address assigned by the SOCKS // server. func (d *Dialer) DialWithConn(ctx context.Context, c net.Conn, network, address string) (net.Addr, error) { if err := d.validateTarget(network, address); err != nil { proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: err} } if ctx == nil { proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: errors.New("nil context")} } a, err := d.connect(ctx, c, address) if err != nil { proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: err} } return a, nil } // Dial connects to the provided address on the provided network. // // Unlike DialContext, it returns a raw transport connection instead // of a forward proxy connection. // // Deprecated: Use DialContext or DialWithConn instead. func (d *Dialer) Dial(network, address string) (net.Conn, error) { if err := d.validateTarget(network, address); err != nil { proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: err} } var err error var c net.Conn if d.ProxyDial != nil { c, err = d.ProxyDial(context.Background(), d.proxyNetwork, d.proxyAddress) } else { c, err = net.Dial(d.proxyNetwork, d.proxyAddress) } if err != nil { proxy, dst, _ := d.pathAddrs(address) return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: err} } if _, err := d.DialWithConn(context.Background(), c, network, address); err != nil { _ = c.Close() return nil, err } return c, nil } func (d *Dialer) validateTarget(network, _ string) error { switch network { case "tcp", "tcp6", "tcp4": default: return errors.New("network not implemented") } switch d.cmd { case CmdConnect, cmdBind: default: return errors.New("command not implemented") } return nil } func (d *Dialer) pathAddrs(address string) (proxy, dst net.Addr, err error) { for i, s := range []string{d.proxyAddress, address} { host, port, err := splitHostPort(s) if err != nil { return nil, nil, err } a := &Addr{Port: port} a.IP = net.ParseIP(host) if a.IP == nil { a.Name = host } if i == 0 { proxy = a } else { dst = a } } return } // NewDialer returns a new Dialer that dials through the provided // proxy server's network and address. func NewDialer(network, address string) *Dialer { return &Dialer{proxyNetwork: network, proxyAddress: address, cmd: CmdConnect} } const ( authUsernamePasswordVersion = 0x01 authStatusSucceeded = 0x00 ) // UsernamePassword are the credentials for the username/password // authentication method. type UsernamePassword struct { Username string Password string } // Authenticate authenticates a pair of username and password with the // proxy server. func (up *UsernamePassword) Authenticate(ctx context.Context, rw io.ReadWriter, auth AuthMethod) error { switch auth { case AuthMethodNotRequired: return nil case AuthMethodUsernamePassword: if len(up.Username) == 0 || len(up.Username) > 255 || len(up.Password) > 255 { return errors.New("invalid username/password") } b := []byte{authUsernamePasswordVersion} b = append(b, byte(len(up.Username))) b = append(b, up.Username...) b = append(b, byte(len(up.Password))) b = append(b, up.Password...) // TODO(mikio): handle IO deadlines and cancelation if // necessary if _, err := rw.Write(b); err != nil { return err } if _, err := io.ReadFull(rw, b[:2]); err != nil { return err } if b[0] != authUsernamePasswordVersion { return errors.New("invalid username/password version") } if b[1] != authStatusSucceeded { return errors.New("username/password authentication failed") } return nil } return errors.New("unsupported authentication method " + strconv.Itoa(int(auth))) } ================================================ FILE: modules/systemproxy/pre_host.go ================================================ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package systemproxy import ( "context" "net" "slices" "strings" ) // A PerHost directs connections to a default Dialer unless the host name // requested matches one of a number of exceptions. type PerHost struct { def, bypass Dialer bypassNetworks []*net.IPNet bypassIPs []net.IP bypassZones []string bypassHosts []string bypassSimpleHostnames bool // bypass proxy for simple hostnames (no dots) } // NewPerHost returns a PerHost Dialer that directs connections to either // defaultDialer or bypass, depending on whether the connection matches one of // the configured rules. func NewPerHost(defaultDialer, bypass Dialer) *PerHost { return &PerHost{ def: defaultDialer, bypass: bypass, } } // DialContext connects to the address addr on the given network through either // defaultDialer or bypass. func (p *PerHost) DialContext(ctx context.Context, network, addr string) (c net.Conn, err error) { host, _, err := net.SplitHostPort(addr) if err != nil { return nil, err } d := p.dialerForRequest(host) return d.DialContext(ctx, network, addr) } // normalizeHost normalizes a hostname for comparison // - converts to lowercase (DNS is case-insensitive) // - removes trailing dot (FQDN canonical form) func normalizeHost(host string) string { host = strings.ToLower(host) host = strings.TrimSuffix(host, ".") return host } func (p *PerHost) dialerForRequest(host string) Dialer { // Normalize host for consistent comparison host = normalizeHost(host) // Check if this is an IP address first // IP addresses are NOT simple hostnames if ip := net.ParseIP(host); ip != nil { if slices.ContainsFunc(p.bypassNetworks, func(net *net.IPNet) bool { return net.Contains(ip) }) { return p.bypass } if slices.ContainsFunc(p.bypassIPs, func(bypassIP net.IP) bool { return bypassIP.Equal(ip) }) { return p.bypass } return p.def } // Check if this is a simple hostname (no dots) and bypass is enabled // This implements macOS ExcludeSimpleHostnames and Windows behavior // Simple hostname = hostname without dots, not an IP address if p.bypassSimpleHostnames && !strings.Contains(host, ".") { return p.bypass } if slices.ContainsFunc(p.bypassZones, func(zone string) bool { return strings.HasSuffix(host, zone) || host == zone[1:] }) { return p.bypass } if slices.Contains(p.bypassHosts, host) { return p.bypass } return p.def } // AddFromString parses a string that contains comma-separated values // specifying hosts that should use the bypass proxy. Each value is either an // IP address, a CIDR range, a zone (*.example.com) or a host name // (localhost). A best effort is made to parse the string and errors are // ignored. func (p *PerHost) AddFromString(s string) { for host := range strings.SplitSeq(s, ",") { host = strings.TrimSpace(host) if host == "" { continue } if strings.Contains(host, "/") { // We assume that it's a CIDR address like 127.0.0.0/8 if _, net, err := net.ParseCIDR(host); err == nil { p.AddNetwork(net) } continue } if ip := net.ParseIP(host); ip != nil { p.AddIP(ip) continue } if strings.HasPrefix(host, "*.") { p.AddZone(host[1:]) continue } p.AddHost(host) } } // AddIP specifies an IP address that will use the bypass proxy. Note that // this will only take effect if a literal IP address is dialed. A connection // to a named host will never match an IP. func (p *PerHost) AddIP(ip net.IP) { p.bypassIPs = append(p.bypassIPs, ip) } // AddNetwork specifies an IP range that will use the bypass proxy. Note that // this will only take effect if a literal IP address is dialed. A connection // to a named host will never match. func (p *PerHost) AddNetwork(net *net.IPNet) { p.bypassNetworks = append(p.bypassNetworks, net) } // AddZone specifies a DNS suffix that will use the bypass proxy. A zone of // "example.com" matches "example.com" and all of its subdomains. func (p *PerHost) AddZone(zone string) { // Normalize: lowercase and remove trailing dot zone = normalizeHost(zone) if !strings.HasPrefix(zone, ".") { zone = "." + zone } p.bypassZones = append(p.bypassZones, zone) } // AddHost specifies a host name that will use the bypass proxy. func (p *PerHost) AddHost(host string) { // Normalize: lowercase and remove trailing dot host = normalizeHost(host) p.bypassHosts = append(p.bypassHosts, host) } // SetBypassSimpleHostnames sets whether to bypass proxy for simple hostnames. // A simple hostname is a hostname without any dots (e.g., "server", "localhost"). // This implements macOS ExcludeSimpleHostnames and Windows behavior. func (p *PerHost) SetBypassSimpleHostnames(bypass bool) { p.bypassSimpleHostnames = bypass } ================================================ FILE: modules/systemproxy/pre_host_test.go ================================================ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package systemproxy import ( "context" "net" "testing" ) func TestNormalizeHost(t *testing.T) { tests := []struct { input string expected string }{ {"Example.COM", "example.com"}, {"EXAMPLE.COM", "example.com"}, {"example.com.", "example.com"}, {"Example.COM.", "example.com"}, {"localhost", "localhost"}, {"LocalHost", "localhost"}, {"LOCALHOST.", "localhost"}, {"192.168.1.1", "192.168.1.1"}, {"::1", "::1"}, } for _, tt := range tests { t.Run(tt.input, func(t *testing.T) { got := normalizeHost(tt.input) if got != tt.expected { t.Errorf("normalizeHost(%q) = %q, want %q", tt.input, got, tt.expected) } }) } } // testDialer is a simple dialer used to identify which dialer was selected type testDialer struct { name string } func (d *testDialer) DialContext(ctx context.Context, network, address string) (net.Conn, error) { return nil, nil } func TestPerHostAddZoneCaseInsensitive(t *testing.T) { bypass := &testDialer{name: "bypass"} def := &testDialer{name: "default"} p := NewPerHost(def, bypass) // Add zone with mixed case p.AddZone("Example.COM") // Test that lowercase version matches d := p.dialerForRequest("www.example.com") if d != bypass { t.Error("expected bypass dialer for www.example.com") } // Test that uppercase version matches d = p.dialerForRequest("WWW.EXAMPLE.COM") if d != bypass { t.Error("expected bypass dialer for WWW.EXAMPLE.COM") } // Test that zone itself matches d = p.dialerForRequest("example.com") if d != bypass { t.Error("expected bypass dialer for example.com") } } func TestPerHostAddHostCaseInsensitive(t *testing.T) { bypass := &testDialer{name: "bypass"} def := &testDialer{name: "default"} p := NewPerHost(def, bypass) // Add host with mixed case p.AddHost("LocalHost") // Test that lowercase version matches d := p.dialerForRequest("localhost") if d != bypass { t.Error("expected bypass dialer for localhost") } // Test that uppercase version matches d = p.dialerForRequest("LOCALHOST") if d != bypass { t.Error("expected bypass dialer for LOCALHOST") } } func TestPerHostTrailingDot(t *testing.T) { bypass := &testDialer{name: "bypass"} def := &testDialer{name: "default"} p := NewPerHost(def, bypass) // Add host without trailing dot p.AddHost("example.com") // Test that version with trailing dot matches d := p.dialerForRequest("example.com.") if d != bypass { t.Error("expected bypass dialer for example.com.") } // Add zone p.AddZone("test.com") // Test that FQDN with trailing dot matches zone d = p.dialerForRequest("www.test.com.") if d != bypass { t.Error("expected bypass dialer for www.test.com.") } } func TestPerHostAddFromStringCaseInsensitive(t *testing.T) { bypass := &testDialer{name: "bypass"} def := &testDialer{name: "default"} p := NewPerHost(def, bypass) // Add hosts from string with mixed case p.AddFromString("LocalHost,*.Example.COM") // Test exact host match with different case d := p.dialerForRequest("LOCALHOST") if d != bypass { t.Error("expected bypass dialer for LOCALHOST") } // Test zone match with different case d = p.dialerForRequest("www.example.com") if d != bypass { t.Error("expected bypass dialer for www.example.com") } d = p.dialerForRequest("WWW.EXAMPLE.COM") if d != bypass { t.Error("expected bypass dialer for WWW.EXAMPLE.COM") } } func TestPerHostNotMatch(t *testing.T) { bypass := &testDialer{name: "bypass"} def := &testDialer{name: "default"} p := NewPerHost(def, bypass) // Add some bypass rules p.AddHost("localhost") p.AddZone("example.com") // Test that unrelated host goes to default d := p.dialerForRequest("other.com") if d != def { t.Error("expected default dialer for other.com") } d = p.dialerForRequest("www.other.com") if d != def { t.Error("expected default dialer for www.other.com") } } func TestPerHostBypassSimpleHostnames(t *testing.T) { bypass := &testDialer{name: "bypass"} def := &testDialer{name: "default"} p := NewPerHost(def, bypass) // Enable bypass for simple hostnames p.SetBypassSimpleHostnames(true) // Test simple hostnames (no dots) should bypass tests := []struct { host string expected Dialer }{ {"localhost", bypass}, {"server", bypass}, {"printer", bypass}, {"myserver", bypass}, {"LOCALHOST", bypass}, // case insensitive {"Server", bypass}, // case insensitive // FQDNs and IPs should NOT bypass {"example.com", def}, {"www.example.com", def}, {"sub.domain.example.com", def}, {"192.168.1.1", def}, {"::1", def}, } for _, tt := range tests { t.Run(tt.host, func(t *testing.T) { d := p.dialerForRequest(tt.host) if d != tt.expected { t.Errorf("dialerForRequest(%q) = %v, want %v", tt.host, d, tt.expected) } }) } } func TestPerHostBypassSimpleHostnamesDisabled(t *testing.T) { bypass := &testDialer{name: "bypass"} def := &testDialer{name: "default"} p := NewPerHost(def, bypass) // Default is disabled, so simple hostnames should NOT bypass if d := p.dialerForRequest("localhost"); d != def { t.Error("expected default dialer for localhost when bypassSimpleHostnames is disabled") } if d := p.dialerForRequest("server"); d != def { t.Error("expected default dialer for server when bypassSimpleHostnames is disabled") } } func TestPerHostBypassSimpleHostnamesWithExplicitHosts(t *testing.T) { bypass := &testDialer{name: "bypass"} def := &testDialer{name: "default"} p := NewPerHost(def, bypass) // Enable bypass for simple hostnames AND add explicit hosts p.SetBypassSimpleHostnames(true) p.AddHost("explicit.example.com") // Simple hostname should bypass if d := p.dialerForRequest("server"); d != bypass { t.Error("expected bypass dialer for simple hostname 'server'") } // Explicit host should also bypass if d := p.dialerForRequest("explicit.example.com"); d != bypass { t.Error("expected bypass dialer for explicit.example.com") } // FQDN not in list should use default if d := p.dialerForRequest("other.example.com"); d != def { t.Error("expected default dialer for other.example.com") } } ================================================ FILE: modules/systemproxy/proxy.go ================================================ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package proxy provides support for a variety of protocols to proxy network // data. package systemproxy import ( "context" "errors" "fmt" "net" "net/http" "net/url" "os" "sync" ) // A Dialer is a means to establish a connection. // Custom dialers should also implement ContextDialer. type Dialer interface { DialContext(ctx context.Context, network string, address string) (net.Conn, error) } // Auth contains authentication parameters that specific Dialers may require. type Auth struct { User, Password string } func NewDialerFromURL(u *url.URL, forward *net.Dialer) (Dialer, error) { switch u.Scheme { case "socks5", "socks5h": addr := u.Hostname() port := u.Port() if port == "" { port = "1080" } var auth *Auth if u.User != nil { auth = &Auth{ User: u.User.Username(), Password: func() string { if p, ok := u.User.Password(); ok { return p } return "" }(), } } return SOCKS5("tcp", net.JoinHostPort(addr, port), auth, forward) case "http", "https": d := &coordDialer{ proxyURL: u, forward: forward, } return d, nil } return nil, errors.New("systemproxy: unknown scheme: " + u.Scheme) } type ProxyFuncValue func(*url.URL) (*url.URL, error) // systemProxyFunc returns a function that reads the // environment variable or system config to determine the proxy address. var ( systemProxyFunc = sync.OnceValue(func() ProxyFuncValue { return systemProxyConfig().ProxyFunc() }) ) func NewSystemProxy(proxyURL string) func(*http.Request) (*url.URL, error) { if len(proxyURL) != 0 { u, err := url.Parse(proxyURL) if err == nil { return http.ProxyURL(u) } // Log warning to stderr and fallback to system proxy fmt.Fprintf(os.Stderr, "systemproxy: failed to parse proxyURL %q: %v, falling back to system proxy\n", proxyURL, err) } return func(r *http.Request) (*url.URL, error) { return systemProxyFunc()(r.URL) } } ================================================ FILE: modules/systemproxy/proxy_darwin.go ================================================ //go:build darwin package systemproxy import ( "cmp" "context" "errors" "net" "net/url" "os" "os/exec" "slices" "strconv" "strings" "time" "golang.org/x/net/http/httpproxy" ) type MacProxySettings struct { ExceptionsList []string ExcludeSimpleHostnames bool FTPPassive bool // HTTP HTTPEnable bool HTTPPort string HTTPProxy string HTTPUser string // HTTPS HTTPSEnable bool HTTPSPort string HTTPSProxy string HTTPSUser string // SOCKS SOCKSEnable bool SOCKSPort string SOCKSProxy string SOCKSUser string // ProxyAutoConfigEnable bool ProxyAutoDiscoveryEnable bool ProxyAutoConfigURLString string } func joinHostPort(u, p string) string { if p != "" { return net.JoinHostPort(u, p) } return u } func joinProxyURL(defaultScheme, host, port, user string) *url.URL { u := &url.URL{ Scheme: defaultScheme, Host: joinHostPort(host, port), } if user != "" { u.User = url.User(user) } return u } // section represents a key-value map from scutil output type section map[string]any type arrayItem struct { i string v string } func (se section) boolean(name string) bool { v, ok := se[name] if !ok { return false } s, ok := v.(string) if !ok { return false } return s == "1" } func (se section) string(name string) string { v, ok := se[name] if !ok { return "" } s, ok := v.(string) if !ok { return "" } return s } func (se section) array(name string) []string { o, ok := se[name] if !ok { return nil } sub, ok := o.(section) if !ok { return nil } items := make([]*arrayItem, 0, len(sub)) for k, v := range sub { s, ok := v.(string) if !ok { continue } items = append(items, &arrayItem{i: k, v: s}) } slices.SortFunc(items, func(a, b *arrayItem) int { // Convert indices to integers for numeric sorting to avoid string comparison issues // e.g., "10" < "2" is wrong in string comparison, but correct in numeric sorting ai, _ := strconv.Atoi(a.i) bi, _ := strconv.Atoi(b.i) return cmp.Compare(ai, bi) }) arr := make([]string, 0, len(items)) for _, i := range items { arr = append(arr, i.v) } return arr } func parseOut(out string) section { var cur section stack := make([]section, 0) for line := range strings.Lines(out) { line = strings.TrimSpace(line) fields := slices.Collect(strings.FieldsSeq(line)) if len(fields) == 0 { continue } lastField := fields[len(fields)-1] firstField := fields[0] if lastField == "}" { if len(stack) == 0 { break } cur = stack[len(stack)-1] stack = stack[:len(stack)-1] continue } if lastField == "{" { newObj := make(section) if cur != nil { stack = append(stack, cur) cur[firstField] = newObj } cur = newObj continue } if len(fields) == 3 && fields[1] == ":" { if cur != nil { cur[firstField] = lastField } } } return cur } func findSystemProxy() (*MacProxySettings, error) { ctx, cancelCtx := context.WithTimeout(context.Background(), time.Second) defer cancelCtx() cmd := exec.CommandContext(ctx, "scutil", "--proxy") out, err := cmd.CombinedOutput() if err != nil { return nil, err } se := parseOut(string(out)) if se == nil { return nil, errors.New("no scutil proxy settings") } return &MacProxySettings{ ExceptionsList: se.array("ExceptionsList"), ExcludeSimpleHostnames: se.boolean("ExcludeSimpleHostnames"), FTPPassive: se.boolean("FTPPassive"), HTTPEnable: se.boolean("HTTPEnable"), HTTPPort: se.string("HTTPPort"), HTTPProxy: se.string("HTTPProxy"), HTTPUser: se.string("HTTPUser"), HTTPSEnable: se.boolean("HTTPSEnable"), HTTPSPort: se.string("HTTPSPort"), HTTPSProxy: se.string("HTTPSProxy"), HTTPSUser: se.string("HTTPSUser"), SOCKSEnable: se.boolean("SOCKSEnable"), SOCKSPort: se.string("SOCKSPort"), SOCKSProxy: se.string("SOCKSProxy"), SOCKSUser: se.string("SOCKSUser"), ProxyAutoConfigEnable: se.boolean("ProxyAutoConfigEnable"), ProxyAutoDiscoveryEnable: se.boolean("ProxyAutoDiscoveryEnable"), ProxyAutoConfigURLString: se.string("ProxyAutoConfigURLString"), }, nil } // SOCKS5 support func newSystemDialer(forward *net.Dialer) Dialer { systemProxy, err := findSystemProxy() if err != nil { return forward } if systemProxy.SOCKSEnable && systemProxy.SOCKSProxy != "" { proxyURL := joinProxyURL("socks5", systemProxy.SOCKSProxy, systemProxy.SOCKSPort, systemProxy.SOCKSUser) return newDialerForHosts(proxyURL, forward, systemProxy.ExceptionsList, systemProxy.ExcludeSimpleHostnames) } return forward } func NewSystemDialer(forward *net.Dialer) Dialer { allProxy := getEnvAny("ALL_PROXY", "all_proxy") // follow ALL_PROXY noProxy := getEnvAny("NO_PROXY", "no_proxy") if allProxy == "" { return newSystemDialer(forward) } proxyURL, err := ParseURL(allProxy, "http://") if err != nil { return forward } return newDialer(proxyURL, forward, noProxy) } func systemProxyConfig() *httpproxy.Config { cfg := &httpproxy.Config{ HTTPProxy: getEnvAny("HTTP_PROXY", "http_proxy", "ALL_PROXY", "all_proxy"), HTTPSProxy: getEnvAny("HTTPS_PROXY", "https_proxy", "ALL_PROXY", "all_proxy"), NoProxy: getEnvAny("NO_PROXY", "no_proxy"), CGI: os.Getenv("REQUEST_METHOD") != "", } systemProxy, err := findSystemProxy() if err != nil { return cfg } if cfg.NoProxy == "" { cfg.NoProxy = strings.Join(systemProxy.ExceptionsList, ",") } // macOS proxy priority: protocol-specific proxy takes precedence over SOCKS // HTTP requests use HTTP proxy, HTTPS requests use HTTPS proxy // SOCKS is only used as fallback when no protocol-specific proxy is configured // Reference: Apple CFNetwork framework behavior // Configure HTTP proxy if cfg.HTTPProxy == "" { if systemProxy.HTTPEnable && systemProxy.HTTPProxy != "" { cfg.HTTPProxy = joinProxyURL("http", systemProxy.HTTPProxy, systemProxy.HTTPPort, systemProxy.HTTPUser).String() } else if systemProxy.SOCKSEnable && systemProxy.SOCKSProxy != "" { // Fallback to SOCKS if no HTTP proxy configured cfg.HTTPProxy = joinProxyURL("socks5", systemProxy.SOCKSProxy, systemProxy.SOCKSPort, systemProxy.SOCKSUser).String() } } // Configure HTTPS proxy if cfg.HTTPSProxy == "" { if systemProxy.HTTPSEnable && systemProxy.HTTPSProxy != "" { cfg.HTTPSProxy = joinProxyURL("https", systemProxy.HTTPSProxy, systemProxy.HTTPSPort, systemProxy.HTTPSUser).String() } else if systemProxy.SOCKSEnable && systemProxy.SOCKSProxy != "" { // Fallback to SOCKS if no HTTPS proxy configured cfg.HTTPSProxy = joinProxyURL("socks5", systemProxy.SOCKSProxy, systemProxy.SOCKSPort, systemProxy.SOCKSUser).String() } } return cfg } ================================================ FILE: modules/systemproxy/proxy_darwin_test.go ================================================ //go:build darwin package systemproxy import ( "encoding/json" "fmt" "net/http" "os" "reflect" "testing" "time" ) func TestFindSystemProxy(t *testing.T) { settings, err := findSystemProxy() if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) return } enc := json.NewEncoder(os.Stderr) enc.SetIndent("", " ") _ = enc.Encode(settings) } func TestSystemProxyConfig(t *testing.T) { cfg := systemProxyConfig() fmt.Fprintf(os.Stderr, "%v\n", cfg) } func TestConnectHackNews(t *testing.T) { client := &http.Client{ Transport: &http.Transport{ Proxy: NewSystemProxy(""), ForceAttemptHTTP2: true, MaxIdleConns: 100, IdleConnTimeout: 90 * time.Second, TLSHandshakeTimeout: 10 * time.Second, ExpectContinueTimeout: 1 * time.Second, }, } resp, err := client.Get("https://news.ycombinator.com/") if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) return } defer resp.Body.Close() // nolint fmt.Fprintf(os.Stderr, "%d %s\n", resp.StatusCode, resp.Status) for k, v := range resp.Header { if len(v) != 0 { fmt.Fprintf(os.Stderr, "%s: %s\n", k, v[0]) } } } func TestParseOut(t *testing.T) { tests := []struct { name string input string expectedHTTPProxy string expectedHTTPPort string expectedArray []string }{ { name: "simple dictionary", input: ` { HTTPEnable : 1 HTTPProxy : 127.0.0.1 HTTPPort : 7890 }`, expectedHTTPProxy: "127.0.0.1", expectedHTTPPort: "7890", }, { name: "array with numeric indices", input: ` { ExceptionsList : { 0 : first.com 1 : second.com 2 : third.com 10 : tenth.com 11 : eleventh.com } }`, expectedArray: []string{ "first.com", "second.com", "third.com", "tenth.com", "eleventh.com", }, }, { name: "complex proxy settings", input: ` { ExceptionsList : { 0 : 127.0.0.1 1 : localhost 2 : 192.168.0.0/16 } ExcludeSimpleHostnames : 1 FTPPassive : 1 SOCKSEnable : 1 SOCKSPort : 13659 SOCKSProxy : 127.0.0.1 }`, expectedArray: []string{ "127.0.0.1", "localhost", "192.168.0.0/16", }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { se := parseOut(tt.input) if se == nil { t.Fatal("parseOut returned nil") } if tt.expectedHTTPProxy != "" { got := se.string("HTTPProxy") if got != tt.expectedHTTPProxy { t.Errorf("HTTPProxy = %q, want %q", got, tt.expectedHTTPProxy) } } if tt.expectedHTTPPort != "" { got := se.string("HTTPPort") if got != tt.expectedHTTPPort { t.Errorf("HTTPPort = %q, want %q", got, tt.expectedHTTPPort) } } if tt.expectedArray != nil { got := se.array("ExceptionsList") if !reflect.DeepEqual(got, tt.expectedArray) { t.Errorf("ExceptionsList = %v, want %v", got, tt.expectedArray) } } }) } } func TestArraySortingWithLargeIndices(t *testing.T) { // Test sorting with large indices to verify numeric sorting instead of string sorting input := ` { ExceptionsList : { 0 : item0 1 : item1 2 : item2 9 : item9 10 : item10 11 : item11 100 : item100 101 : item101 } }` se := parseOut(input) if se == nil { t.Fatal("parseOut returned nil") } got := se.array("ExceptionsList") expected := []string{ "item0", "item1", "item2", "item9", "item10", "item11", "item100", "item101", } if !reflect.DeepEqual(got, expected) { t.Errorf("ExceptionsList sorting failed:\ngot: %v\nexpected: %v", got, expected) } } func TestParseOutEdgeCases(t *testing.T) { tests := []struct { name string input string shouldPanic bool }{ { name: "empty string", input: "", shouldPanic: false, }, { name: "only newlines", input: "\n\n\n", shouldPanic: false, }, { name: "malformed input without dictionary start", input: "HTTPEnable : 1", shouldPanic: false, }, { name: "malformed input with only field assignment", input: "SomeField : SomeValue\nAnotherField : AnotherValue", shouldPanic: false, }, { name: "unclosed dictionary", input: ` { HTTPEnable : 1`, shouldPanic: false, }, { name: "extra closing braces", input: ` { HTTPEnable : 1 } }`, shouldPanic: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // This should not panic result := parseOut(tt.input) // Result can be nil or empty section, both are valid t.Logf("parseOut returned: %v (nil=%v)", result, result == nil) }) } } ================================================ FILE: modules/systemproxy/proxy_others.go ================================================ //go:build !windows && !darwin package systemproxy import ( "net" "os" "golang.org/x/net/http/httpproxy" ) func NewSystemDialer(forward *net.Dialer) Dialer { allProxy := getEnvAny("ALL_PROXY", "all_proxy") noProxy := getEnvAny("NO_PROXY", "no_proxy") if allProxy == "" { return forward } proxyURL, err := ParseURL(allProxy, "http://") if err != nil { return forward } return newDialer(proxyURL, forward, noProxy) } func systemProxyConfig() *httpproxy.Config { return &httpproxy.Config{ HTTPProxy: getEnvAny("HTTP_PROXY", "http_proxy", "ALL_PROXY", "all_proxy"), HTTPSProxy: getEnvAny("HTTPS_PROXY", "https_proxy", "ALL_PROXY", "all_proxy"), NoProxy: getEnvAny("NO_PROXY", "no_proxy"), CGI: os.Getenv("REQUEST_METHOD") != "", } } ================================================ FILE: modules/systemproxy/proxy_test.go ================================================ package systemproxy import ( "reflect" "strings" "testing" ) // parseProxyOverrideForTest is a copy of parseProxyOverride for testing on non-Windows platforms // Windows format: "localhost;127.0.0.1;;*.example.com" // means bypass proxy for all local addresses (simple hostnames without dots) func parseProxyOverrideForTest(proxyOverride string) (hosts []string, bypassLocal bool) { items := strings.SplitSeq(proxyOverride, ";") for item := range items { item = strings.TrimSpace(item) if item == "" { continue } if strings.EqualFold(item, "") { bypassLocal = true continue } hosts = append(hosts, item) } return hosts, bypassLocal } // TestParseProxyOverride tests the Windows ProxyOverride parsing logic func TestParseProxyOverride(t *testing.T) { tests := []struct { name string input string expectedHosts []string expectedLocal bool }{ { name: "empty string", input: "", expectedHosts: nil, expectedLocal: false, }, { name: "simple hosts", input: "localhost;127.0.0.1;192.168.0.0/16", expectedHosts: []string{"localhost", "127.0.0.1", "192.168.0.0/16"}, expectedLocal: false, }, { name: "with local tag", input: "localhost;127.0.0.1;", expectedHosts: []string{"localhost", "127.0.0.1"}, expectedLocal: true, }, { name: "local tag only", input: "", expectedHosts: nil, expectedLocal: true, }, { name: "local tag with different case", input: "", expectedHosts: nil, expectedLocal: true, }, { name: "local tag mixed case", input: "", expectedHosts: nil, expectedLocal: true, }, { name: "with wildcards", input: "*.example.com;*.test.com;", expectedHosts: []string{"*.example.com", "*.test.com"}, expectedLocal: true, }, { name: "with spaces", input: " localhost ; 127.0.0.1 ; ", expectedHosts: []string{"localhost", "127.0.0.1"}, expectedLocal: true, }, { name: "multiple semicolons", input: "localhost;;127.0.0.1;;", expectedHosts: []string{"localhost", "127.0.0.1"}, expectedLocal: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { hosts, bypassLocal := parseProxyOverrideForTest(tt.input) if !reflect.DeepEqual(hosts, tt.expectedHosts) { t.Errorf("hosts = %v, want %v", hosts, tt.expectedHosts) } if bypassLocal != tt.expectedLocal { t.Errorf("bypassLocal = %v, want %v", bypassLocal, tt.expectedLocal) } }) } } ================================================ FILE: modules/systemproxy/proxy_windows.go ================================================ //go:build windows package systemproxy import ( "net" "os" "strings" "golang.org/x/net/http/httpproxy" "golang.org/x/sys/windows/registry" ) type windowsProxyConfig struct { ProxyServer string ProxyOverride string ProxyEnable uint64 AutoConfigURL string } // parseProxyServer parses Windows proxy server string into a map // Windows proxy format: "http=proxy.example.com:8080;https=proxy.example.com:8443;socks=proxy.example.com:1080" // or just "proxy.example.com:8080" for all protocols // Note: Keys are normalized to lowercase for case-insensitive matching func parseProxyServer(proxyServer string) map[string]string { protocol := make(map[string]string) for s := range strings.SplitSeq(proxyServer, ";") { if s == "" { continue } pair := strings.SplitN(s, "=", 2) if len(pair) > 1 { // Normalize key to lowercase for case-insensitive matching protocol[strings.ToLower(pair[0])] = pair[1] } else { protocol[""] = pair[0] } } return protocol } // getProtocolAny returns the first matching protocol value from the map // Keys are checked in order, returns empty string if none found func getProtocolAny(protocol map[string]string, keys ...string) string { for _, key := range keys { if v, ok := protocol[key]; ok { return v } } return "" } func fromWindowsProxy() (values windowsProxyConfig, err error) { var proxySettingsPerUser uint64 = 1 // 1 is the default value to consider current user k, err := registry.OpenKey(registry.LOCAL_MACHINE, `Software\Policies\Microsoft\Windows\CurrentVersion\Internet Settings`, registry.QUERY_VALUE) if err == nil { // We had used the below variable tempPrxUsrSettings, because the Golang method GetIntegerValue // sets the value to zero even it fails. tempPrxUsrSettings, _, err := k.GetIntegerValue("ProxySettingsPerUser") if err == nil { // consider the value of tempPrxUsrSettings if it is a success proxySettingsPerUser = tempPrxUsrSettings } _ = k.Close() } var hkey registry.Key if proxySettingsPerUser == 0 { hkey = registry.LOCAL_MACHINE } else { hkey = registry.CURRENT_USER } k, err = registry.OpenKey(hkey, `Software\Microsoft\Windows\CurrentVersion\Internet Settings`, registry.QUERY_VALUE) if err != nil { return } defer k.Close() // nolint values.ProxyServer, _, err = k.GetStringValue("ProxyServer") if err != nil && err != registry.ErrNotExist { return } values.ProxyOverride, _, err = k.GetStringValue("ProxyOverride") if err != nil && err != registry.ErrNotExist { return } values.ProxyEnable, _, err = k.GetIntegerValue("ProxyEnable") if err != nil && err != registry.ErrNotExist { return } values.AutoConfigURL, _, err = k.GetStringValue("AutoConfigURL") if err != nil && err != registry.ErrNotExist { return } err = nil return } // parseProxyOverride parses Windows ProxyOverride string and handles special tag // Windows format: "localhost;127.0.0.1;;*.example.com" // means bypass proxy for all local addresses (simple hostnames without dots) func parseProxyOverride(proxyOverride string) (hosts []string, bypassLocal bool) { for item := range strings.SplitSeq(proxyOverride, ";") { item = strings.TrimSpace(item) if item == "" { continue } // is a special tag in Windows that means bypass proxy for: // - Hostnames without dots (e.g., "server", "localhost") // - Does NOT include FQDNs or IP addresses if strings.EqualFold(item, "") { bypassLocal = true continue } hosts = append(hosts, item) } return hosts, bypassLocal } func newSystemDialer(forward *net.Dialer) Dialer { values, err := fromWindowsProxy() if err != nil || values.ProxyEnable < 1 { // not config or disabled return forward } noProxy, bypassLocal := parseProxyOverride(values.ProxyOverride) protocol := parseProxyServer(values.ProxyServer) // Priority: socks proxy > default proxy // SOCKS proxy is preferred for general dialing as it supports more protocols (TCP, UDP, etc.) // Default proxy (without protocol prefix) is typically HTTP proxy if socksProxy := getProtocolAny(protocol, "socks"); socksProxy != "" { if proxyURL, err := ParseURL(socksProxy, "socks5://"); err == nil { return newDialerForHosts(proxyURL, forward, noProxy, bypassLocal) } } if defaultProxy := getProtocolAny(protocol, ""); defaultProxy != "" { if proxyURL, err := ParseURL(defaultProxy, "http://"); err == nil { return newDialerForHosts(proxyURL, forward, noProxy, bypassLocal) } } return forward } func NewSystemDialer(forward *net.Dialer) Dialer { allProxy := getEnvAny("ALL_PROXY", "all_proxy") noProxy := getEnvAny("NO_PROXY", "no_proxy") if allProxy == "" { return newSystemDialer(forward) } proxyURL, err := ParseURL(allProxy, "http://") if err != nil { return forward } return newDialer(proxyURL, forward, noProxy) } func systemProxyConfig() *httpproxy.Config { cfg := &httpproxy.Config{ HTTPProxy: getEnvAny("HTTP_PROXY", "http_proxy", "ALL_PROXY", "all_proxy"), HTTPSProxy: getEnvAny("HTTPS_PROXY", "https_proxy", "ALL_PROXY", "all_proxy"), NoProxy: getEnvAny("NO_PROXY", "no_proxy"), CGI: os.Getenv("REQUEST_METHOD") != "", } if cfg.HTTPProxy != "" || cfg.HTTPSProxy != "" { return cfg } values, err := fromWindowsProxy() if err != nil || values.ProxyEnable < 1 { // not config or disabled return cfg } protocol := parseProxyServer(values.ProxyServer) if cfg.NoProxy == "" { // Parse ProxyOverride and convert to standard NoProxy format noProxyHosts, bypassLocal := parseProxyOverride(values.ProxyOverride) var noProxyParts []string for _, host := range noProxyHosts { // Convert Windows format to standard format noProxyParts = append(noProxyParts, host) } if bypassLocal { // For , add common local patterns // Note: httpproxy.Config doesn't natively support "simple hostname" concept, // so we add common local addresses noProxyParts = append(noProxyParts, "localhost", "127.0.0.1", "::1") } cfg.NoProxy = strings.Join(noProxyParts, ",") } // Windows proxy priority: protocol-specific proxy takes precedence over SOCKS // HTTP requests use HTTP proxy, HTTPS requests use HTTPS proxy // SOCKS is only used as fallback when no protocol-specific proxy is configured // Reference: WinHTTP proxy configuration behavior // Configure HTTP proxy if cfg.HTTPProxy == "" { if httpProxy := getProtocolAny(protocol, "http"); httpProxy != "" { cfg.HTTPProxy = httpProxy } else if socksProxy := getProtocolAny(protocol, "socks"); socksProxy != "" { // Fallback to SOCKS if no HTTP proxy configured cfg.HTTPProxy = "socks5://" + socksProxy } else if defaultProxy := getProtocolAny(protocol, ""); defaultProxy != "" { cfg.HTTPProxy = defaultProxy } } // Configure HTTPS proxy if cfg.HTTPSProxy == "" { if httpsProxy := getProtocolAny(protocol, "https"); httpsProxy != "" { cfg.HTTPSProxy = httpsProxy } else if socksProxy := getProtocolAny(protocol, "socks"); socksProxy != "" { // Fallback to SOCKS if no HTTPS proxy configured cfg.HTTPSProxy = "socks5://" + socksProxy } else if defaultProxy := getProtocolAny(protocol, ""); defaultProxy != "" { cfg.HTTPSProxy = defaultProxy } } return cfg } ================================================ FILE: modules/systemproxy/socks5.go ================================================ // Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package systemproxy import ( "context" "net" "github.com/antgroup/hugescm/modules/systemproxy/internal/socks" ) // SOCKS5 returns a Dialer that makes SOCKSv5 connections to the given // address with an optional username and password. // See RFC 1928 and RFC 1929. func SOCKS5(network, address string, auth *Auth, forward Dialer) (Dialer, error) { d := socks.NewDialer(network, address) if forward != nil { d.ProxyDial = func(ctx context.Context, network string, address string) (net.Conn, error) { return forward.DialContext(ctx, network, address) } } if auth != nil { d.AuthMethods = []socks.AuthMethod{socks.AuthMethodNotRequired, socks.AuthMethodUsernamePassword} d.Authenticate = (&socks.UsernamePassword{Username: auth.User, Password: auth.Password}).Authenticate } return d, nil } ================================================ FILE: modules/systemproxy/url.go ================================================ package systemproxy import ( "net/url" "strings" ) // ParseURL parses a URL string with an optional default scheme. // If rawURL already contains "://", it's parsed as-is. // Otherwise, defaultScheme is prepended before parsing. func ParseURL(rawURL string, defaultScheme string) (*url.URL, error) { if strings.Contains(rawURL, "://") { return url.Parse(rawURL) } // Ensure defaultScheme ends with "://" if !strings.HasSuffix(defaultScheme, "://") { defaultScheme += "://" } return url.Parse(defaultScheme + rawURL) } ================================================ FILE: modules/systemproxy/url_test.go ================================================ package systemproxy import ( "testing" ) func TestParseURL(t *testing.T) { tests := []struct { name string rawURL string defaultScheme string wantScheme string wantHost string wantErr bool }{ { name: "URL with scheme", rawURL: "http://proxy.example.com:8080", defaultScheme: "http://", wantScheme: "http", wantHost: "proxy.example.com:8080", wantErr: false, }, { name: "URL without scheme - http default", rawURL: "proxy.example.com:8080", defaultScheme: "http://", wantScheme: "http", wantHost: "proxy.example.com:8080", wantErr: false, }, { name: "URL without scheme - https default", rawURL: "proxy.example.com:8443", defaultScheme: "https://", wantScheme: "https", wantHost: "proxy.example.com:8443", wantErr: false, }, { name: "URL without scheme - socks5 default", rawURL: "proxy.example.com:1080", defaultScheme: "socks5://", wantScheme: "socks5", wantHost: "proxy.example.com:1080", wantErr: false, }, { name: "URL without scheme - default without suffix", rawURL: "proxy.example.com:8080", defaultScheme: "http", wantScheme: "http", wantHost: "proxy.example.com:8080", wantErr: false, }, { name: "SOCKS5 URL with authentication", rawURL: "socks5://user:password@proxy.example.com:1080", defaultScheme: "http://", wantScheme: "socks5", wantHost: "proxy.example.com:1080", wantErr: false, }, { name: "HTTP URL with authentication", rawURL: "http://user:password@proxy.example.com:8080", defaultScheme: "http://", wantScheme: "http", wantHost: "proxy.example.com:8080", wantErr: false, }, { name: "Empty URL", rawURL: "", defaultScheme: "http://", wantScheme: "", wantHost: "", wantErr: false, // url.Parse accepts empty string }, { name: "IP address without scheme", rawURL: "127.0.0.1:8080", defaultScheme: "http://", wantScheme: "http", wantHost: "127.0.0.1:8080", wantErr: false, }, { name: "IP address with scheme", rawURL: "https://127.0.0.1:8443", defaultScheme: "http://", wantScheme: "https", wantHost: "127.0.0.1:8443", wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got, err := ParseURL(tt.rawURL, tt.defaultScheme) if (err != nil) != tt.wantErr { t.Errorf("ParseURL() error = %v, wantErr %v", err, tt.wantErr) return } if tt.wantErr { return } if tt.wantScheme != "" && got.Scheme != tt.wantScheme { t.Errorf("ParseURL() scheme = %v, want %v", got.Scheme, tt.wantScheme) } if tt.wantHost != "" && got.Host != tt.wantHost { t.Errorf("ParseURL() host = %v, want %v", got.Host, tt.wantHost) } }) } } ================================================ FILE: modules/term/color.go ================================================ package term // Red returns the string s wrapped in red ANSI color codes. // The color format depends on the Level: // - Level16M: Uses RGB #f43b47 (truecolor) // - Level256: Uses standard ANSI red // - LevelNone: Returns s unchanged func (v Level) Red(s string) string { switch v { case Level16M: // #f43b47 return "\x1b[38;2;244;59;71m" + s + "\x1b[0m" case Level256: // \e[0;31m Red return "\x1b[31m" + s + "\x1b[0m" default: } return s } // Green returns the string s wrapped in green ANSI color codes. // The color format depends on the Level: // - Level16M: Uses RGB #43e97a (truecolor) // - Level256: Uses standard ANSI green // - LevelNone: Returns s unchanged func (v Level) Green(s string) string { switch v { case Level16M: // #43e97a return "\x1b[38;2;67;233;123m" + s + "\x1b[0m" case Level256: // \e[0;32m Green return "\x1b[32m" + s + "\x1b[0m" default: } return s } // Yellow returns the string s wrapped in yellow ANSI color codes. // The color format depends on the Level: // - Level16M: Uses RGB #fee240 (truecolor) // - Level256: Uses standard ANSI yellow // - LevelNone: Returns s unchanged func (v Level) Yellow(s string) string { switch v { case Level16M: // #fee240 return "\x1b[38;2;254;225;64m" + s + "\x1b[0m" case Level256: // \e[0;33m Yellow return "\x1b[33m" + s + "\x1b[0m" default: } return s } // Blue returns the string s wrapped in blue ANSI color codes. // The color format depends on the Level: // - Level16M: Uses RGB #00c8ff (truecolor) // - Level256: Uses standard ANSI blue // - LevelNone: Returns s unchanged func (v Level) Blue(s string) string { switch v { case Level16M: // #00c8ff return "\x1b[38;2;0;201;255m" + s + "\x1b[0m" case Level256: // \e[0;34m Blue return "\x1b[34m" + s + "\x1b[0m" default: } return s } // Purple returns the string s wrapped in purple ANSI color codes. // The color format depends on the Level: // - Level16M: Uses RGB #7028e4 (truecolor) // - Level256: Uses standard ANSI purple // - LevelNone: Returns s unchanged func (v Level) Purple(s string) string { switch v { case Level16M: // #7028e4 return "\x1b[38;2;112;40;228m" + s + "\x1b[0m" case Level256: // \e[0;35m Purple return "\x1b[35m" + s + "\x1b[0m" default: } return s } ================================================ FILE: modules/term/fmt.go ================================================ package term import ( "fmt" "io" "os" "github.com/charmbracelet/x/ansi" ) // Fprintf formats according to a format specifier and writes to w. // It respects the global StderrLevel and StdoutLevel settings: // - If w is os.Stdout and StdoutLevel is LevelNone, ANSI codes are stripped // - If w is os.Stderr and StderrLevel is LevelNone, ANSI codes are stripped // - Otherwise, output is passed through unchanged // // This allows TUI applications to automatically disable colors when // the output is redirected to a file or pipe. func Fprintf(w io.Writer, format string, a ...any) (int, error) { switch { case w == os.Stderr && StderrLevel == LevelNone: out := fmt.Sprintf(format, a...) return os.Stderr.WriteString(ansi.Strip(out)) case w == os.Stdout && StdoutLevel == LevelNone: out := fmt.Sprintf(format, a...) return os.Stdout.WriteString(ansi.Strip(out)) default: } return fmt.Fprintf(w, format, a...) } ================================================ FILE: modules/term/fmt_test.go ================================================ package term import ( "fmt" "os" "testing" "unicode" "github.com/charmbracelet/x/ansi" ) func TestStripAnsi(t *testing.T) { ss := fmt.Sprintf("\x1b[38;2;254;225;64m* %s jack\x1b[0m", os.Args[0]) as := ansi.Strip(ss) fmt.Fprintf(os.Stderr, "%s\n", as) } func TestCygwinTerminal(t *testing.T) { fmt.Fprintf(os.Stderr, "IsCygwinTerminal: %v\n", IsCygwinTerminal(os.Stderr.Fd())) } func TestSanitized(t *testing.T) { ss := []string{ "error: Have you \033[31mread\033[m this?\a\n", fmt.Sprintf("\x1b[38;2;254;225;64m* %s jack\x1b[0m", os.Args[0]), } for i, s := range ss { s1 := SanitizeANSI(s, true) s2 := SanitizeANSI(s, false) fmt.Fprintf(os.Stderr, "round %d\n%s\x1b[0m\n%s\x1b[0m\n", i, s1, s2) } } func TestTable(t *testing.T) { table := make([]int, 0, 256) for i := range 256 { // iscntrl: i < 0x20 || i == 0x7f if i < 0x20 || i == 0x7f { table = append(table, CHAR_CONTROL) continue } if unicode.IsDigit(rune(i)) || i == ';' || i == ':' { table = append(table, CHAR_COLOR_SEQUENCE) continue } table = append(table, CHAR_UNSPECIFIED) } for i, b := range table { if i%16 == 0 && i != 0 { fmt.Fprintf(os.Stderr, "\n") } fmt.Fprintf(os.Stderr, "%d,", b) } } func TestSanitizedF(t *testing.T) { _, _ = SanitizedF("remote: %s\n", "objects 已验证") _, _ = SanitizedF("remote: %s\n", "objects 你好") } ================================================ FILE: modules/term/sanitized.go ================================================ package term import ( "fmt" "os" "strings" ) const ( CHAR_UNSPECIFIED = 0 CHAR_COLOR_SEQUENCE = 1 CHAR_CONTROL = 2 ) var ( // charIndex is a lookup table for quick character classification. // Index corresponds to ASCII code (0-255), values are CHAR_* constants. charIndex = []byte{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } ) // handleAnsiColorSequence parses an ANSI color sequence at the start of text. // If the sequence is valid and allowColor is true, it's written to b. // Returns the length of the sequence consumed, or 0 if invalid. // // Valid format: ESC [ [ [; ]*] m // // References: // - https://github.com/gitgitgadget/git/pull/1853 // - https://public-inbox.org/git/Z4bqMYKRP7Gva5St@tapette.crustytoothpaste.net/T/#t func handleAnsiColorSequence(b *strings.Builder, text []byte, allowColor bool) int { /* * Valid ANSI color sequences are of the form * * ESC [ [ [; ]*] m */ if len(text) < 3 || text[0] != '\x1b' || text[1] != '[' { return 0 } for i := 2; i < len(text); i++ { c := text[i] if c == 'm' { if allowColor { _, _ = b.Write(text[:i+1]) } return i } if charIndex[c] != CHAR_COLOR_SEQUENCE { break } } return 0 } // SanitizeANSI sanitizes ANSI sequences in content for safe terminal output. // // Behavior: // - If allowColor is true: ANSI color sequences are preserved // - If allowColor is false: All ANSI sequences are removed // - Control characters (except tab and newline) are converted to caret notation (^G, etc.) // // This is useful for displaying untrusted or external output safely in a TUI. func SanitizeANSI(content string, allowColor bool) string { b := &strings.Builder{} text := []byte(content) b.Grow(len(content)) for i := 0; i < len(text); i++ { c := text[i] if charIndex[c] != CHAR_CONTROL || c == '\t' || c == '\n' { _ = b.WriteByte(c) continue } if j := handleAnsiColorSequence(b, text[i:], allowColor); j != 0 { i += j continue } _ = b.WriteByte('^') _ = b.WriteByte(c + 0x40) } return b.String() } // SanitizedF formats according to a format specifier, sanitizes the result, // and writes it to stderr. Color sequences are preserved based on StderrLevel. // // This is a convenience function for safely printing formatted output to stderr // in TUI applications, ensuring control characters are converted to caret notation. func SanitizedF(format string, a ...any) (int, error) { content := fmt.Sprintf(format, a...) return os.Stderr.WriteString(SanitizeANSI(content, StderrLevel != LevelNone)) } ================================================ FILE: modules/term/terminal.go ================================================ package term import ( "os" "strings" "golang.org/x/term" ) // Level represents the color support level of a terminal. // // The levels are: // - LevelNone: No color support, ANSI codes are stripped // - Level256: 256-color palette support (standard ANSI colors) // - Level16M: 16 million colors (24-bit truecolor/RGB) support type Level int const ( LevelNone Level = iota Level256 Level16M ) // SupportColor returns true if the terminal supports any color output. func (level Level) SupportColor() bool { return level > LevelNone } var ( // StderrLevel is the detected color support level for stderr. StderrLevel Level // StdoutLevel is the detected color support level for stdout. StdoutLevel Level ) // isFalse checks if a string value represents a false/negative value. // Recognized false values: false, off, 0, no (case-insensitive). func isFalse(s string) bool { s = strings.ToLower(s) return s == "false" || s == "off" || s == "0" || s == "no" } // detectForceColor checks the FORCE_COLOR environment variable and returns // the forced color level along with a boolean indicating if forcing is enabled. // // FORCE_COLOR values: // - 0, false, off, no: No color (LevelNone) // - 3: Truecolor (Level16M) // - any other value: 256-color (Level256) func detectForceColor() (Level, bool) { forceColorEnv, ok := os.LookupEnv("FORCE_COLOR") if !ok { return LevelNone, false } if isFalse(forceColorEnv) { return LevelNone, true } if forceColorEnv == "3" { return Level16M, true } return Level256, true } // https://github.com/gui-cs/Terminal.Gui/issues/48 // https://github.com/termstandard/colors // https://github.com/microsoft/terminal/issues/11057 // https://marvinh.dev/blog/terminal-colors/ // https://github.com/microsoft/terminal/issues/13006 // https://github.com/termstandard/colors/issues/69 Terminal.app for macOS Tahoe supports truecolor var ( // termSupports maps terminal program names to their color capabilities. // This list includes known terminals that support 16M colors. termSupports = map[string]Level{ "mintty": Level16M, "iTerm.app": Level16M, "WezTerm": Level16M, } ) // detectColorLevel detects the terminal's color support capability by checking // various environment variables and terminal type indicators. // // Detection order: // 1. Windows Terminal (WT_SESSION env var) // 2. Known terminal programs (TERM_PROGRAM env var) // 3. COLORTERM and TERM env vars for truecolor/256color keywords // 4. Platform-specific detection (Cygwin/Windows console) func detectColorLevel() Level { // detect Windows Terminal if _, ok := os.LookupEnv("WT_SESSION"); ok { return Level16M } if termApp, ok := os.LookupEnv("TERM_PROGRAM"); ok { if colorLevel, ok := termSupports[termApp]; ok { return colorLevel } } colorTermEnv := os.Getenv("COLORTERM") termEnv := os.Getenv("TERM") if strings.Contains(termEnv, "24bit") || strings.Contains(termEnv, "truecolor") || strings.Contains(colorTermEnv, "24bit") || strings.Contains(colorTermEnv, "truecolor") { return Level16M } if strings.Contains(termEnv, "256") || strings.Contains(colorTermEnv, "256") { return Level256 } return detectColorLevelHijack() } func init() { // Detect FORCE_COLOR and override detection if colorLevel, ok := detectForceColor(); ok { StderrLevel = colorLevel StdoutLevel = colorLevel return } // Detect NO_COLOR (https://no-color.org/) if noColor, ok := os.LookupEnv("NO_COLOR"); ok && !isFalse(noColor) { return } // Auto-detect color level from environment colorLevel := detectColorLevel() if IsTerminal(os.Stderr.Fd()) { StderrLevel = colorLevel } if IsTerminal(os.Stdout.Fd()) { StdoutLevel = colorLevel } } // IsTerminal returns true if the given file descriptor is connected to a terminal. // This works for both native terminals and Cygwin/MSYS2 pseudo-terminals. func IsTerminal(fd uintptr) bool { return term.IsTerminal(int(fd)) || IsCygwinTerminal(fd) } // IsNativeTerminal returns true if the given file descriptor is a native terminal // (not a Cygwin/MSYS2 pseudo-terminal). func IsNativeTerminal(fd uintptr) bool { return term.IsTerminal(int(fd)) } // GetSize returns the dimensions of the terminal for the given file descriptor. // Returns width, height in characters, and any error encountered. func GetSize(fd int) (width, height int, err error) { return term.GetSize(fd) } ================================================ FILE: modules/term/terminal_others.go ================================================ //go:build !windows package term func IsCygwinTerminal(fd uintptr) bool { return false } func detectColorLevelHijack() Level { return LevelNone } ================================================ FILE: modules/term/terminal_windows.go ================================================ package term import ( "os" "strings" "syscall" "unsafe" "golang.org/x/sys/windows" ) var ( kernel32 = syscall.NewLazyDLL("kernel32.dll") procGetFileInformationByHandleEx = kernel32.NewProc("GetFileInformationByHandleEx") ) // isCygwinPipeName checks if a pipe name indicates a Cygwin/MSYS2 pseudo-terminal. // Cygwin/MSYS2 PTY pipe names follow the pattern: // // \{cygwin,msys}-XXXXXXXXXXXXXXXX-ptyN-{from,to}-master // // This function is used by IsCygwinTerminal to detect these emulated terminals. func isCygwinPipeName(name string) bool { token := strings.Split(name, "-") if len(token) < 5 { return false } if token[0] != `\msys` && token[0] != `\cygwin` && token[0] != `\Device\NamedPipe\msys` && token[0] != `\Device\NamedPipe\cygwin` { return false } if token[1] == "" { return false } if !strings.HasPrefix(token[2], "pty") { return false } if token[3] != `from` && token[3] != `to` { return false } if token[4] != "master" { return false } return true } // FILE_NAME_INFO structure used by GetFileInformationByHandleEx. // Receives the file name. Used for any handles. type FILE_NAME_INFO struct { FileNameLength uint32 FileName [512]uint16 } // GetFileInformationByHandleEx retrieves file information for the specified file. // This is a wrapper around the Windows API of the same name. func GetFileInformationByHandleEx(hFile syscall.Handle, fileInformationClass uint32, lpFileInformation unsafe.Pointer, dwBufferSize uint32) error { r1, _, err := procGetFileInformationByHandleEx.Call( uintptr(hFile), uintptr(fileInformationClass), uintptr(lpFileInformation), uintptr(dwBufferSize), ) if r1 == 1 { return nil } return err } const ( FILE_NAME_INFO_BY_HANDLE = 2 ) // IsCygwinTerminal returns true if the file descriptor is connected to a // Cygwin or MSYS2 pseudo-terminal. These terminals use named pipes rather // than native Windows console APIs. func IsCygwinTerminal(fd uintptr) bool { var fi FILE_NAME_INFO bufferSize := uint32(unsafe.Sizeof(fi)) if err := GetFileInformationByHandleEx(syscall.Handle(fd), FILE_NAME_INFO_BY_HANDLE, unsafe.Pointer(&fi), bufferSize); err != nil { return false } fileName := windows.UTF16ToString(fi.FileName[:fi.FileNameLength/2]) return isCygwinPipeName(fileName) } // detectColorLevelHijack detects Windows console color support and enables // virtual terminal processing if needed. // // This function: // 1. Attempts to get the current console mode // 2. Enables virtual terminal processing (VT100/ANSI escape sequences) if disabled // 3. Determines color support based on Windows version: // - Windows 10 build 14931+: 16M colors (truecolor) // - Windows 10 build 10586+: 256 colors // - Earlier versions: No color support // // References: // - https://github.com/microsoft/terminal/issues/11057#issuecomment-1493118152 // - https://github.com/microsoft/terminal/issues/13006 func detectColorLevelHijack() Level { var mode uint32 handle := windows.Handle(os.Stderr.Fd()) if err := windows.GetConsoleMode(handle, &mode); err != nil { handle = windows.Handle(os.Stdout.Fd()) if err := windows.GetConsoleMode(handle, &mode); err != nil { return LevelNone } } // VT detect and vt enabled if mode&windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING != windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING { mode = mode | windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING if err := windows.SetConsoleMode(handle, mode); err != nil { return LevelNone } } major, minor, build := windows.RtlGetNtVersionNumbers() if major > 10 || (major == 10 && minor >= 1) || (major == 10 && minor == 0 && build > 14931) { return Level16M } if major == 10 && build > 10586 { return Level256 } return LevelNone } ================================================ FILE: modules/trace/error.go ================================================ package trace import ( "errors" "fmt" "os" "runtime" "strings" "time" "github.com/sirupsen/logrus" ) func Location(skip int) (string, int) { pc, _, line, ok := runtime.Caller(skip) if !ok { return "?", line } fn := runtime.FuncForPC(pc) if fn == nil { return "?", line } return fn.Name(), line } func Errorf(format string, a ...any) error { fn, line := Location(2) msg := fmt.Sprintf(format, a...) logrus.Error(fn, ":", line, " ", msg) return errors.New(msg) } type Tracker struct { debug bool last time.Time } func NewTracker(debugMode bool) *Tracker { return &Tracker{debug: debugMode, last: time.Now()} } func (t *Tracker) StepNext(format string, a ...any) { if !t.debug { return } s := fmt.Sprintf(format, a...) now := time.Now() fmt.Fprintf(os.Stderr, "\x1b[35m* %s use time: %v\x1b[0m\n", strings.Trim(s, "\n"), now.Sub(t.last)) t.last = now } ================================================ FILE: modules/trace/trace.go ================================================ package trace import ( "bytes" "fmt" "os" "strings" "github.com/antgroup/hugescm/modules/term" ) var ( verbose bool ) func EnableDebugMode() { verbose = true } func DbgPrint(format string, args ...any) { if !verbose { return } message := fmt.Sprintf(format, args...) var buffer bytes.Buffer switch term.StderrLevel { case term.Level16M: for s := range strings.SplitSeq(message, "\n") { _, _ = buffer.WriteString("\x1b[38;2;254;225;64m* ") _, _ = buffer.WriteString(s) _, _ = buffer.WriteString("\x1b[0m\n") } case term.Level256: for s := range strings.SplitSeq(message, "\n") { _, _ = buffer.WriteString("\x1b[33m* ") _, _ = buffer.WriteString(s) _, _ = buffer.WriteString("\x1b[0m\n") } default: for s := range strings.SplitSeq(message, "\n") { _, _ = buffer.WriteString(s) _ = buffer.WriteByte('\n') } } _, _ = os.Stderr.Write(buffer.Bytes()) } ================================================ FILE: modules/trace/trace_test.go ================================================ package trace import ( "testing" "github.com/antgroup/hugescm/modules/term" ) func TestDebug(t *testing.T) { term.StderrLevel = term.Level256 verbose = true DbgPrint("jack") } ================================================ FILE: modules/tui/color.go ================================================ package tui import ( "maps" "os" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/diferenco/color" "github.com/antgroup/hugescm/modules/term" ) // DiffTheme defines color scheme for diff output. type DiffTheme struct { Dark map[color.ColorKey]string Light map[color.ColorKey]string } // Predefined diff themes. var ( // GitHub theme (default). GitHub = DiffTheme{ Dark: map[color.ColorKey]string{ color.Old: "\x1b[38;2;248;81;73m", // #f85149 red color.New: "\x1b[38;2;63;185;80m", // #3fb950 green color.Frag: "\x1b[38;2;88;166;255m", // #58a6ff blue color.Commit: "\x1b[38;2;210;153;34m", // #d29922 yellow }, Light: map[color.ColorKey]string{ color.Old: "\x1b[38;2;215;58;73m", // #d73a49 red color.New: "\x1b[38;2;40;167;69m", // #28a745 green color.Frag: "\x1b[38;2;0;92;197m", // #005cc5 blue color.Commit: "\x1b[38;2;176;136;0m", // #b08800 yellow }, } // Dracula theme. Dracula = DiffTheme{ Dark: map[color.ColorKey]string{ color.Old: "\x1b[38;2;255;85;85m", // #ff5555 red color.New: "\x1b[38;2;80;250;123m", // #50fa7b green color.Frag: "\x1b[38;2;139;233;253m", // #8be9fd cyan color.Commit: "\x1b[38;2;241;250;140m", // #f1fa8c yellow }, Light: map[color.ColorKey]string{ color.Old: "\x1b[38;2;215;58;73m", // same as GitHub light color.New: "\x1b[38;2;40;167;69m", color.Frag: "\x1b[38;2;0;92;197m", color.Commit: "\x1b[38;2;176;136;0m", }, } // OneDark theme. OneDark = DiffTheme{ Dark: map[color.ColorKey]string{ color.Old: "\x1b[38;2;224;108;117m", // #e06c75 red color.New: "\x1b[38;2;152;195;121m", // #98c379 green color.Frag: "\x1b[38;2;97;175;239m", // #61afef blue color.Commit: "\x1b[38;2;209;154;102m", // #d19a66 orange }, Light: map[color.ColorKey]string{ color.Old: "\x1b[38;2;228;86;73m", // #e45649 red color.New: "\x1b[38;2;80;161;79m", // #50a14f green color.Frag: "\x1b[38;2;56;125;203m", // #387dcb blue color.Commit: "\x1b[38;2;188;122;0m", // #bc7a00 orange }, } // Catppuccin theme. Catppuccin = DiffTheme{ Dark: map[color.ColorKey]string{ color.Old: "\x1b[38;2;243;139;168m", // #f38ba8 red color.New: "\x1b[38;2;166;227;161m", // #a6e3a1 green color.Frag: "\x1b[38;2;137;180;250m", // #89b4fa blue color.Commit: "\x1b[38;2;249;226;175m", // #f9e2af yellow }, Light: map[color.ColorKey]string{ color.Old: "\x1b[38;2;210;15;57m", // #d20f39 red color.New: "\x1b[38;2;64;160;43m", // #40a02b green color.Frag: "\x1b[38;2;30;102;245m", // #1e66f5 blue color.Commit: "\x1b[38;2;223;142;29m", // #df8e1d yellow }, } // Nord theme. Nord = DiffTheme{ Dark: map[color.ColorKey]string{ color.Old: "\x1b[38;2;191;97;106m", // #bf616a red color.New: "\x1b[38;2;163;190;140m", // #a3be8c green color.Frag: "\x1b[38;2;136;192;208m", // #88c0d0 cyan color.Commit: "\x1b[38;2;235;203;139m", // #ebcb8b yellow }, Light: map[color.ColorKey]string{ color.Old: "\x1b[38;2;191;97;106m", // same as dark color.New: "\x1b[38;2;163;190;140m", color.Frag: "\x1b[38;2;136;192;208m", color.Commit: "\x1b[38;2;235;203;139m", }, } // Current theme (can be changed). currentTheme = Dracula ) // SetDiffTheme sets the current diff theme. func SetDiffTheme(theme DiffTheme) { currentTheme = theme } // EncoderOptions returns diferenco.EncoderOption slice with appropriate color // configuration based on the terminal's color level. func EncoderOptions(level term.Level) []diferenco.EncoderOption { cc := color.ColorConfig{ color.Context: color.Normal, color.Meta: color.Bold, color.Whitespace: color.BgRed, color.Func: color.Normal, color.OldMoved: color.BoldMagenta, color.OldMovedAlternative: color.BoldBlue, color.OldMovedDimmed: color.Faint, color.OldMovedAlternativeDimmed: color.FaintItalic, color.NewMoved: color.BoldCyan, color.NewMovedAlternative: color.BoldYellow, color.NewMovedDimmed: color.Faint, color.NewMovedAlternativeDimmed: color.FaintItalic, color.ContextDimmed: color.Faint, color.OldDimmed: color.FaintRed, color.NewDimmed: color.FaintGreen, color.ContextBold: color.Bold, color.OldBold: color.BoldRed, color.NewBold: color.BoldGreen, } switch level { case term.Level16M: // Use truecolor with current theme based on background theme := currentTheme.Dark if !lipgloss.HasDarkBackground(os.Stdin, os.Stdout) { theme = currentTheme.Light } maps.Copy(cc, theme) case term.Level256: cc[color.Old] = color.Red cc[color.New] = color.Green cc[color.Frag] = color.Cyan cc[color.Commit] = color.Yellow default: return nil } return []diferenco.EncoderOption{diferenco.WithColor(cc)} } ================================================ FILE: modules/tui/confirm.go ================================================ package tui import ( "os" "charm.land/huh/v2" "charm.land/lipgloss/v2" "charm.land/lipgloss/v2/compat" ) // Color definitions for huh theme var ( normalFg = compat.AdaptiveColor{Light: lipgloss.Color("235"), Dark: lipgloss.Color("252")} fuchsia = lipgloss.Color("#F780E2") green = compat.AdaptiveColor{Light: lipgloss.Color("#02BA84"), Dark: lipgloss.Color("#02BF87")} ) // baseTheme returns a custom theme for huh widgets. func baseTheme() huh.Theme { return huh.ThemeFunc(func(isDark bool) *huh.Styles { t := huh.ThemeBase(isDark) t.Focused.Title = t.Focused.Title.Foreground(blue).Bold(true) t.Focused.ErrorIndicator = t.Focused.ErrorIndicator.Foreground(red) t.Focused.ErrorMessage = t.Focused.ErrorMessage.Foreground(red) t.Focused.TextInput.Cursor = t.Focused.TextInput.Cursor.Foreground(green) t.Focused.TextInput.Placeholder = t.Focused.TextInput.Placeholder.Foreground(compat.AdaptiveColor{Light: lipgloss.Color("248"), Dark: lipgloss.Color("238")}) t.Focused.TextInput.Prompt = t.Focused.TextInput.Prompt.Foreground(fuchsia) t.Focused.TextInput.Text = t.Focused.TextInput.Text.Foreground(normalFg) t.Blurred = t.Focused t.Blurred.TextInput.Cursor = lipgloss.NewStyle() return t }) } // AskConfirm prompts for a confirmation using huh library. // It provides a user-friendly yes/no confirmation dialog. // // Note: Output goes to stderr to avoid interfering with stdout piping. func AskConfirm(confirm *bool, format string, a ...any) error { c := huh.NewConfirm().Title(askTitle(format, a...)).Inline(true).Value(confirm).WithTheme(baseTheme()) return c.RunAccessible(os.Stderr, os.Stdin) } ================================================ FILE: modules/tui/input.go ================================================ package tui import ( "bufio" "errors" "fmt" "io" "os" "strings" "unicode" "unicode/utf8" "charm.land/lipgloss/v2" "charm.land/lipgloss/v2/compat" "golang.org/x/term" ) // ErrInterrupted is returned when user presses Ctrl+C or Ctrl+D. var ErrInterrupted = errors.New("interrupted") const maxAttempts = 3 // Color definitions (package-level to avoid recreation) var ( blue = compat.AdaptiveColor{Light: lipgloss.Color("#ace0f9"), Dark: lipgloss.Color("#ace0f9")} red = compat.AdaptiveColor{Light: lipgloss.Color("#FF4672"), Dark: lipgloss.Color("#ED567A")} ) var ( errorStyle = lipgloss.NewStyle().Foreground(red) titleStyle = lipgloss.NewStyle().Foreground(blue).Bold(true) ) // askTitle formats a title with a prefix. func askTitle(format string, a ...any) string { return "? " + fmt.Sprintf(format, a...) } // readLine reads a line with proper CJK/emoji backspace handling. // mask=0 shows input directly; otherwise each rune is replaced by mask. func readLine(mask rune, format string, a ...any) (string, error) { title := titleStyle.Render(askTitle(format, a...)) _, _ = lipgloss.Fprint(os.Stderr, title) fd := int(os.Stdin.Fd()) oldState, err := term.MakeRaw(fd) if err != nil { return "", fmt.Errorf("failed to set raw mode: %w", err) } defer term.Restore(fd, oldState) // nolint var inputRunes []rune reader := bufio.NewReader(os.Stdin) for { r, _, err := reader.ReadRune() if err != nil { if errors.Is(err, io.EOF) { fmt.Fprint(os.Stderr, "\r\n") return "", ErrInterrupted } return "", fmt.Errorf("read error: %w", err) } switch r { case '\r', '\n': fmt.Fprint(os.Stderr, "\r\n") return string(inputRunes), nil case 127, 8: // Backspace if len(inputRunes) > 0 { inputRunes = inputRunes[:len(inputRunes)-1] redrawLine(title, inputRunes, mask) } case 3: // Ctrl+C fmt.Fprint(os.Stderr, "\r\n") return "", ErrInterrupted case 4: // Ctrl+D fmt.Fprint(os.Stderr, "\r\n") return "", ErrInterrupted case 27: // ESC sequence (arrow keys, etc.) for { b, err := reader.ReadByte() if err != nil { break } if (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z') || b == '~' { break } } continue default: if r == utf8.RuneError { continue } if !unicode.IsControl(r) { inputRunes = append(inputRunes, r) if mask != 0 { fmt.Fprint(os.Stderr, string(mask)) } else { fmt.Fprint(os.Stderr, string(r)) } } } } } // redrawLine redraws the input line, correctly handling CJK/emoji characters. func redrawLine(title string, runes []rune, mask rune) { fmt.Fprint(os.Stderr, "\r") fmt.Fprint(os.Stderr, title) if mask != 0 { fmt.Fprint(os.Stderr, strings.Repeat(string(mask), len(runes))) } else { fmt.Fprint(os.Stderr, string(runes)) } fmt.Fprint(os.Stderr, "\x1b[K") } // AskInput prompts for a text input with proper CJK/emoji backspace handling. // // Note: Output goes to stderr to avoid interfering with stdout piping. func AskInput(value *string, format string, a ...any) error { input, err := readLine(0, format, a...) if err != nil { return err } *value = input return nil } // AskPassword prompts for a password input with asterisk masking. // It properly handles UTF-8, CJK characters, emoji, and terminal control sequences. // Cross-platform support: Windows, Linux, macOS (via golang.org/x/term). // // Note: Output goes to stderr to avoid interfering with stdout piping. func AskPassword(password *string, format string, a ...any) error { for range maxAttempts { input, err := readLine('*', format, a...) if err != nil { return err } if input = strings.TrimSpace(input); input == "" { _, _ = lipgloss.Fprintln(os.Stderr, errorStyle.Render("password cannot be empty")) continue } *password = input return nil } return fmt.Errorf("failed to get password after %d attempts", maxAttempts) } ================================================ FILE: modules/tui/pager.go ================================================ package tui import ( "bytes" "fmt" "io" "os" "strings" tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/term" "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/item" ) // Compile-time interface assertion var _ io.WriteCloser = &Pager{} // StringObject implements viewport.Object for a single line type StringObject string func (s StringObject) GetItem() item.Item { return item.NewItem(string(s)) } // Pager represents a simple terminal pager built with viewport. // It implements io.Writer and must be closed to display content. type Pager struct { buf bytes.Buffer colorMode term.Level useAltScreen bool } // NewPager creates a new pager with given color mode and alt screen setting. // The pager implements io.Writer, content is accumulated via Write calls. // Close must be called to display the content in the pager. // useAltScreen controls whether to use alternate screen buffer (default true). func NewPager(colorMode term.Level, useAltScreen bool) *Pager { return &Pager{ colorMode: colorMode, useAltScreen: useAltScreen, } } // Write implements io.Writer interface for the pager. // It appends data to the internal buffer and returns an error if the pager is closed. func (p *Pager) Write(data []byte) (int, error) { return p.buf.Write(data) } // Close finalizes the pager and displays the content. // For short content that fits in the terminal, it outputs directly without starting the pager. // For longer content, it starts an interactive pager with viewport. // Close is idempotent - calling it multiple times is safe. func (p *Pager) Close() error { content := p.buf.String() if content == "" { return nil } // If color is disabled (e.g. NO_COLOR or non-interactive terminal), // we also disable the pager and print directly. if p.colorMode == term.LevelNone { _, err := io.WriteString(os.Stdout, content) return err } // If content fits in one screen, output directly without starting pager if p.shouldSkipPager(content) { _, err := io.WriteString(os.Stdout, content) return err } return p.run(content) } // shouldSkipPager checks if the content is short enough to display without a pager. func (p *Pager) shouldSkipPager(content string) bool { _, termHeight, err := term.GetSize(int(os.Stdout.Fd())) if err != nil || termHeight <= 0 { return false } lineCount := strings.Count(content, "\n") if !strings.HasSuffix(content, "\n") && content != "" { lineCount++ } return lineCount <= termHeight-4 } // run starts the interactive pager with the given content using viewport. func (p *Pager) run(content string) error { lines := strings.Split(strings.TrimSuffix(content, "\n"), "\n") objects := make([]StringObject, len(lines)) for i, line := range lines { objects[i] = StringObject(line) } model := &pagerModel{ vp: newViewport(objects), useAlt: p.useAltScreen, width: 80, height: 24, totalLine: len(objects), } program := tea.NewProgram(model, tea.WithOutput(os.Stderr)) _, err := program.Run() return err } // ColorMode returns the color mode of the pager. func (p *Pager) ColorMode() term.Level { return p.colorMode } func newViewport(objects []StringObject) *viewport.Model[StringObject] { styles := viewport.DefaultStyles() styles.FooterStyle = lipgloss.NewStyle(). Foreground(lipgloss.Color("241")). Background(lipgloss.Color("235")). Padding(0, 1) opts := []viewport.Option[StringObject]{ viewport.WithFooterEnabled[StringObject](false), viewport.WithWrapText[StringObject](false), viewport.WithStyles[StringObject](styles), } vp := viewport.New(80, 24, opts...) vp.SetObjects(objects) return vp } // pagerModel is the bubbletea model for the pager using viewport type pagerModel struct { vp *viewport.Model[StringObject] useAlt bool ready bool width int height int totalLine int } // Init initializes the pager model. func (m *pagerModel) Init() tea.Cmd { return nil } // Update handles messages and updates the model func (m *pagerModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { switch msg := msg.(type) { case tea.WindowSizeMsg: m.width = msg.Width m.height = msg.Height m.ready = true m.vp.SetWidth(m.width) m.vp.SetHeight(m.height - 1) return m, nil case tea.KeyPressMsg: // Handle quit keys ourselves (viewport doesn't handle these) switch msg.String() { case "q", "esc", "ctrl+c": return m, tea.Quit } } // Let viewport handle navigation vp, cmd := m.vp.Update(msg) m.vp = vp return m, cmd } // View renders the pager UI func (m *pagerModel) View() tea.View { if !m.ready { return tea.NewView("Loading...") } content := m.vp.View() statusBar := m.renderStatusBar() fullView := lipgloss.JoinVertical(lipgloss.Left, content, statusBar) v := tea.NewView(fullView) v.AltScreen = m.useAlt return v } // renderStatusBar creates a status bar with line numbers and progress percentage func (m *pagerModel) renderStatusBar() string { if m.totalLine == 0 { return "" } topIdx, _ := m.vp.GetTopItemIdxAndLineOffset() vpHeight := m.vp.GetHeight() bottomLine := min(m.totalLine, topIdx+vpHeight) var percentage int if m.totalLine > 0 { percentage = min(100, bottomLine*100/m.totalLine) } statusStyle := lipgloss.NewStyle(). Foreground(lipgloss.Color("241")). Background(lipgloss.Color("235")). Padding(0, 1). Width(m.width) statusText := fmt.Sprintf("Lines: %d-%d/%d (%d%%) | ↑/k up | ↓/j down | g top | G bottom | space/f page down | b page up | q quit", topIdx+1, bottomLine, m.totalLine, percentage) return statusStyle.Render(statusText) } ================================================ FILE: modules/vfs/LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2017 Sourced Technologies S.L. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: modules/vfs/bound.go ================================================ package vfs import ( "errors" "fmt" "os" "path/filepath" "strings" "github.com/antgroup/hugescm/modules/securejoin" ) const ( defaultDirectoryMode = 0o755 defaultCreateMode = 0o666 ) // BoundOS is a fs implementation based on the OS filesystem which is bound to // a base dir. // Prefer this fs implementation over ChrootOS. // // Behaviors of note: // 1. Read and write operations can only be directed to files which descends // from the base dir. // 2. Symlinks don't have their targets modified, and therefore can point // to locations outside the base dir or to non-existent paths. // 3. Readlink and Lstat ensures that the link file is located within the base // dir, evaluating any symlinks that file or base dir may contain. type BoundOS struct { baseDir string walkBaseDir string deduplicatePath bool } func newBoundOS(d string, deduplicatePath bool) VFS { walkBaseDir := d if wd, err := filepath.EvalSymlinks(d); err == nil && wd != "" { walkBaseDir = wd } return &BoundOS{baseDir: d, walkBaseDir: walkBaseDir, deduplicatePath: deduplicatePath} } func (fs *BoundOS) Create(filename string) (*os.File, error) { return fs.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, defaultCreateMode) } func openFile(fn string, flag int, perm os.FileMode, createDir func(string) error) (*os.File, error) { if flag&os.O_CREATE != 0 { if createDir == nil { return nil, errors.New("createDir func cannot be nil if file needs to be opened in create mode") } if err := createDir(fn); err != nil { return nil, err } } return os.OpenFile(fn, flag, perm) } func (fs *BoundOS) OpenFile(filename string, flag int, perm os.FileMode) (*os.File, error) { fn, err := fs.abs(filename) if err != nil { return nil, err } return openFile(fn, flag, perm, fs.createDir) } func (fs *BoundOS) ReadDir(path string) ([]os.DirEntry, error) { dir, err := fs.abs(path) if err != nil { return nil, err } return os.ReadDir(dir) } func (fs *BoundOS) Rename(from, to string) error { f, err := fs.abs(from) if err != nil { return err } t, err := fs.abs(to) if err != nil { return err } // MkdirAll for target name. if err := fs.createDir(t); err != nil { return err } return os.Rename(f, t) } func (fs *BoundOS) MkdirAll(path string, perm os.FileMode) error { dir, err := fs.abs(path) if err != nil { return err } return os.MkdirAll(dir, perm) } func (fs *BoundOS) Open(filename string) (*os.File, error) { return fs.OpenFile(filename, os.O_RDONLY, 0) } func (fs *BoundOS) Stat(filename string) (os.FileInfo, error) { filename, err := fs.abs(filename) if err != nil { return nil, err } return os.Stat(filename) } func (fs *BoundOS) Remove(filename string) error { fn, err := fs.abs(filename) if err != nil { return err } return os.Remove(fn) } func (fs *BoundOS) Join(elem ...string) string { return filepath.Join(elem...) } func (fs *BoundOS) RemoveAll(path string) error { dir, err := fs.abs(path) if err != nil { return err } return os.RemoveAll(dir) } func (fs *BoundOS) Symlink(target, link string) error { ln, err := fs.abs(link) if err != nil { return err } // MkdirAll for containing dir. if err := fs.createDir(ln); err != nil { return err } return os.Symlink(target, ln) } func (fs *BoundOS) Lstat(filename string) (os.FileInfo, error) { if !filepath.IsAbs(filename) { filename = filepath.Join(fs.baseDir, filename) } filename = filepath.Clean(filename) if ok, err := fs.insideBaseDirEval(filename); !ok { return nil, err } return os.Lstat(filename) } func (fs *BoundOS) Readlink(link string) (string, error) { if !filepath.IsAbs(link) { link = filepath.Join(fs.baseDir, link) } link = filepath.Clean(link) if ok, err := fs.insideBaseDirEval(link); !ok { return "", err } return os.Readlink(link) } // Root returns the current base dir of the billy.Filesystem. // This is required in order for this implementation to be a drop-in // replacement for other upstream implementations (e.g. memory and osfs). func (fs *BoundOS) Root() string { return fs.baseDir } func (fs *BoundOS) createDir(fullpath string) error { dir := filepath.Dir(fullpath) if dir != "." { if err := os.MkdirAll(dir, defaultDirectoryMode); err != nil { return err } } return nil } // abs transforms filename to an absolute path, taking into account the base dir. // Relative paths won't be allowed to ascend the base dir, so `../file` will become // `/working-dir/file`. // // Note that if filename is a symlink, the returned address will be the target of the // symlink. func (fs *BoundOS) abs(filename string) (string, error) { if filename == fs.baseDir { filename = string(filepath.Separator) } path, err := securejoin.SecureJoin(fs.baseDir, filename) if err != nil { return "", nil } if fs.deduplicatePath { vol := filepath.VolumeName(fs.baseDir) dup := filepath.Join(fs.baseDir, fs.baseDir[len(vol):]) if strings.HasPrefix(path, dup+string(filepath.Separator)) { return fs.abs(path[len(dup):]) } } return path, nil } var ( ErrPathOutsideBase = errors.New("path outside base dir") ) // insideBaseDir checks whether filename is located within // the fs.baseDir. func (fs *BoundOS) insideBaseDir(filename string) (bool, error) { if filename == fs.baseDir { return true, nil } if !strings.HasPrefix(filename, fs.baseDir+string(filepath.Separator)) { return false, ErrPathOutsideBase } return true, nil } type ErrNotInsideBaseDir struct { BaseDir string Path string } func (e *ErrNotInsideBaseDir) Error() string { return fmt.Sprintf("path '%s' outside base dir: %s", e.Path, e.BaseDir) } func IsErrNotInsideBaseDir(err error) bool { var e *ErrNotInsideBaseDir return errors.As(err, &e) } func insidePathOf(c, p string) bool { return strings.HasPrefix(c, p) && len(p) < len(c) && c[len(p)] == filepath.Separator } // insideBaseDirEval checks whether filename is contained within // a dir that is within the fs.baseDir, by first evaluating any symlinks // that either filename or fs.baseDir may contain. func (fs *BoundOS) insideBaseDirEval(filename string) (bool, error) { if filename == fs.baseDir { return true, nil } dir, err := filepath.EvalSymlinks(filepath.Dir(filename)) if os.IsNotExist(err) { if insidePathOf(filename, fs.baseDir) { return true, nil } return false, &ErrNotInsideBaseDir{BaseDir: fs.baseDir, Path: filename} } if dir != fs.walkBaseDir && dir != fs.baseDir && !insidePathOf(dir, fs.walkBaseDir) { return false, &ErrNotInsideBaseDir{BaseDir: fs.baseDir, Path: filename} } return true, nil } ================================================ FILE: modules/vfs/bound_test.go ================================================ package vfs import ( "fmt" "os" "testing" ) func TestInsideBaseDir(t *testing.T) { b := &BoundOS{baseDir: "/tmp/zeta-1", deduplicatePath: true} _, _ = b.insideBaseDir("D:////") } func TestInsideBaseDirEval(t *testing.T) { b := &BoundOS{baseDir: "/tmp/zeta-1", deduplicatePath: true} ok, err := b.Lstat("jack") fmt.Fprintf(os.Stderr, "%v %v\n", ok, err) } func TestInsideBaseDirEval2(t *testing.T) { b := &BoundOS{baseDir: "/", deduplicatePath: true} ok, err := b.insideBaseDirEval("abc") fmt.Fprintf(os.Stderr, "%v %v\n", ok, err) } ================================================ FILE: modules/vfs/glob.go ================================================ package vfs import ( "path/filepath" "sort" "strings" ) // Glob returns the names of all files matching pattern or nil // if there is no matching file. The syntax of patterns is the same // as in Match. The pattern may describe hierarchical names such as // /usr/*/bin/ed (assuming the Separator is '/'). // // Glob ignores file system errors such as I/O errors reading directories. // The only possible returned error is ErrBadPattern, when pattern // is malformed. // // Function originally from https://golang.org/src/path/filepath/match_test.go func Glob(fs VFS, pattern string) (matches []string, err error) { if !hasMeta(pattern) { if _, err = fs.Lstat(pattern); err != nil { return nil, nil } return []string{pattern}, nil } dir, file := filepath.Split(pattern) // Prevent infinite recursion. See issue 15879. if dir == pattern { return nil, filepath.ErrBadPattern } var m []string m, err = Glob(fs, cleanGlobPath(dir)) if err != nil { return } for _, d := range m { matches, err = glob(fs, d, file, matches) if err != nil { return } } return } // cleanGlobPath prepares path for glob matching. func cleanGlobPath(path string) string { switch path { case "": return "." case string(filepath.Separator): // do nothing to the path return path default: return path[0 : len(path)-1] // chop off trailing separator } } // glob searches for files matching pattern in the directory dir // and appends them to matches. If the directory cannot be // opened, it returns the existing matches. New matches are // added in lexicographical order. func glob(fs VFS, dir, pattern string, matches []string) (m []string, e error) { m = matches fi, err := fs.Stat(dir) if err != nil { return } if !fi.IsDir() { return } names, _ := readdirnames(fs, dir) sort.Strings(names) for _, n := range names { matched, err := filepath.Match(pattern, n) if err != nil { return m, err } if matched { m = append(m, filepath.Join(dir, n)) } } return } // hasMeta reports whether path contains any of the magic characters // recognized by Match. func hasMeta(path string) bool { // TODO(niemeyer): Should other magic characters be added here? return strings.ContainsAny(path, "*?[") } func readdirnames(fs VFS, dir string) ([]string, error) { files, err := fs.ReadDir(dir) if err != nil { return nil, err } var names []string for _, file := range files { names = append(names, file.Name()) } return names, nil } ================================================ FILE: modules/vfs/vfs.go ================================================ package vfs import ( "io/fs" "os" ) type VFS interface { // Create creates the named file with mode 0666 (before umask), truncating // it if it already exists. If successful, methods on the returned File can // be used for I/O; the associated file descriptor has mode O_RDWR. Create(filename string) (*os.File, error) // Open opens the named file for reading. If successful, methods on the // returned file can be used for reading; the associated file descriptor has // mode O_RDONLY. Open(filename string) (*os.File, error) // OpenFile is the generalized open call; most users will use Open or Create // instead. It opens the named file with specified flag (O_RDONLY etc.) and // perm, (0666 etc.) if applicable. If successful, methods on the returned // File can be used for I/O. OpenFile(filename string, flag int, perm os.FileMode) (*os.File, error) // Stat returns a FileInfo describing the named file. Stat(filename string) (os.FileInfo, error) // Rename renames (moves) oldpath to newpath. If newpath already exists and // is not a directory, Rename replaces it. OS-specific restrictions may // apply when oldpath and newpath are in different directories. Rename(oldpath, newpath string) error // Remove removes the named file or directory. Remove(filename string) error // RemoveAll removes path and any children it contains. // It removes everything it can but returns the first error // it encounters. If the path does not exist, RemoveAll // returns nil (no error). // If there is an error, it will be of type *PathError. RemoveAll(path string) error // Join joins any number of path elements into a single path, adding a // Separator if necessary. Join calls filepath.Clean on the result; in // particular, all empty strings are ignored. On Windows, the result is a // UNC path if and only if the first path element is a UNC path. Join(elem ...string) string // ReadDir reads the directory named by dirname and returns a list of // directory entries sorted by filename. ReadDir(path string) ([]fs.DirEntry, error) // MkdirAll creates a directory named path, along with any necessary // parents, and returns nil, or else returns an error. The permission bits // perm are used for all directories that MkdirAll creates. If path is/ // already a directory, MkdirAll does nothing and returns nil. MkdirAll(filename string, perm os.FileMode) error // Lstat returns a FileInfo describing the named file. If the file is a // symbolic link, the returned FileInfo describes the symbolic link. Lstat // makes no attempt to follow the link. Lstat(filename string) (os.FileInfo, error) // Symlink creates a symbolic-link from link to target. target may be an // absolute or relative path, and need not refer to an existing node. // Parent directories of link are created as necessary. Symlink(target, link string) error // Readlink returns the target path of link. Readlink(link string) (string, error) } func NewVFS(root string) VFS { return newBoundOS(root, true) } ================================================ FILE: modules/viewport/LICENSE ================================================ MIT License Copyright (c) 2026 Leo Robinovitch Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/viewport/README.md ================================================ # Viewport An advanced terminal viewport component for [Bubble Tea](https://github.com/charmbracelet/bubbletea) terminal UI (TUI) applications. This is a fork of [github.com/robinovitch61/viewport](https://github.com/robinovitch61/viewport) integrated into the zeta project. ## Overview The viewport module provides a feature-rich terminal viewport component for building interactive TUI applications. It offers advanced text display capabilities including wrapping, scrolling, selection, and filtering. ## Features ### Core Viewport - **Text wrapping** - Toggleable text wrapping with horizontal panning for unwrapped lines - **ANSI & Unicode support** - Full support for ANSI escape codes and Unicode characters - **Item selection** - Individual item selection with customizable styling - **Sticky scrolling** - Auto-follow new content with sticky top/bottom scrolling - **Sticky header** - Configurable sticky header that remains visible while scrolling - **Highlight ranges** - Highlight specific text ranges with custom styles - **Content saving** - Save viewport content to file - **Efficient concatenation** - Efficient item concatenation via `MultiItem` (e.g., prefixing line numbers) ### Filterable Viewport The `filterableviewport` package extends the core viewport with: - **Multiple filter modes** - Exact, regex, case-insensitive (built-in); custom modes supported - **Match highlighting** - Highlighted matches with focused/unfocused styles - **Match navigation** - Next/previous match navigation - **Matches-only view** - Hide non-matching items - **Match limiting** - Configurable match limit for large content - **Search history** - Browse previous searches (up/down arrow while editing) ## Installation This module is part of the zeta project and is located at `github.com/antgroup/hugescm/modules/viewport`. ## Usage ### Basic Viewport Implement the `Object` interface on your type: ```go import ( "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/item" ) type myObject struct { item item.Item } func (o myObject) GetItem() item.Item { return o.item } ``` Create a viewport and set content: ```go vp := viewport.New[myObject]( width, height, viewport.WithSelectionEnabled[myObject](true), viewport.WithWrapText[myObject](true), ) objects := []myObject{ {item: item.NewItem("first line")}, {item: item.NewItem("second line")}, } vp.SetObjects(objects) ``` Wire it into your Bubble Tea model's `Update` and `View`: ```go func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { var cmd tea.Cmd m.viewport, cmd = m.viewport.Update(msg) return m, cmd } func (m model) View() string { return m.viewport.View() } ``` ### Filterable Viewport Wrap an existing viewport to add filtering: ```go import "github.com/antgroup/hugescm/modules/viewport/filterableviewport" fvp := filterableviewport.New[myObject]( vp, filterableviewport.WithPrefixText[myObject]("Filter:"), filterableviewport.WithEmptyText[myObject]("No Current Filter"), filterableviewport.WithMatchingItemsOnly[myObject](false), filterableviewport.WithCanToggleMatchingItemsOnly[myObject](true), ) fvp.SetObjects(objects) ``` ### Custom Filter Modes Define custom filter logic with a `FilterMode`: ```go import ( "strings" "charm.land/bubbles/v2/key" "github.com/antgroup/hugescm/modules/viewport/filterableviewport" "github.com/antgroup/hugescm/modules/viewport/item" ) const FilterPrefix filterableviewport.FilterModeName = "prefix" prefixMode := filterableviewport.FilterMode{ Name: FilterPrefix, Key: key.NewBinding(key.WithKeys("p"), key.WithHelp("p", "prefix filter")), Label: "[prefix]", GetMatchFunc: func(filterText string) (filterableviewport.MatchFunc, error) { return func(content string) []item.ByteRange { if strings.HasPrefix(content, filterText) { return []item.ByteRange{{Start: 0, End: len(filterText)}} } return nil }, nil }, } ``` ## Default Key Bindings ### Viewport Navigation | Key | Action | |---|---| | `j` / `down` / `enter` | Scroll down | | `k` / `up` | Scroll up | | `f` / `pgdown` / `ctrl+f` / `space` | Page down | | `b` / `pgup` / `ctrl+b` | Page up | | `d` / `ctrl+d` | Half page down | | `u` / `ctrl+u` | Half page up | | `g` / `ctrl+g` / `home` | Jump to top | | `G` / `end` | Jump to bottom | | `left` / `right` | Horizontal pan | > **Note**: The viewport does not handle quit keys (`q`, `esc`, `ctrl+c`) - this is intentional as viewport is a generic scrolling component and the quit logic should be handled by the parent application. ### Filterable Viewport | Key | Action | |---|---| | `/` | Start exact filter | | `r` | Start regex filter | | `i` | Start case-insensitive filter | | `enter` | Apply filter | | `esc` | Cancel/clear filter | | `n` | Next match | | `N` (shift+n) | Previous match | | `o` | Toggle matches-only view | | `up` / `down` | Browse search history (while editing) | ## License MIT License - See [LICENSE](LICENSE) file for details. Original work Copyright (c) 2026 Leo Robinovitch ================================================ FILE: modules/viewport/configuration.go ================================================ package viewport import ( "charm.land/bubbles/v2/key" "charm.land/bubbles/v2/textinput" ) // fileSaveState tracks the state of file saving operations type fileSaveState struct { // saving is true when a save operation is in progress saving bool // showingResult is true when displaying save result showingResult bool // resultMsg is the message to display (filename or error) resultMsg string // isError is true if resultMsg is an error message isError bool // enteringFilename is true when user is typing a filename enteringFilename bool // filenameInput is the text input component for filename entry filenameInput textinput.Model } // configuration consolidates all configuration options for the viewport type configuration struct { // wrapText is true if the viewport wraps text rather than showing that a line is truncated/horizontally scrollable wrapText bool // footerEnabled is true if the viewport currently shows the footer based on its dimensions and content footerEnabled bool // continuationIndicator is the string to use to indicate that an unwrapped line continues to the left or right continuationIndicator string // postHeaderLine is an optional line to render just below the header. // When non-empty, takes up one line of vertical space. postHeaderLine string // preFooterLine is an optional line to render just above the footer. // When non-empty, takes up one line of vertical space. preFooterLine string // saveDir is the directory where files are saved when the save key is pressed saveDir string // saveKey is the key binding for saving viewport content to a file saveKey key.Binding // saveState tracks file saving state saveState fileSaveState // selectionStyleOverridesItemStyle controls whether the selection style replaces the item's // existing ANSI styling. When true (default), the selected item is stripped of its original // styling and the selection style is applied to all non-highlighted regions. When false, // the item keeps its original styling and the selection style is applied only to unstyled regions. selectionStyleOverridesItemStyle bool // progressBarEnabled controls whether the footer shows a Unicode progress bar in the footer progressBarEnabled bool } // newConfiguration creates a new configuration with default settings. func newConfiguration() *configuration { return &configuration{ wrapText: false, footerEnabled: true, continuationIndicator: "...", saveDir: "", saveKey: key.NewBinding(), selectionStyleOverridesItemStyle: true, } } ================================================ FILE: modules/viewport/content_manager.go ================================================ package viewport import "github.com/antgroup/hugescm/modules/viewport/item" // contentManager manages the actual Item and selection state type contentManager[T Object] struct { // objects is the viewport objects objects []T // header is the unselectable lines at the top of the viewport // these lines wrap, but don't pan horizontally like other non-wrapped lines header []string // selectedIdx is the index of objects of the current selection (only relevant when selection is enabled) selectedIdx int // highlights is what to highlight wherever it shows up within an item, even wrapped between lines highlights []Highlight // itemHighlightsByIndex is a cache of item highlights indexed by item index itemHighlightsByIndex map[int][]item.Highlight // compareFn is an optional function to compare items for maintaining the selection when Item changes // if set, the viewport will try to maintain the previous selected item when Item changes compareFn CompareFn[T] } // newContentManager creates a new contentManager with empty initial state func newContentManager[T Object]() *contentManager[T] { return &contentManager[T]{ objects: make([]T, 0), header: []string{}, selectedIdx: 0, itemHighlightsByIndex: make(map[int][]item.Highlight), } } // setSelectedIdx sets the selected item index func (cm *contentManager[T]) setSelectedIdx(idx int) { cm.selectedIdx = clampValZeroToMax(idx, len(cm.objects)-1) } // getSelectedIdx returns the current selected item index func (cm *contentManager[T]) getSelectedIdx() int { return cm.selectedIdx } // getSelectedItem returns a pointer to the currently selected item, or nil if none selected func (cm *contentManager[T]) getSelectedItem() *T { if cm.selectedIdx >= len(cm.objects) || cm.selectedIdx < 0 { return nil } return &cm.objects[cm.selectedIdx] } // numItems returns the total number of items func (cm *contentManager[T]) numItems() int { return len(cm.objects) } // isEmpty returns true if there are no items func (cm *contentManager[T]) isEmpty() bool { return len(cm.objects) == 0 } // rebuildHighlightsCache rebuilds the internal highlight cache func (cm *contentManager[T]) rebuildHighlightsCache() { cm.itemHighlightsByIndex = make(map[int][]item.Highlight) for _, highlight := range cm.highlights { itemIdx := highlight.ItemIndex cm.itemHighlightsByIndex[itemIdx] = append(cm.itemHighlightsByIndex[itemIdx], highlight.ItemHighlight) } } // setHighlights sets the highlights func (cm *contentManager[T]) setHighlights(highlights []Highlight) { cm.highlights = highlights cm.rebuildHighlightsCache() } // getHighlights returns all highlights func (cm *contentManager[T]) getHighlights() []Highlight { return cm.highlights } // getItemHighlightsForItem returns highlights for a specific item index func (cm *contentManager[T]) getItemHighlightsForItem(itemIndex int) []item.Highlight { return cm.itemHighlightsByIndex[itemIndex] } ================================================ FILE: modules/viewport/display_manager.go ================================================ package viewport import ( "charm.land/lipgloss/v2" ) // displayManager handles all display/rendering concerns type displayManager struct { // bounds contains the viewport dimensions in terminal cells bounds rectangle // topItemIdx is the index of the topmost visible item topItemIdx int // topItemLineOffset is the number of lines in the top item that are above the first visible line // Only non-zero when wrapped topItemLineOffset int // xOffset is the number of terminal cells (width) scrolled right when lines overflow and wrapping is off xOffset int // styles contains the styling configuration styles Styles } // newDisplayManager creates a new displayManager with the specified dimensions and styles func newDisplayManager(width, height int, styles Styles) *displayManager { return &displayManager{ bounds: rectangle{ width: max(0, width), height: max(0, height), }, topItemIdx: 0, topItemLineOffset: 0, xOffset: 0, styles: styles, } } // setBounds sets the viewport dimensions with validation func (dm *displayManager) setBounds(r rectangle) { r.width, r.height = max(0, r.width), max(0, r.height) dm.bounds = r } // setTopItemIdxAndOffset sets the top item index and line offset func (dm *displayManager) setTopItemIdxAndOffset(topItemIdx, topItemLineOffset int) { dm.topItemIdx, dm.topItemLineOffset = topItemIdx, topItemLineOffset } // getNumContentLines returns the number of lines in the content func (dm *displayManager) getNumContentLines(headerLines int, hasPostHeader bool, hasPreFooter bool, showFooter bool) int { contentHeight := dm.bounds.height - headerLines if hasPostHeader { contentHeight-- // one for post-header } if hasPreFooter { contentHeight-- // one for pre-footer } if showFooter { contentHeight-- // one for footer } return max(0, contentHeight) } // render applies final styling to the display func (dm *displayManager) render(display string) string { return lipgloss.NewStyle().Width(dm.bounds.width).Height(dm.bounds.height).Render(display) } // rectangle represents a rectangular area type rectangle struct { width, height int } ================================================ FILE: modules/viewport/filterableviewport/filterableviewport.go ================================================ package filterableviewport import ( "fmt" "strings" "charm.land/bubbles/v2/key" "charm.land/bubbles/v2/textinput" tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/item" ) type filterMode int const ( filterModeOff filterMode = iota filterModeEditing filterModeApplied ) // FilterLinePosition controls where the filter line is rendered type FilterLinePosition int const ( // FilterLineBottom renders the filter line just above the footer (default) FilterLineBottom FilterLinePosition = iota // FilterLineTop renders the filter line just below the header FilterLineTop ) // Option is a functional option for configuring the filterable viewport type Option[T viewport.Object] func(*Model[T]) // WithKeyMap sets the key mapping for the viewport func WithKeyMap[T viewport.Object](keyMap KeyMap) Option[T] { return func(m *Model[T]) { m.keyMap = keyMap } } // WithStyles sets the styles for the filterable viewport func WithStyles[T viewport.Object](styles Styles) Option[T] { return func(m *Model[T]) { m.styles = styles } } // WithPrefixText sets the prefix text for the filter line func WithPrefixText[T viewport.Object](prefix string) Option[T] { return func(m *Model[T]) { m.prefixText = prefix } } // WithEmptyText sets the text to display when the filter is empty func WithEmptyText[T viewport.Object](whenEmpty string) Option[T] { return func(m *Model[T]) { m.emptyText = whenEmpty } } // WithMatchingItemsOnly sets whether to show only the matching items func WithMatchingItemsOnly[T viewport.Object](matchingItemsOnly bool) Option[T] { return func(m *Model[T]) { m.matchingItemsOnly = matchingItemsOnly } } // WithCanToggleMatchingItemsOnly sets whether this viewport can toggle matching items only mode func WithCanToggleMatchingItemsOnly[T viewport.Object](canToggleMatchingItemsOnly bool) Option[T] { return func(m *Model[T]) { m.canToggleMatchingItemsOnly = canToggleMatchingItemsOnly } } // WithVerticalPad sets the number of lines of context to keep above/below the focused match (scrolloff) func WithVerticalPad[T viewport.Object](verticalPad int) Option[T] { return func(m *Model[T]) { m.verticalPad = verticalPad } } // WithHorizontalPad sets the number of columns of context to keep left/right of the focused match (panoff) func WithHorizontalPad[T viewport.Object](horizontalPad int) Option[T] { return func(m *Model[T]) { m.horizontalPad = horizontalPad } } // WithMaxMatchLimit sets the maximum number of matches when searching. // When this limit is exceeded, match highlighting and navigation are disabled // and all items are shown regardless of matchingItemsOnly setting. // Set to 0 for unlimited matches. Default is 30000. func WithMaxMatchLimit[T viewport.Object](maxMatchLimit int) Option[T] { return func(m *Model[T]) { m.maxMatchLimit = maxMatchLimit } } // WithAdjustObjectsForFilter sets a function that returns the visible filterable viewport objects // based on the current filter. It's called internally whenever the filter changes. Use this when // your visible objects depend on the filter in complex ways—for example, a tree view where matching // one node should also show parent and child nodes. Return nil to keep the current objects unmodified. // This is independent behavior from SetMatchingItemsOnly - when showing matching items only, the filterable viewport // will still call this function to determine which items to show, but it will also filter that list down to matching // items only. See tests for concrete examples of use. func WithAdjustObjectsForFilter[T viewport.Object](fn func(filterText string, mode FilterModeName) []T) Option[T] { return func(m *Model[T]) { m.adjustObjectsForFilter = fn } } // WithFilterModes sets the filter modes for the filterable viewport. // If not provided, New() defaults to DefaultFilterModes(). func WithFilterModes[T viewport.Object](modes []FilterMode) Option[T] { return func(m *Model[T]) { m.filterModes = modes } } // WithFilterLinePosition sets whether the filter line renders at the top (below header) or bottom (above footer) func WithFilterLinePosition[T viewport.Object](position FilterLinePosition) Option[T] { return func(m *Model[T]) { m.filterLinePosition = position } } // WithFilterLinePrefix sets a string that is always prepended to the filter line, regardless of filter state. func WithFilterLinePrefix[T viewport.Object](prefix string) Option[T] { return func(m *Model[T]) { m.filterLinePrefix = prefix } } // WithItemDescriptor sets a word describing the items (e.g. "logs", "events"). // When set, match count text includes the total item count: "4/5 matches on 10 logs". // When empty (default), just "4/5 matches" is shown. func WithItemDescriptor[T viewport.Object](descriptor string) Option[T] { return func(m *Model[T]) { m.itemDescriptor = descriptor } } // SetFilterLinePrefix updates the string prepended to the filter line and re-renders it. func (m *Model[T]) SetFilterLinePrefix(prefix string) { m.filterLinePrefix = prefix m.setFilterLine(m.renderFilterLine()) } // SetAdjustObjectsForFilter updates the function used to adjust visible objects when the filter changes. func (m *Model[T]) SetAdjustObjectsForFilter(fn func(filterText string, mode FilterModeName) []T) { m.adjustObjectsForFilter = fn } // Model is the state and logic for a filterable viewport type Model[T viewport.Object] struct { vp *viewport.Model[T] keyMap KeyMap filterTextInput textinput.Model filterMode filterMode prefixText string emptyText string filterLinePosition FilterLinePosition filterLinePrefix string objects []T filterModes []FilterMode filterModesByName map[FilterModeName]int // name -> index in filterModes activeFilterModeName FilterModeName // "" when no mode active lastActiveFilterModeName FilterModeName styles Styles itemDescriptor string matchingItemsOnly bool canToggleMatchingItemsOnly bool allMatches []viewport.Highlight numMatchingItems int focusedMatchIdx int previousFocusedMatchIdx int totalMatchesOnAllItems int itemIdxToFilteredIdx map[int]int matchWidthsByMatchIdx map[int]item.WidthRange lastFilterValue string maxMatchLimit int // 0 = unlimited matchLimitExceeded bool adjustObjectsForFilter func(filterText string, mode FilterModeName) []T verticalPad int horizontalPad int searchHistory []string // oldest at 0, newest at end searchHistoryIdx int // index into searchHistory; == len(searchHistory) means "at draft" searchHistoryDraft string // current unsaved input preserved while browsing } // New creates a new filterable viewport model with default configuration func New[T viewport.Object](vp *viewport.Model[T], opts ...Option[T]) *Model[T] { ti := textinput.New() ti.CharLimit = 0 ti.Prompt = "" // Use unstyled text so the filter line doesn't include ANSI color codes // from the textinput's default dark theme styling. tiStyles := ti.Styles() tiStyles.Focused.Text = lipgloss.NewStyle() tiStyles.Blurred.Text = lipgloss.NewStyle() tiStyles.Focused.Placeholder = lipgloss.NewStyle() tiStyles.Blurred.Placeholder = lipgloss.NewStyle() ti.SetStyles(tiStyles) defaultKeyMap := DefaultKeyMap() defaultStyles := DefaultStyles() m := &Model[T]{ vp: vp, keyMap: defaultKeyMap, filterTextInput: ti, filterMode: filterModeOff, prefixText: "", emptyText: "No Filter", objects: []T{}, filterModes: DefaultFilterModes(), activeFilterModeName: "", lastActiveFilterModeName: "", styles: defaultStyles, matchingItemsOnly: false, canToggleMatchingItemsOnly: true, allMatches: []viewport.Highlight{}, numMatchingItems: 0, focusedMatchIdx: -1, previousFocusedMatchIdx: -1, totalMatchesOnAllItems: 0, itemIdxToFilteredIdx: make(map[int]int), matchWidthsByMatchIdx: make(map[int]item.WidthRange), lastFilterValue: "", maxMatchLimit: 30000, // reasonable default matchLimitExceeded: false, verticalPad: 0, horizontalPad: 0, searchHistory: []string{}, searchHistoryIdx: 0, } m.SetHeight(vp.GetHeight()) for _, opt := range opts { if opt != nil { opt(m) } } // validate that at least one filter mode is set if len(m.filterModes) == 0 { panic("filterableviewport: no filter modes set; use viewport.Model directly if filtering is not needed") } // build name -> index lookup and validate uniqueness m.filterModesByName = make(map[FilterModeName]int, len(m.filterModes)) for i, mode := range m.filterModes { if mode.Name == "" { panic(fmt.Sprintf("filterableviewport: FilterMode at index %d has empty Name", i)) } if _, exists := m.filterModesByName[mode.Name]; exists { panic(fmt.Sprintf("filterableviewport: duplicate FilterModeName %q", mode.Name)) } m.filterModesByName[mode.Name] = i } // set initial pre-footer line m.setFilterLine(m.renderFilterLine()) return m } // Init initializes the filterable viewport model func (m *Model[T]) Init() tea.Cmd { return nil } // Update processes messages and updates the model state func (m *Model[T]) Update(msg tea.Msg) (*Model[T], tea.Cmd) { var cmd tea.Cmd var cmds []tea.Cmd if m.vp.IsCapturingInput() { m.vp, cmd = m.vp.Update(msg) return m, cmd } switch msg := msg.(type) { case tea.KeyMsg: // check if any filter mode key matches if m.filterMode != filterModeEditing { for i := range m.filterModes { if key.Matches(msg, m.filterModes[i].Key) { m.activeFilterModeName = m.filterModes[i].Name m.filterTextInput.Focus() m.filterMode = filterModeEditing m.resetSearchHistoryBrowsing() m.updateMatchingItems() m.ensureCurrentMatchInView() return m, textinput.Blink } } } switch { case key.Matches(msg, m.keyMap.ApplyFilterKey): if m.filterMode == filterModeEditing { m.addToSearchHistory(m.filterTextInput.Value()) m.filterTextInput.Blur() m.filterMode = filterModeApplied m.resetSearchHistoryBrowsing() m.updateMatchingItems() m.ensureCurrentMatchInView() return m, nil } case key.Matches(msg, m.keyMap.ToggleMatchingItemsOnlyKey): if m.filterMode != filterModeEditing && m.canToggleMatchingItemsOnly { m.matchingItemsOnly = !m.matchingItemsOnly m.updateMatchingItems() m.ensureCurrentMatchInView() return m, nil } case key.Matches(msg, m.keyMap.NextMatchKey): if m.filterMode != filterModeEditing && m.filterMode != filterModeOff && len(m.allMatches) > 0 { m.navigateToNextMatch() return m, nil } case key.Matches(msg, m.keyMap.PrevMatchKey): if m.filterMode != filterModeEditing && m.filterMode != filterModeOff && len(m.allMatches) > 0 { m.navigateToPrevMatch() return m, nil } case key.Matches(msg, m.keyMap.CancelFilterKey): m.filterMode = filterModeOff m.activeFilterModeName = "" m.filterTextInput.Blur() m.filterTextInput.SetValue("") m.resetSearchHistoryBrowsing() m.updateMatchingItems() m.ensureCurrentMatchInView() return m, nil case key.Matches(msg, m.keyMap.SearchHistoryPrevKey): if m.filterMode == filterModeEditing && len(m.searchHistory) > 0 { m.navigateSearchHistoryPrev() m.updateMatchingItems() m.ensureCurrentMatchInView() return m, nil } case key.Matches(msg, m.keyMap.SearchHistoryNextKey): if m.filterMode == filterModeEditing && m.searchHistoryIdx < len(m.searchHistory) { m.navigateSearchHistoryNext() m.updateMatchingItems() m.ensureCurrentMatchInView() return m, nil } } } if m.filterMode != filterModeEditing { prevSelectedIdx := m.vp.GetSelectedItemIdx() m.vp, cmd = m.vp.Update(msg) cmds = append(cmds, cmd) // when the selection moves, re-evaluate focused match highlight style // since it differs depending on whether the focused match is on the selected item if m.vp.GetSelectedItemIdx() != prevSelectedIdx && len(m.allMatches) > 0 { m.updateFocusedMatchHighlight() } } else { m.filterTextInput, cmd = m.filterTextInput.Update(msg) m.updateMatchingItems() m.ensureCurrentMatchInView() cmds = append(cmds, cmd) } return m, tea.Batch(cmds...) } // View renders the filterable viewport model as a string func (m *Model[T]) View() string { return m.vp.View() } // GetWidth returns the width of the filterable viewport func (m *Model[T]) GetWidth() int { return m.vp.GetWidth() } // SetWidth updates the width of both the viewport and textinput func (m *Model[T]) SetWidth(width int) { m.vp.SetWidth(width) m.setFilterLine(m.renderFilterLine()) } // GetHeight returns the height of the filterable viewport func (m *Model[T]) GetHeight() int { return m.vp.GetHeight() } // SetHeight updates the height of the filterable viewport func (m *Model[T]) SetHeight(height int) { m.vp.SetHeight(height) } // SetObjects sets the viewport objects func (m *Model[T]) SetObjects(objects []T) { if objects == nil { objects = []T{} } m.objects = objects m.updateMatchingItems() } // AppendObjects appends objects to the viewport's existing objects func (m *Model[T]) AppendObjects(objects []T) { if objects == nil { return } startIdx := len(m.objects) m.objects = append(m.objects, objects...) // if filter active and not at limit, do incremental update if m.filterMode != filterModeOff && m.filterTextInput.Value() != "" && !m.matchLimitExceeded { m.appendMatchesForNewObjects(startIdx, objects) } else if m.matchLimitExceeded { // already at limit, just update viewport with all objects m.vp.SetObjects(m.objects) } else { m.updateMatchingItems() } } // FilterFocused returns true if the filter text input is focused func (m *Model[T]) FilterFocused() bool { return m.filterTextInput.Focused() } // IsCapturingInput returns true when the filterableviewport or its underlying // viewport is capturing input (e.g., filter entry, filename entry). Callers // should check this before processing their own key bindings. func (m *Model[T]) IsCapturingInput() bool { return m.filterTextInput.Focused() || m.vp.IsCapturingInput() } // GetWrapText returns whether text wrapping is enabled in the viewport func (m *Model[T]) GetWrapText() bool { return m.vp.GetWrapText() } // SetWrapText sets whether text wrapping is enabled in the viewport func (m *Model[T]) SetWrapText(wrapText bool) { m.vp.SetWrapText(wrapText) } // GetSelectionEnabled returns whether selection is enabled in the viewport func (m *Model[T]) GetSelectionEnabled() bool { return m.vp.GetSelectionEnabled() } // SetSelectionEnabled sets whether selection is enabled in the viewport func (m *Model[T]) SetSelectionEnabled(selectionEnabled bool) { m.vp.SetSelectionEnabled(selectionEnabled) } // GetFilterText returns the current filter text func (m *Model[T]) GetFilterText() string { return m.filterTextInput.Value() } // GetActiveFilterMode returns the currently active filter mode, or nil if none. func (m *Model[T]) GetActiveFilterMode() *FilterMode { idx, ok := m.filterModesByName[m.activeFilterModeName] if !ok { return nil } return &m.filterModes[idx] } // FilterModes returns the configured filter modes. func (m *Model[T]) FilterModes() []FilterMode { return m.filterModes } // GetSelectedItem returns the currently selected item, or nil if no selection func (m *Model[T]) GetSelectedItem() *T { return m.vp.GetSelectedItem() } // GetSelectedItemIdx returns the index of the currently selected item func (m *Model[T]) GetSelectedItemIdx() int { return m.vp.GetSelectedItemIdx() } // SetSelectedItemIdx sets the selected item index func (m *Model[T]) SetSelectedItemIdx(idx int) { m.vp.SetSelectedItemIdx(idx) } // SetTopSticky sets whether selection sticks to the top func (m *Model[T]) SetTopSticky(topSticky bool) { m.vp.SetTopSticky(topSticky) } // SetBottomSticky sets whether selection sticks to the bottom func (m *Model[T]) SetBottomSticky(bottomSticky bool) { m.vp.SetBottomSticky(bottomSticky) } // SetHeader sets the viewport header lines func (m *Model[T]) SetHeader(header []string) { m.vp.SetHeader(header) } // SetSelectionComparator sets the function used to maintain selection across object updates func (m *Model[T]) SetSelectionComparator(compareFn viewport.CompareFn[T]) { m.vp.SetSelectionComparator(compareFn) } // SetFilter sets the filter text and mode programmatically. // Use the FilterModeName constants (e.g. FilterExact, FilterRegex) or your own custom names. func (m *Model[T]) SetFilter(value string, mode FilterModeName) { m.filterTextInput.SetValue(value) if _, ok := m.filterModesByName[mode]; ok { m.activeFilterModeName = mode } if value != "" && m.filterMode == filterModeOff { m.filterMode = filterModeApplied } else if value == "" { m.filterMode = filterModeOff m.activeFilterModeName = "" } m.updateMatchingItems() m.ensureCurrentMatchInView() } // GetMatchingItemsOnly returns whether only matching items are shown func (m *Model[T]) GetMatchingItemsOnly() bool { return m.matchingItemsOnly } // SetMatchingItemsOnly sets whether to show only matching items func (m *Model[T]) SetMatchingItemsOnly(matchingItemsOnly bool) { m.matchingItemsOnly = matchingItemsOnly m.updateMatchingItems() } // SetFilterableViewportStyles sets the styles for the filterable viewport func (m *Model[T]) SetFilterableViewportStyles(styles Styles) { m.styles = styles // re-apply highlights with new styles m.updateFocusedMatchHighlight() } // SetViewportStyles sets styles on the underlying viewport func (m *Model[T]) SetViewportStyles(styles viewport.Styles) { m.vp.SetStyles(styles) } // updateMatchingItems recalculates the matching items and updates match tracking func (m *Model[T]) updateMatchingItems() { matchingObjects, filterChanged := m.getMatchingObjectsAndUpdateMatches() if !m.matchLimitExceeded { m.numMatchingItems = len(matchingObjects) } // when match limit exceeded, show all objects if m.showMatchesOnly() { m.vp.SetObjects(matchingObjects) } else { m.vp.SetObjects(m.objects) } // when no matches found with an active filter and items are unwrapped, reset horizontal scroll if m.totalMatchesOnAllItems == 0 && m.filterMode != filterModeOff && m.filterTextInput.Value() != "" && !m.vp.GetWrapText() { m.vp.SetXOffset(0) } // when the filter changed, move selection to the focused match if filterChanged { m.setSelectionToCurrentMatch() } m.updateFocusedMatchHighlight() // update the pre-footer line with the current filter state m.setFilterLine(m.renderFilterLine()) } // updateFocusedMatchHighlight sets a specific highlight for the currently focused match func (m *Model[T]) updateFocusedMatchHighlight() { if m.focusedMatchIdx < 0 || m.focusedMatchIdx >= len(m.allMatches) { m.vp.SetHighlights(nil) return } selectedIdx := m.vp.GetSelectedItemIdx() // try to update only changed highlights if only focus changed if m.canUpdateHighlightsIncrementally() { if m.updateHighlightsIncrementally(selectedIdx) { m.previousFocusedMatchIdx = m.focusedMatchIdx return } } // otherwise, rebuild all highlights m.rebuildAllHighlights(selectedIdx) } // canUpdateHighlightsIncrementally checks if we can update highlights without rebuilding func (m *Model[T]) canUpdateHighlightsIncrementally() bool { return m.previousFocusedMatchIdx >= 0 && m.previousFocusedMatchIdx < len(m.allMatches) && m.focusedMatchIdx != m.previousFocusedMatchIdx && len(m.allMatches) > 0 } // updateHighlightsIncrementally updates only the changed highlights func (m *Model[T]) updateHighlightsIncrementally(selectedIdx int) bool { currentHighlights := m.vp.GetHighlights() if len(currentHighlights) != len(m.allMatches) { return false } m.unfocusPreviousHighlight(currentHighlights) m.focusCurrentHighlight(currentHighlights, selectedIdx) m.vp.SetHighlights(currentHighlights) return true } // unfocusPreviousHighlight sets the previous highlight to unfocused style func (m *Model[T]) unfocusPreviousHighlight(highlights []viewport.Highlight) { if m.previousFocusedMatchIdx < len(highlights) { highlights[m.previousFocusedMatchIdx].ItemHighlight.Style = m.styles.Match.Unfocused } } // focusCurrentHighlight sets the current highlight to focused style func (m *Model[T]) focusCurrentHighlight(highlights []viewport.Highlight, selectedIdx int) { if m.focusedMatchIdx >= len(highlights) { return } focusedItemIdx := m.allMatches[m.focusedMatchIdx].ItemIndex if m.matchingItemsOnly { if filteredIdx, ok := m.itemIdxToFilteredIdx[focusedItemIdx]; ok { focusedItemIdx = filteredIdx } } if m.vp.GetSelectionEnabled() && focusedItemIdx == selectedIdx { highlights[m.focusedMatchIdx].ItemHighlight.Style = m.styles.Match.FocusedIfSelected } else { highlights[m.focusedMatchIdx].ItemHighlight.Style = m.styles.Match.Focused } } // rebuildAllHighlights reconstructs all highlights from scratch func (m *Model[T]) rebuildAllHighlights(selectedIdx int) { highlights := make([]viewport.Highlight, len(m.allMatches)) for matchIdx, match := range m.allMatches { itemIdx := m.getItemIdxForMatch(match.ItemIndex) style := m.getMatchStyle(matchIdx, itemIdx, selectedIdx) highlights[matchIdx] = viewport.Highlight{ ItemIndex: itemIdx, ItemHighlight: item.Highlight{ Style: style, ByteRangeUnstyledContent: match.ItemHighlight.ByteRangeUnstyledContent, }, } } m.vp.SetHighlights(highlights) m.previousFocusedMatchIdx = m.focusedMatchIdx } // getItemIdxForMatch converts match item index to display item index func (m *Model[T]) getItemIdxForMatch(itemIdx int) int { if m.matchingItemsOnly { if filteredIdx, ok := m.itemIdxToFilteredIdx[itemIdx]; ok { return filteredIdx } panic("focused match item index not found in filtered items") } return itemIdx } // getMatchStyle returns the appropriate style for a match func (m *Model[T]) getMatchStyle(matchIdx, itemIdx, selectedIdx int) lipgloss.Style { if matchIdx != m.focusedMatchIdx { return m.styles.Match.Unfocused } if m.vp.GetSelectionEnabled() && itemIdx == selectedIdx { return m.styles.Match.FocusedIfSelected } return m.styles.Match.Focused } func (m *Model[T]) renderFilterLine() string { var filterContent string switch m.filterMode { case filterModeOff: filterContent = m.emptyText case filterModeEditing, filterModeApplied: if m.filterTextInput.Value() == "" && m.filterMode == filterModeApplied { filterContent = m.emptyText } else { filterContent = strings.Join(removeEmpty([]string{ m.getModeIndicator(), m.prefixText, m.filterTextInput.View(), m.getTextAfterFilter(), matchingItemsOnlyText(m.showMatchesOnly()), }), " ", ) } default: panic(fmt.Sprintf("invalid filter mode: %d", m.filterMode)) } filterLine := strings.Join(removeEmpty([]string{m.filterLinePrefix, filterContent}), " ") filterItem := item.NewItem(filterLine) res, _ := filterItem.Take(0, m.GetWidth(), "...", []item.Highlight{}) return res } // setFilterLine sets the rendered filter line on the appropriate viewport line based on position func (m *Model[T]) setFilterLine(line string) { switch m.filterLinePosition { case FilterLineBottom: m.vp.SetPreFooterLine(line) case FilterLineTop: m.vp.SetPostHeaderLine(line) } } func (m *Model[T]) getModeIndicator() string { if mode := m.GetActiveFilterMode(); mode != nil { return mode.Label } return "" } // getMatchingObjectsAndUpdateMatches filters objects and updates match tracking. // Returns the matching objects and whether the filter value changed. func (m *Model[T]) getMatchingObjectsAndUpdateMatches() ([]T, bool) { filterValue := m.filterTextInput.Value() filterChanged := filterValue != m.lastFilterValue || m.activeFilterModeName != m.lastActiveFilterModeName m.lastFilterValue = filterValue m.lastActiveFilterModeName = m.activeFilterModeName if filterChanged && m.adjustObjectsForFilter != nil { modeName := m.activeFilterModeName if modeName == "" && len(m.filterModes) > 0 { modeName = m.filterModes[0].Name } if newObjects := m.adjustObjectsForFilter(filterValue, modeName); newObjects != nil { m.objects = newObjects } } m.allMatches = []viewport.Highlight{} prevFocusedMatchIdx := m.focusedMatchIdx m.focusedMatchIdx = -1 m.totalMatchesOnAllItems = 0 m.itemIdxToFilteredIdx = make(map[int]int) m.matchLimitExceeded = false if m.filterMode == filterModeOff || filterValue == "" { return m.objects, filterChanged } // get the MatchFunc from the active mode var matchFn MatchFunc if mode := m.GetActiveFilterMode(); mode != nil { var err error matchFn, err = mode.GetMatchFunc(filterValue) if err != nil { return []T{}, filterChanged } } if matchFn == nil { return m.objects, filterChanged } var highlights []viewport.Highlight matchIdx := 0 totalMatchCount := 0 maxReached := false itemsWithMatchesSet := make(map[int]bool) for itemIdx := range m.objects { matches := m.extractMatches(m.objects[itemIdx], matchFn) if len(matches) > 0 { itemsWithMatchesSet[itemIdx] = true } if m.maxMatchLimit > 0 && totalMatchCount+len(matches) > m.maxMatchLimit { maxReached = true break } totalMatchCount += len(matches) newHighlights := m.buildHighlightsFromMatches(itemIdx, matches, matchIdx) matchIdx += len(matches) highlights = append(highlights, newHighlights...) } m.matchLimitExceeded = maxReached if maxReached { // clear match state and return all objects - no highlighting or navigation when limit exceeded m.allMatches = []viewport.Highlight{} m.focusedMatchIdx = -1 m.totalMatchesOnAllItems = totalMatchCount // count of items with matches up to the limit m.numMatchingItems = len(itemsWithMatchesSet) return m.objects, filterChanged } filteredObjects := make([]T, 0, len(m.objects)) itemsWithMatches := make(map[int]bool) for _, highlight := range highlights { itemIdx := highlight.ItemIndex if !itemsWithMatches[itemIdx] { filteredObjects = append(filteredObjects, m.objects[itemIdx]) m.itemIdxToFilteredIdx[itemIdx] = len(filteredObjects) - 1 itemsWithMatches[itemIdx] = true } m.allMatches = append(m.allMatches, highlight) } m.totalMatchesOnAllItems = len(m.allMatches) if filterChanged { if m.totalMatchesOnAllItems > 0 { m.focusedMatchIdx = 0 } else { m.focusedMatchIdx = -1 } } else { if prevFocusedMatchIdx >= 0 && prevFocusedMatchIdx < len(m.allMatches) { m.focusedMatchIdx = prevFocusedMatchIdx } else if m.totalMatchesOnAllItems > 0 { m.focusedMatchIdx = 0 } else { m.focusedMatchIdx = -1 } } return filteredObjects, filterChanged } // appendMatchesForNewObjects processes only newly appended objects for matches // and incrementally updates match state without rescanning existing objects func (m *Model[T]) appendMatchesForNewObjects(startIdx int, newObjects []T) { filterValue := m.filterTextInput.Value() var matchFn MatchFunc if mode := m.GetActiveFilterMode(); mode != nil { var err error matchFn, err = mode.GetMatchFunc(filterValue) if err != nil { // invalid match (e.g. bad regex), fallback to full update m.updateMatchingItems() return } } if matchFn == nil { m.updateMatchingItems() return } matchIdx := len(m.allMatches) totalMatchCount := m.totalMatchesOnAllItems prevNumMatchingItems := m.numMatchingItems itemsWithMatchesSet := make(map[int]bool) var newHighlights []viewport.Highlight for i, obj := range newObjects { itemIdx := startIdx + i matches := m.extractMatches(obj, matchFn) if len(matches) > 0 { itemsWithMatchesSet[itemIdx] = true } if m.maxMatchLimit > 0 && totalMatchCount+len(matches) > m.maxMatchLimit { // transition to match limit exceeded m.matchLimitExceeded = true m.allMatches = []viewport.Highlight{} m.focusedMatchIdx = -1 m.totalMatchesOnAllItems = totalMatchCount m.numMatchingItems = prevNumMatchingItems + len(itemsWithMatchesSet) m.vp.SetObjects(m.objects) m.updateFocusedMatchHighlight() // update the pre-footer line with the current filter state m.setFilterLine(m.renderFilterLine()) return } totalMatchCount += len(matches) highlights := m.buildHighlightsFromMatches(itemIdx, matches, matchIdx) matchIdx += len(matches) newHighlights = append(newHighlights, highlights...) } // append new matches to existing m.allMatches = append(m.allMatches, newHighlights...) m.totalMatchesOnAllItems = totalMatchCount m.numMatchingItems = prevNumMatchingItems + len(itemsWithMatchesSet) // update viewport objects if m.showMatchesOnly() { // build filtered objects list including new matching items filteredObjects := make([]T, 0, m.numMatchingItems) itemsWithMatches := make(map[int]bool) for _, highlight := range m.allMatches { itemIdx := highlight.ItemIndex if !itemsWithMatches[itemIdx] { filteredObjects = append(filteredObjects, m.objects[itemIdx]) m.itemIdxToFilteredIdx[itemIdx] = len(filteredObjects) - 1 itemsWithMatches[itemIdx] = true } } m.vp.SetObjects(filteredObjects) } else { // already updated by append to m.objects m.vp.SetObjects(m.objects) } m.updateFocusedMatchHighlight() // update the pre-footer line with the current filter state m.setFilterLine(m.renderFilterLine()) } // extractMatches extracts matches from an object using the provided MatchFunc func (m *Model[T]) extractMatches(obj T, matchFn MatchFunc) []item.Match { itm := obj.GetItem() byteRanges := matchFn(itm.ContentNoAnsi()) return itm.ByteRangesToMatches(byteRanges) } // buildHighlightsFromMatches creates viewport highlights from item matches func (m *Model[T]) buildHighlightsFromMatches(itemIdx int, matches []item.Match, startMatchIdx int) []viewport.Highlight { highlights := make([]viewport.Highlight, 0, len(matches)) matchIdx := startMatchIdx for i := range matches { m.matchWidthsByMatchIdx[matchIdx] = matches[i].WidthRange matchIdx++ highlight := viewport.Highlight{ ItemIndex: itemIdx, ItemHighlight: item.Highlight{ Style: m.styles.Match.Unfocused, ByteRangeUnstyledContent: matches[i].ByteRange, }, } highlights = append(highlights, highlight) } return highlights } func (m *Model[T]) showMatchesOnly() bool { return m.matchingItemsOnly && !m.matchLimitExceeded } // matchingItemsOnlyText returns the text to display when showing matching items only func matchingItemsOnlyText(matchingItemsOnly bool) string { if matchingItemsOnly { return "showing matches only" } return "" } // removeEmpty removes empty strings from a slice func removeEmpty(s []string) []string { var result []string for _, str := range s { if str != "" { result = append(result, str) } } return result } // getTextAfterFilter returns the text to display after the filter input func (m *Model[T]) getTextAfterFilter() string { if m.filterTextInput.Value() == "" { return "type to filter" } return m.getMatchCountText() } // getMatchCountText returns the formatted match count text func (m *Model[T]) getMatchCountText() string { if m.matchLimitExceeded { if m.itemDescriptor != "" { return fmt.Sprintf("(%d+ matches on %d+ %s)", m.maxMatchLimit, m.numMatchingItems, m.itemDescriptor) } return fmt.Sprintf("(%d+ matches)", m.maxMatchLimit) } if m.totalMatchesOnAllItems == 0 { return "(no matches)" } currentMatch := m.focusedMatchIdx + 1 if m.focusedMatchIdx < 0 { currentMatch = 0 } if m.itemDescriptor != "" { return fmt.Sprintf("(%d/%d matches on %d %s)", currentMatch, m.totalMatchesOnAllItems, m.numMatchingItems, m.itemDescriptor) } return fmt.Sprintf("(%d/%d matches)", currentMatch, m.totalMatchesOnAllItems) } func (m *Model[T]) navigateToNextMatch() { if len(m.allMatches) == 0 { return } m.focusedMatchIdx = (m.focusedMatchIdx + 1) % len(m.allMatches) m.afterMatchNavigation() } func (m *Model[T]) navigateToPrevMatch() { if len(m.allMatches) == 0 { return } m.focusedMatchIdx-- if m.focusedMatchIdx < 0 { m.focusedMatchIdx = len(m.allMatches) - 1 } m.afterMatchNavigation() } func (m *Model[T]) afterMatchNavigation() { m.ensureCurrentMatchInView() m.setSelectionToCurrentMatch() m.updateFocusedMatchHighlight() m.setFilterLine(m.renderFilterLine()) } const maxSearchHistorySize = 100 func (m *Model[T]) addToSearchHistory(text string) { if text == "" { return } if len(m.searchHistory) > 0 && m.searchHistory[len(m.searchHistory)-1] == text { return } m.searchHistory = append(m.searchHistory, text) if len(m.searchHistory) > maxSearchHistorySize { m.searchHistory = m.searchHistory[len(m.searchHistory)-maxSearchHistorySize:] } } func (m *Model[T]) resetSearchHistoryBrowsing() { m.searchHistoryIdx = len(m.searchHistory) m.searchHistoryDraft = "" } func (m *Model[T]) navigateSearchHistoryPrev() { if len(m.searchHistory) == 0 { return } if m.searchHistoryIdx == len(m.searchHistory) { m.searchHistoryDraft = m.filterTextInput.Value() } if m.searchHistoryIdx > 0 { m.searchHistoryIdx-- } text := m.searchHistory[m.searchHistoryIdx] m.filterTextInput.SetValue(text) m.filterTextInput.SetCursor(len(text)) } func (m *Model[T]) navigateSearchHistoryNext() { if m.searchHistoryIdx >= len(m.searchHistory) { return } m.searchHistoryIdx++ if m.searchHistoryIdx == len(m.searchHistory) { m.filterTextInput.SetValue(m.searchHistoryDraft) m.filterTextInput.SetCursor(len(m.searchHistoryDraft)) } else { text := m.searchHistory[m.searchHistoryIdx] m.filterTextInput.SetValue(text) m.filterTextInput.SetCursor(len(text)) } } func (m *Model[T]) getFocusedMatch() *viewport.Highlight { if m.focusedMatchIdx < 0 || m.focusedMatchIdx >= len(m.allMatches) { return nil } return &m.allMatches[m.focusedMatchIdx] } // getItemIdx returns the viewport item index for a match, remapping when showing matches only func (m *Model[T]) getItemIdx(match *viewport.Highlight) int { itemIdx := match.ItemIndex if m.showMatchesOnly() { if filteredIdx, ok := m.itemIdxToFilteredIdx[itemIdx]; ok { return filteredIdx } } return itemIdx } func (m *Model[T]) ensureCurrentMatchInView() { currentMatch := m.getFocusedMatch() if currentMatch == nil { return } widthRange := m.matchWidthsByMatchIdx[m.focusedMatchIdx] m.vp.EnsureItemInView(m.getItemIdx(currentMatch), widthRange.Start, widthRange.End, m.verticalPad, m.horizontalPad) } func (m *Model[T]) setSelectionToCurrentMatch() { if !m.vp.GetSelectionEnabled() { return } currentMatch := m.getFocusedMatch() if currentMatch == nil { return } itemIdx := m.getItemIdx(currentMatch) if m.vp.GetSelectedItemIdx() != itemIdx { m.vp.SetSelectedItemIdx(itemIdx) } } ================================================ FILE: modules/viewport/filterableviewport/filterableviewport_filterlineposition_test.go ================================================ package filterableviewport import ( "testing" "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/internal" ) func TestFilterLinePositionTop(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Filter line should appear at top (just below header, which is empty) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "No Filter", "line 1", "line 2", "line 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePositionTopWithActiveFilter(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Apply a filter fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('l')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "[exact] Filter: l (1/3 matches on 3 items)", focusedStyle.Render("l") + "ine 1", unfocusedStyle.Render("l") + "ine 2", unfocusedStyle.Render("l") + "ine 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePositionTopWithHeader(t *testing.T) { fv := makeFilterableViewport( 50, 6, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetHeader([]string{"My Header"}) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Header, then filter line, then content, then footer expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "My Header", "No Filter", "line 1", "line 2", "line 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePositionTopDuringEditing(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Enter filter editing mode fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('t')) fv, _ = fv.Update(internal.MakeKeyMsg('e')) // Filter line with cursor should appear at top expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "[exact] Filter: te" + cursorStyle.Render(" ") + " (no matches)", "line 1", "line 2", "line 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePositionBottomIsDefault(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), // No WithFilterLinePosition - should default to bottom }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Filter line should appear at bottom (default behavior) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePositionTopScrolling(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", "line 4", "line 5", "line 6", })) // Filter line at top, 3 content lines visible (height 5 - 1 filter - 1 footer = 3) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "No Filter", "line 1", "line 2", "line 3", footerStyle.Render("50% (3/6)"), }) internal.CmpStr(t, expectedView, fv.View()) // Scroll down fv, _ = fv.Update(downKeyMsg) expectedAfterScroll := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "No Filter", "line 2", "line 3", "line 4", footerStyle.Render("66% (4/6)"), }) internal.CmpStr(t, expectedAfterScroll, fv.View()) } func TestFilterLinePositionTopWithWrap(t *testing.T) { fv := makeFilterableViewport( 15, 7, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{ WithEmptyText[object]("None"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "short", "longer text that wraps", })) // Filter line at top, then content (with wrapping) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "None", "short", "longer text tha", "t wraps", "", "", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePositionTopMatchNavigation(t *testing.T) { fv := makeFilterableViewport( 60, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", "apricot", })) // Apply filter fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // First match focused (apple=1, banana=3, apricot=1 = 5 total matches) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "[exact] Filter: a (1/5 matches on 3 items)", focusedStyle.Render("a") + "pple", "b" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), unfocusedStyle.Render("a") + "pricot", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) // Navigate to next match fv, _ = fv.Update(nextMatchKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "[exact] Filter: a (2/5 matches on 3 items)", unfocusedStyle.Render("a") + "pple", "b" + focusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), unfocusedStyle.Render("a") + "pricot", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } ================================================ FILE: modules/viewport/filterableviewport/filterableviewport_filterlineprefix_test.go ================================================ package filterableviewport import ( "testing" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/internal" ) func TestFilterLinePrefixNoFilter(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object]("Prefix"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Prefix should be prepended to the empty text expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "Prefix No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePrefixWithActiveFilter(t *testing.T) { fv := makeFilterableViewport( 60, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object]("Prefix"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Apply a filter fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('l')) fv, _ = fv.Update(applyFilterKeyMsg) // Prefix should be prepended to the filter content expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("l") + "ine 1", unfocusedStyle.Render("l") + "ine 2", unfocusedStyle.Render("l") + "ine 3", "Prefix [exact] Filter: l (1/3 matches on 3 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePrefixDuringEditing(t *testing.T) { fv := makeFilterableViewport( 60, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithFilterLinePrefix[object]("Prefix"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Enter filter editing mode fv, _ = fv.Update(filterKeyMsg) // Prefix should be prepended even during editing expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "Prefix [exact] Filter: " + cursorStyle.Render(" ") + " type to filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePrefixWithPositionTop(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object]("Prefix"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Prefix at top position expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "Prefix No Filter", "line 1", "line 2", "line 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePrefixEmpty(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object](""), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Empty prefix should behave the same as no prefix expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePrefixWithFilterCancelRestore(t *testing.T) { fv := makeFilterableViewport( 60, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object]("Prefix"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Initially shows prefix with empty text expectedInitial := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "Prefix No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedInitial, fv.View()) // Apply filter fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('l')) fv, _ = fv.Update(applyFilterKeyMsg) // Cancel filter - should go back to prefix + empty text fv, _ = fv.Update(cancelFilterKeyMsg) internal.CmpStr(t, expectedInitial, fv.View()) } func TestFilterLinePrefixTruncation(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object]("VeryLongLabelText"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", })) // Prefix + empty text exceeds width, should truncate expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "VeryLongLabelText...", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePrefixAndPositionTopWithActiveFilter(t *testing.T) { fv := makeFilterableViewport( 60, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object]("Prefix"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Apply a filter fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('l')) fv, _ = fv.Update(applyFilterKeyMsg) // Prefix at top with active filter expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "Prefix [exact] Filter: l (1/3 matches on 3 items)", focusedStyle.Render("l") + "ine 1", unfocusedStyle.Render("l") + "ine 2", unfocusedStyle.Render("l") + "ine 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePrefixStyled(t *testing.T) { prefixStyle := lipgloss.NewStyle().Bold(true) fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object](prefixStyle.Render("Prefix:")), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Styled prefix should render correctly expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", prefixStyle.Render("Prefix:") + " No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilterLinePrefixNoFilter(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Set prefix after construction fv.SetFilterLinePrefix("Prefix") expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "Prefix No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilterLinePrefixWithActiveFilter(t *testing.T) { fv := makeFilterableViewport( 60, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Apply a filter first fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('l')) fv, _ = fv.Update(applyFilterKeyMsg) // Set prefix after filter is active fv.SetFilterLinePrefix("Prefix") expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("l") + "ine 1", unfocusedStyle.Render("l") + "ine 2", unfocusedStyle.Render("l") + "ine 3", "Prefix [exact] Filter: l (1/3 matches on 3 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilterLinePrefixChangesExistingPrefix(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object]("OldPrefix"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Verify old prefix is shown expectedOld := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "OldPrefix No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedOld, fv.View()) // Change prefix fv.SetFilterLinePrefix("NewPrefix") expectedNew := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "NewPrefix No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedNew, fv.View()) } func TestSetFilterLinePrefixToEmpty(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePrefix[object]("Prefix"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Clear prefix fv.SetFilterLinePrefix("") expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilterLinePrefixWithPositionTop(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Set prefix with top position fv.SetFilterLinePrefix("Prefix") expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "Prefix No Filter", "line 1", "line 2", "line 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilterLinePrefixPreservedAfterFilterCycle(t *testing.T) { fv := makeFilterableViewport( 60, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Set prefix after construction fv.SetFilterLinePrefix("Prefix") expectedInitial := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "Prefix No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedInitial, fv.View()) // Apply then cancel filter - prefix should be preserved fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('l')) fv, _ = fv.Update(applyFilterKeyMsg) fv, _ = fv.Update(cancelFilterKeyMsg) internal.CmpStr(t, expectedInitial, fv.View()) } func TestSetWidthReRendersFilterLine(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), WithFilterLinePosition[object](FilterLineTop), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Set prefix while width is normal — filter line renders correctly fv.SetFilterLinePrefix("Prefix") expectedNormal := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "Prefix No Filter", "line 1", "line 2", "line 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedNormal, fv.View()) // Shrink to zero width (simulates hidden page in fullscreen) fv.SetWidth(0) // Change prefix while width is 0 (simulates focus change while hidden) fv.SetFilterLinePrefix("NewPrefix") // Restore width — filter line should re-render with new prefix fv.SetWidth(50) expectedRestored := internal.Pad(50, fv.GetHeight(), []string{ "NewPrefix No Filter", "line 1", "line 2", "line 3", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedRestored, fv.View()) } ================================================ FILE: modules/viewport/filterableviewport/filterableviewport_saving_test.go ================================================ package filterableviewport import ( "os" "path/filepath" "strings" "testing" "charm.land/bubbles/v2/key" tea "charm.land/bubbletea/v2" "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) type saveTestObject struct { item item.Item } func (o saveTestObject) GetItem() item.Item { return o.item } var ( saveKey = key.NewBinding(key.WithKeys("ctrl+s")) saveKeyMsg = tea.KeyPressMsg{Code: 's', Mod: tea.ModCtrl} savingEnterKeyMsg = tea.KeyPressMsg{Code: tea.KeyEnter, Text: "enter"} savingEscapeKeyMsg = tea.KeyPressMsg{Code: tea.KeyEscape, Text: "esc"} ) func newSaveTestFilterableViewport(t *testing.T) (*Model[saveTestObject], string) { t.Helper() tmpDir := t.TempDir() vp := viewport.New[saveTestObject](80, 24, viewport.WithFileSaving[saveTestObject](tmpDir, saveKey), ) fv := New[saveTestObject](vp) return fv, tmpDir } func setSaveTestObjects(fv *Model[saveTestObject], lines []string) { objects := make([]saveTestObject, len(lines)) for i, line := range lines { objects[i] = saveTestObject{item: item.NewItem(line)} } fv.SetObjects(objects) } func TestFilterableViewport_AllHotkeysTypedIntoFilename(t *testing.T) { fv, tmpDir := newSaveTestFilterableViewport(t) setSaveTestObjects(fv, []string{"test content"}) // enter filename mode fv, _ = fv.Update(saveKeyMsg) if !strings.Contains(fv.View(), "Save as:") { t.Fatal("expected to be in filename entry mode") } // type all filterableviewport hotkeys - should go into filename, not trigger actions fv, _ = fv.Update(internal.MakeKeyMsg('/')) // filter key fv, _ = fv.Update(internal.MakeKeyMsg('r')) // regex filter key fv, _ = fv.Update(internal.MakeKeyMsg('n')) // next match key fv, _ = fv.Update(internal.MakeKeyMsg('N')) // prev match key fv, _ = fv.Update(internal.MakeKeyMsg('o')) // toggle matching items only key // filter should not be activated if fv.FilterFocused() { t.Error("filter should not be focused during filename entry") } // save and verify filename contains all typed keys _, cmd := fv.Update(savingEnterKeyMsg) cmd() expectedPath := filepath.Join(tmpDir, "/rnNo.txt") if _, err := os.Stat(expectedPath); os.IsNotExist(err) { t.Errorf("expected file %s to exist", expectedPath) } } func TestFilterableViewport_FilterWorksAfterCancelingSave(t *testing.T) { fv, _ := newSaveTestFilterableViewport(t) setSaveTestObjects(fv, []string{"line1", "line2"}) // enter save mode then cancel fv, _ = fv.Update(saveKeyMsg) fv, _ = fv.Update(savingEscapeKeyMsg) // filter should work normally fv, _ = fv.Update(internal.MakeKeyMsg('/')) if !fv.FilterFocused() { t.Error("expected filter to be focused after canceling save") } } func TestFilterableViewport_SaveDuringActiveFilter(t *testing.T) { fv, tmpDir := newSaveTestFilterableViewport(t) setSaveTestObjects(fv, []string{"foo one", "bar two", "foo three"}) // apply a filter fv, _ = fv.Update(internal.MakeKeyMsg('/')) for _, r := range "foo" { fv, _ = fv.Update(internal.MakeKeyMsg(r)) } fv, _ = fv.Update(savingEnterKeyMsg) // save with default filename fv, _ = fv.Update(saveKeyMsg) _, cmd := fv.Update(savingEnterKeyMsg) cmd() // find and read the saved file files, _ := os.ReadDir(tmpDir) if len(files) != 1 { t.Fatalf("expected 1 file, got %d", len(files)) } content, _ := os.ReadFile(filepath.Join(tmpDir, files[0].Name())) //nolint:gosec // test file path is safe contentStr := string(content) // should contain all lines, not just filtered ones if !strings.Contains(contentStr, "foo one") || !strings.Contains(contentStr, "bar two") || !strings.Contains(contentStr, "foo three") { t.Errorf("expected all lines in saved content, got: %s", contentStr) } } ================================================ FILE: modules/viewport/filterableviewport/filterableviewport_searchhistory_test.go ================================================ package filterableviewport import ( "fmt" "testing" tea "charm.land/bubbletea/v2" "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/internal" ) var upKeyMsg = tea.KeyPressMsg{Code: tea.KeyUp, Text: "up"} func makeSearchHistoryFV() *Model[object] { fv := makeFilterableViewport( 40, 10, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "alpha", "bravo", "charlie", "delta", "echo", })) return fv } func typeFilter(fv *Model[object], text string) { for _, ch := range text { fv.Update(internal.MakeKeyMsg(ch)) } } func applyFilter(fv *Model[object], text string) { fv.Update(cancelFilterKeyMsg) // clear any existing filter text fv.Update(filterKeyMsg) typeFilter(fv, text) fv.Update(applyFilterKeyMsg) } func TestSearchHistoryBasic(t *testing.T) { fv := makeSearchHistoryFV() applyFilter(fv, "alpha") applyFilter(fv, "bravo") // re-enter filter mode, Up shows most recent fv.Update(filterKeyMsg) fv.Update(upKeyMsg) if fv.filterTextInput.Value() != "bravo" { t.Errorf("expected 'bravo', got %q", fv.filterTextInput.Value()) } // Up again shows older fv.Update(upKeyMsg) if fv.filterTextInput.Value() != "alpha" { t.Errorf("expected 'alpha', got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryNoConsecutiveDuplicates(t *testing.T) { fv := makeSearchHistoryFV() applyFilter(fv, "alpha") applyFilter(fv, "alpha") if len(fv.searchHistory) != 1 { t.Errorf("expected 1 history entry, got %d", len(fv.searchHistory)) } // non-consecutive duplicate is allowed applyFilter(fv, "bravo") applyFilter(fv, "alpha") if len(fv.searchHistory) != 3 { t.Errorf("expected 3 history entries, got %d: %v", len(fv.searchHistory), fv.searchHistory) } } func TestSearchHistoryDraftPreserved(t *testing.T) { fv := makeSearchHistoryFV() applyFilter(fv, "alpha") // enter filter mode with clean text and type a draft fv.Update(cancelFilterKeyMsg) fv.Update(filterKeyMsg) typeFilter(fv, "draft") // Up should save draft and show history fv.Update(upKeyMsg) if fv.filterTextInput.Value() != "alpha" { t.Errorf("expected 'alpha', got %q", fv.filterTextInput.Value()) } // Down should return to draft fv.Update(downKeyMsg) if fv.filterTextInput.Value() != "draft" { t.Errorf("expected 'draft', got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryUpAtOldest(t *testing.T) { fv := makeSearchHistoryFV() applyFilter(fv, "alpha") applyFilter(fv, "bravo") fv.Update(filterKeyMsg) fv.Update(upKeyMsg) // bravo fv.Update(upKeyMsg) // alpha fv.Update(upKeyMsg) // should stay at alpha if fv.filterTextInput.Value() != "alpha" { t.Errorf("expected 'alpha', got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryDownAtDraft(t *testing.T) { fv := makeSearchHistoryFV() applyFilter(fv, "alpha") fv.Update(cancelFilterKeyMsg) fv.Update(filterKeyMsg) typeFilter(fv, "current") // Down at draft position should be no-op fv.Update(downKeyMsg) if fv.filterTextInput.Value() != "current" { t.Errorf("expected 'current', got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryEmptyNotSaved(t *testing.T) { fv := makeSearchHistoryFV() // apply with empty text fv.Update(filterKeyMsg) fv.Update(applyFilterKeyMsg) if len(fv.searchHistory) != 0 { t.Errorf("expected 0 history entries, got %d", len(fv.searchHistory)) } } func TestSearchHistoryResetOnReEnter(t *testing.T) { fv := makeSearchHistoryFV() applyFilter(fv, "alpha") applyFilter(fv, "bravo") // enter filter mode, browse history fv.Update(filterKeyMsg) fv.Update(upKeyMsg) // bravo fv.Update(upKeyMsg) // alpha // cancel and re-enter fv.Update(cancelFilterKeyMsg) fv.Update(filterKeyMsg) // should start at draft (empty), not mid-browse fv.Update(upKeyMsg) if fv.filterTextInput.Value() != "bravo" { t.Errorf("expected 'bravo' (most recent), got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryUpDownNoHistory(t *testing.T) { fv := makeSearchHistoryFV() // enter filter mode with no history fv.Update(filterKeyMsg) typeFilter(fv, "test") // Up/Down should not change text (no history to browse) fv.Update(upKeyMsg) if fv.filterTextInput.Value() != "test" { t.Errorf("expected 'test' unchanged, got %q", fv.filterTextInput.Value()) } fv.Update(downKeyMsg) if fv.filterTextInput.Value() != "test" { t.Errorf("expected 'test' unchanged, got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryLimit(t *testing.T) { fv := makeSearchHistoryFV() for i := range maxSearchHistorySize + 1 { applyFilter(fv, fmt.Sprintf("search%d", i)) } if len(fv.searchHistory) != maxSearchHistorySize { t.Errorf("expected %d history entries, got %d", maxSearchHistorySize, len(fv.searchHistory)) } // oldest entry should have been trimmed if fv.searchHistory[0] != "search1" { t.Errorf("expected oldest entry 'search1', got %q", fv.searchHistory[0]) } // newest should be the last one if fv.searchHistory[len(fv.searchHistory)-1] != fmt.Sprintf("search%d", maxSearchHistorySize) { t.Errorf("expected newest entry 'search%d', got %q", maxSearchHistorySize, fv.searchHistory[len(fv.searchHistory)-1]) } } func TestSearchHistoryUpDownNotEditingDoesNotBrowseHistory(t *testing.T) { fv := makeSearchHistoryFV() applyFilter(fv, "alpha") applyFilter(fv, "bravo") // cancel filter so we're not editing fv.Update(cancelFilterKeyMsg) // verify we're not in editing mode if fv.filterMode != filterModeOff { t.Fatalf("expected filterModeOff, got %d", fv.filterMode) } // down/up should not change filter text input (should go to viewport) fv.Update(downKeyMsg) fv.Update(upKeyMsg) // re-enter filter mode - text should be empty (cleared by cancel), not a history entry fv.Update(filterKeyMsg) if fv.filterTextInput.Value() != "" { t.Errorf("expected empty filter text, got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryCaseInsensitiveNoPrefix(t *testing.T) { fv := makeSearchHistoryFV() // apply a plain exact search applyFilter(fv, "butt") // enter case-insensitive mode and browse history fv.Update(cancelFilterKeyMsg) fv.Update(caseInsensitiveFilterKeyMsg) fv.Update(upKeyMsg) // history text is stored without any prefix — mode is separate if fv.filterTextInput.Value() != "butt" { t.Errorf("expected 'butt', got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryCaseInsensitiveStoredPlain(t *testing.T) { fv := makeSearchHistoryFV() // apply a case-insensitive search fv.Update(cancelFilterKeyMsg) fv.Update(caseInsensitiveFilterKeyMsg) typeFilter(fv, "butt") fv.Update(applyFilterKeyMsg) // re-enter case-insensitive mode and browse history fv.Update(cancelFilterKeyMsg) fv.Update(caseInsensitiveFilterKeyMsg) fv.Update(upKeyMsg) // stored as plain "butt", no (?i) prefix if fv.filterTextInput.Value() != "butt" { t.Errorf("expected 'butt', got %q", fv.filterTextInput.Value()) } } func TestSearchHistoryRegexModeNoPrefix(t *testing.T) { fv := makeSearchHistoryFV() // apply a plain exact search applyFilter(fv, "butt") // enter regex mode and browse history fv.Update(cancelFilterKeyMsg) fv.Update(regexFilterKeyMsg) fv.Update(upKeyMsg) // should show plain text without any prefix if fv.filterTextInput.Value() != "butt" { t.Errorf("expected 'butt', got %q", fv.filterTextInput.Value()) } } ================================================ FILE: modules/viewport/filterableviewport/filterableviewport_test.go ================================================ package filterableviewport import ( "fmt" "strings" "testing" "time" "charm.land/bubbles/v2/key" tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) type object struct { item item.Item } func (i object) GetItem() item.Item { return i.item } var _ viewport.Object = object{} var ( filterKeyMsg = internal.MakeKeyMsg('/') regexFilterKeyMsg = internal.MakeKeyMsg('r') caseInsensitiveFilterKeyMsg = internal.MakeKeyMsg('i') applyFilterKeyMsg = tea.KeyPressMsg{Code: tea.KeyEnter, Text: "enter"} cancelFilterKeyMsg = tea.KeyPressMsg{Code: tea.KeyEscape, Text: "esc"} toggleMatchesKeyMsg = internal.MakeKeyMsg('o') nextMatchKeyMsg = internal.MakeKeyMsg('n') prevMatchKeyMsg = internal.MakeKeyMsg('N') downKeyMsg = tea.KeyPressMsg{Code: tea.KeyDown, Text: "down"} footerStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("8")) selectedItemStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("15")) viewportStyles = viewport.Styles{ FooterStyle: footerStyle, SelectedItemStyle: selectedItemStyle, } // cursorStyle matches the default virtual cursor rendering from textinput v2: // cursor.Model.View() renders Style.Inline(true).Reverse(true).Render(char) // where Style = lipgloss.NewStyle().Foreground(cursorColor) and cursorColor defaults to "7" cursorStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("7")).Reverse(true) focusedStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("0")).Background(lipgloss.Color("11")) focusedIfSelectedStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("3")) unfocusedStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("7")).Background(lipgloss.Color("12")) matchStyles = MatchStyles{ Focused: focusedStyle, FocusedIfSelected: focusedStyle, Unfocused: unfocusedStyle, } filterableViewportStyles = Styles{ Match: matchStyles, } ) func makeFilterableViewport( width int, height int, vpOptions []viewport.Option[object], fvOptions []Option[object], ) *Model[object] { // use default viewport test styles, will be overridden by options if passed in defaultTestVpStylesOption := viewport.WithStyles[object](viewportStyles) vpOptions = append([]viewport.Option[object]{defaultTestVpStylesOption}, vpOptions...) // use default filterable viewport test styles and item descriptor, will be overridden by options if passed in defaultTestFvStylesOption := WithStyles[object](filterableViewportStyles) defaultTestItemDescriptorOption := WithItemDescriptor[object]("items") fvOptions = append([]Option[object]{defaultTestFvStylesOption, defaultTestItemDescriptorOption}, fvOptions...) vp := viewport.New[object](width, height, vpOptions...) return New[object](vp, fvOptions...) } func TestNew(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "Line 1", "Line 2", "Line 3", })) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "Line 1", "Line 2", "No Filter", footerStyle.Render("66% (2/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestNewLongText(t *testing.T) { fv := makeFilterableViewport( 10, // emptyText is longer than this 5, // increased height []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("Nada Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "Line 1", "Line 2", "Line 3", })) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "Line 1", "Line 2", "Line 3", "Nada Fi...", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestNewWidthHeight(t *testing.T) { fv := makeFilterableViewport( 25, 8, []viewport.Option[object]{}, []Option[object]{}, ) if fv.GetWidth() != 25 { t.Errorf("expected width 25, got %d", fv.GetWidth()) } if fv.GetHeight() != 8 { t.Errorf("expected height 8, got %d", fv.GetHeight()) } } func TestZeroDimensions(t *testing.T) { fv := makeFilterableViewport( 0, 0, []viewport.Option[object]{}, []Option[object]{}, ) if fv.GetWidth() != 0 { t.Errorf("expected width 0, got %d", fv.GetWidth()) } if fv.GetHeight() != 0 { t.Errorf("expected height 0, got %d", fv.GetHeight()) } internal.CmpStr(t, "", fv.View()) } func TestNegativeDimensions(t *testing.T) { fv := makeFilterableViewport( -5, -3, []viewport.Option[object]{}, []Option[object]{}, ) if fv.GetWidth() != 0 { t.Errorf("expected width 0 for negative input, got %d", fv.GetWidth()) } if fv.GetHeight() != 0 { t.Errorf("expected height 0 for negative input, got %d", fv.GetHeight()) } internal.CmpStr(t, "", fv.View()) } func TestSetWidthSetHeight(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetWidth(30) if fv.GetWidth() != 30 { t.Errorf("expected width 30, got %d", fv.GetWidth()) } fv.SetHeight(6) if fv.GetHeight() != 6 { t.Errorf("expected height 6, got %d", fv.GetHeight()) } } func TestFilterFocusedInitial(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{}, ) if fv.FilterFocused() { t.Error("filter should not be focused initially") } } func TestEmptyContent(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No filter"), }, ) fv.SetObjects([]object{}) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "", "", "No filter", "", }) internal.CmpStr(t, expectedView, fv.View()) } func TestWithMatchesOnlyTrue(t *testing.T) { fv := makeFilterableViewport( 80, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithMatchingItemsOnly[object](true), }, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", "cherry", })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('p')) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "a" + focusedStyle.Render("p") + unfocusedStyle.Render("p") + "le", "", "[exact] Filter: p" + cursorStyle.Render(" ") + " (1/2 matches on 1 items) showing matches only", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestWithMatchesOnlyFalse(t *testing.T) { fv := makeFilterableViewport( 80, 5, // increased height []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithMatchingItemsOnly[object](false), }, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", "cherry", })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('p')) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "a" + focusedStyle.Render("p") + unfocusedStyle.Render("p") + "le", "banana", "cherry", "[exact] Filter: p" + cursorStyle.Render(" ") + " (1/2 matches on 1 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestNoItemDescriptor(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithItemDescriptor[object](""), // override the test default }, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", "cherry", })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('p')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "a" + focusedStyle.Render("p") + unfocusedStyle.Render("p") + "le", "banana", "cherry", "[exact] p (1/2 matches)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestWithCanToggleMatchesOnlyTrue(t *testing.T) { fv := makeFilterableViewport( 80, 4, []viewport.Option[object]{}, []Option[object]{ WithCanToggleMatchingItemsOnly[object](true), }, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", "cherry", })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('p')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "a" + focusedStyle.Render("p") + unfocusedStyle.Render("p") + "le", "banana", "[exact] p (1/2 matches on 1 items)", footerStyle.Render("66% (2/3)"), }) internal.CmpStr(t, expectedView, fv.View()) fv, _ = fv.Update(toggleMatchesKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "a" + focusedStyle.Render("p") + unfocusedStyle.Render("p") + "le", "", "[exact] p (1/2 matches on 1 items) showing matches only", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestWithCanToggleMatchesOnlyFalse(t *testing.T) { fv := makeFilterableViewport( 80, 4, []viewport.Option[object]{}, []Option[object]{ WithCanToggleMatchingItemsOnly[object](false), }, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", "cherry", })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('p')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "a" + focusedStyle.Render("p") + unfocusedStyle.Render("p") + "le", "banana", "[exact] p (1/2 matches on 1 items)", footerStyle.Render("66% (2/3)"), }) internal.CmpStr(t, expectedView, fv.View()) fv, _ = fv.Update(toggleMatchesKeyMsg) internal.CmpStr(t, expectedView, fv.View()) } func TestNilContent(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(nil) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "", "", "No Filter", "", }) internal.CmpStr(t, expectedView, fv.View()) } func TestDefaultText(t *testing.T) { fv := makeFilterableViewport( 40, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"test"})) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "test", "", "No Filter", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('p')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "test", "", "[exact] p (no matches)", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterKeyFocus(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(filterKeyMsg) if !fv.FilterFocused() { t.Error("filter should be focused after pressing filter key") } } func TestRegexFilterKeyFocus(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(regexFilterKeyMsg) if !fv.FilterFocused() { t.Error("filter should be focused after pressing regex filter key") } } func TestCaseInsensitiveFilterKeyEmpty(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{"Apple", "banana"})) fv, _ = fv.Update(caseInsensitiveFilterKeyMsg) if !fv.FilterFocused() { t.Error("filter should be focused after pressing case insensitive filter key") } fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // 'a' matches 'A' in Apple and 3 'a's in banana = 4 matches on 2 items expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("A") + "pple", "b" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "[iregex] Filter: a (1/4 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSwitchFromExactToCaseInsensitive(t *testing.T) { fv := makeFilterableViewport( 60, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{"Apple", "banana"})) // exact filter fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // exact filter matches only lowercase 'a' expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "Apple", "b" + focusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "[exact] Filter: a (1/3 matches on 1 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) // 'i' to switch to case-insensitive mode fv, _ = fv.Update(caseInsensitiveFilterKeyMsg) // now matches both cases, no (?i) in text, label is [iregex] expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("A") + "pple", "b" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "[iregex] Filter: a" + cursorStyle.Render(" ") + " (1/4 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSwitchFromCaseInsensitiveToExact(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{"Apple", "banana"})) // start case-insensitive filter fv, _ = fv.Update(caseInsensitiveFilterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // case-insensitive matching (matches both 'A' and 'a') expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("A") + "pple", "b" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "[iregex] Filter: a (1/4 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) // switch to exact mode with '/' fv, _ = fv.Update(filterKeyMsg) // filter text preserved as-is, just switches to exact mode expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "Apple", "b" + focusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "[exact] Filter: a" + cursorStyle.Render(" ") + " (1/3 matches on 1 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestCaseInsensitiveKeyReEntersEditingMode(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{"Apple", "banana"})) // start case-insensitive filter fv, _ = fv.Update(caseInsensitiveFilterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // case-insensitive matching expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("A") + "pple", "b" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "[iregex] Filter: a (1/4 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) // press 'i' again - should just re-enter editing mode fv, _ = fv.Update(caseInsensitiveFilterKeyMsg) // still case-insensitive, filter should be focused for editing expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("A") + "pple", "b" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "[iregex] Filter: a" + cursorStyle.Render(" ") + " (1/4 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestApplyFilterKey(t *testing.T) { fv := makeFilterableViewport( 40, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) if fv.FilterFocused() { t.Error("filter should not be focused after applying filter") } expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("a") + "pple", "b" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "[exact] a (1/4 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestCancelFilterKey(t *testing.T) { fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(cancelFilterKeyMsg) if fv.FilterFocused() { t.Error("filter should not be focused after canceling") } expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "banana", "No Filter", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestRegexFilterValidPattern(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana", "apricot"})) fv, _ = fv.Update(regexFilterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(internal.MakeKeyMsg('p')) fv, _ = fv.Update(internal.MakeKeyMsg('+')) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("app") + "le", "banana", "[regex] Filter: ap+" + cursorStyle.Render(" ") + " (1/2 matches on 2 items)", footerStyle.Render("66% (2/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestRegexFilterInvalidPattern(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(regexFilterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('[')) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "banana", "[regex] Filter: [" + cursorStyle.Render(" ") + " (no matches)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestStyleOverlay(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetSelectionEnabled(true) fv.SetObjects(stringsToItems([]string{ "apple pie", internal.RedFg.Render("apple") + " pie " + internal.BlueFg.Render("yum"), })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "apple pie" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // on selected lines, match highlights keep their original styles and selection fills gaps // first item is selected, has focused match covering entire content "apple pie" expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple pie"), unfocusedStyle.Render("apple pie") + " " + internal.BlueFg.Render("yum"), "[exact] apple pie (1/2 matches on 2 items)", footerStyle.Render("50% (1/2)"), }) internal.CmpStr(t, expectedView, fv.View()) // move selection down to second item: match keeps unfocused style, selection fills " yum" fv, _ = fv.Update(downKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple pie"), unfocusedStyle.Render("apple pie") + selectedItemStyle.Render(" yum"), "[exact] apple pie (1/2 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestRegexFilterMultipleMatchesInSingleLine(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{ "the cat sat on the mat", "dog", "another the and the end", })) fv, _ = fv.Update(regexFilterKeyMsg) // use regex pattern \bthe\b to match whole word "the" for _, c := range "\\bthe\\b" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // should focus on first match in first line expectedFirstMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("the") + " cat sat on " + unfocusedStyle.Render("the") + " mat", "dog", "another " + unfocusedStyle.Render("the") + " and " + unfocusedStyle.Render("the") + " end", "", "[regex] Filter: \\bthe\\b (1/4 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedFirstMatch, fv.View()) // navigate to second match (still in first line) fv, _ = fv.Update(nextMatchKeyMsg) expectedSecondMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("the") + " cat sat on " + focusedStyle.Render("the") + " mat", "dog", "another " + unfocusedStyle.Render("the") + " and " + unfocusedStyle.Render("the") + " end", "", "[regex] Filter: \\bthe\\b (2/4 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedSecondMatch, fv.View()) // navigate to third match (third line, first match) fv, _ = fv.Update(nextMatchKeyMsg) expectedThirdMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("the") + " cat sat on " + unfocusedStyle.Render("the") + " mat", "dog", "another " + focusedStyle.Render("the") + " and " + unfocusedStyle.Render("the") + " end", "", "[regex] Filter: \\bthe\\b (3/4 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedThirdMatch, fv.View()) // navigate to fourth match (third line, second match) fv, _ = fv.Update(nextMatchKeyMsg) expectedFourthMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("the") + " cat sat on " + unfocusedStyle.Render("the") + " mat", "dog", "another " + unfocusedStyle.Render("the") + " and " + focusedStyle.Render("the") + " end", "", "[regex] Filter: \\bthe\\b (4/4 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedFourthMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) internal.CmpStr(t, expectedFirstMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedFourthMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedThirdMatch, fv.View()) } func TestNoMatchesShowsNoMatchesText(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('x')) fv, _ = fv.Update(internal.MakeKeyMsg('y')) fv, _ = fv.Update(internal.MakeKeyMsg('z')) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "banana", "[exact] xyz" + cursorStyle.Render(" ") + " (no matches)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestWithFilterModes(t *testing.T) { customModes := []FilterMode{ ExactFilterMode(key.NewBinding(key.WithKeys("g"))), } fv := makeFilterableViewport( 20, 4, []viewport.Option[object]{}, []Option[object]{ WithFilterModes[object](customModes), }, ) fv.SetObjects(stringsToItems([]string{"test"})) fv, _ = fv.Update(filterKeyMsg) // '/' should not match custom key 'g' if fv.FilterFocused() { t.Error("filter should not be focused with custom filter modes") } } func TestViewportControls(t *testing.T) { fv := makeFilterableViewport( 20, 3, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"line1", "line2", "line3"})) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line1", "No Filter", footerStyle.Render("33% (1/3)"), }) internal.CmpStr(t, expectedView, fv.View()) fv, _ = fv.Update(downKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line2", "No Filter", footerStyle.Render("66% (2/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestApplyEmptyFilterShowsWhenEmptyText(t *testing.T) { fv := makeFilterableViewport( 30, 4, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No filter applied"), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "banana", "No filter applied", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestEditingEmptyFilterShowsEditingMessage(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(filterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "banana", "[exact] Filter: " + cursorStyle.Render(" ") + " type to filter", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSpecialKeysWhileFiltering(t *testing.T) { fv := makeFilterableViewport( 80, 4, []viewport.Option[object]{}, []Option[object]{ WithCanToggleMatchingItemsOnly[object](true), }, ) fv.SetObjects(stringsToItems([]string{ "apple", "book", "food", "cherry", })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('p')) fv, _ = fv.Update(toggleMatchesKeyMsg) // 'o' fv, _ = fv.Update(nextMatchKeyMsg) // 'n' fv, _ = fv.Update(prevMatchKeyMsg) // 'N' fv, _ = fv.Update(filterKeyMsg) // '/' fv, _ = fv.Update(regexFilterKeyMsg) // 'r' expectedViewAfterO := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "book", "[exact] ponN/r" + cursorStyle.Render(" ") + " (no matches)", footerStyle.Render("50% (2/4)"), }) internal.CmpStr(t, expectedViewAfterO, fv.View()) } func TestAnsiEscapeCodesNotMatched(t *testing.T) { fv := makeFilterableViewport( 80, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ internal.RedFg.Render("apple"), internal.RedFg.Render("book"), internal.RedFg.Render("food"), internal.RedFg.Render("cherry"), })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "x1b" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ internal.RedFg.Render("apple"), internal.RedFg.Render("book"), "[exact] x1b (no matches)", footerStyle.Render("50% (2/4)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestMatchNavigationWithNoMatches(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('x')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "banana", "[exact] x (no matches)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) internal.CmpStr(t, expectedView, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedView, fv.View()) } func TestMatchNavigationWithOverlappingMatches(t *testing.T) { fv := makeFilterableViewport( 50, 4, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{"aaa"})) fv, _ = fv.Update(filterKeyMsg) for _, c := range "aa" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedFirstMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("aa") + "a", "", "[exact] aa (1/1 matches on 1 items)", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedFirstMatch, fv.View()) } func TestMatchNavigationWithAllItemsWrap(t *testing.T) { fv := makeFilterableViewport( 7, 6, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{ WithStyles[object](Styles{ Match: matchStyles, }), WithMatchingItemsOnly[object](false), WithEmptyText[object]("None"), }, ) fv.SetObjects(stringsToItems([]string{ "hi there", "hi over there", "no match", })) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "hi ther", "e", "hi over", " there", "None", footerStyle.Render("66% ..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(filterKeyMsg) for _, c := range "there" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedFirstMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "hi " + focusedStyle.Render("ther"), focusedStyle.Render("e"), "hi over", " " + unfocusedStyle.Render("there"), "[exa...", footerStyle.Render("66% ..."), }) internal.CmpStr(t, expectedFirstMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expectedSecondMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "hi " + unfocusedStyle.Render("ther"), unfocusedStyle.Render("e"), "hi over", " " + focusedStyle.Render("there"), "[exa...", footerStyle.Render("66% ..."), }) internal.CmpStr(t, expectedSecondMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) internal.CmpStr(t, expectedFirstMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedSecondMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedFirstMatch, fv.View()) } func TestMatchNavigationWithMatchingItemsOnlyWrap(t *testing.T) { fv := makeFilterableViewport( 7, 6, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{ WithStyles[object](Styles{ Match: matchStyles, }), WithMatchingItemsOnly[object](true), WithEmptyText[object]("None"), }, ) fv.SetObjects(stringsToItems([]string{ "hi there", "hi over there", "no match", })) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "hi ther", "e", "hi over", " there", "None", footerStyle.Render("66% ..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(filterKeyMsg) for _, c := range "there" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedFirstMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "hi " + focusedStyle.Render("ther"), focusedStyle.Render("e"), "hi over", " " + unfocusedStyle.Render("there"), "[exa...", footerStyle.Render("100%..."), }) internal.CmpStr(t, expectedFirstMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expectedSecondMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "hi " + unfocusedStyle.Render("ther"), unfocusedStyle.Render("e"), "hi over", " " + focusedStyle.Render("there"), "[exa...", footerStyle.Render("100%..."), }) internal.CmpStr(t, expectedSecondMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) internal.CmpStr(t, expectedFirstMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedSecondMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedFirstMatch, fv.View()) } func TestMatchNavigationWrapLineOffset(t *testing.T) { fv := makeFilterableViewport( 20, 5, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ strings.Repeat("a", 100) + "goose" + strings.Repeat("a", 100), })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "goose" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ strings.Repeat("a", 20), strings.Repeat("a", 20), focusedStyle.Render("goose") + strings.Repeat("a", 15), "[exact] goose (1...", footerStyle.Render("99% (1/1)"), }) internal.CmpStr(t, expected, fv.View()) } func TestMatchNavigationWrappedLinesWithMatches(t *testing.T) { fv := makeFilterableViewport( 4, 6, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ strings.Repeat("a", 10), strings.Repeat("b", 15), })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "aaa" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("aaa") + unfocusedStyle.Render("a"), unfocusedStyle.Render("aa") + unfocusedStyle.Render("aa"), unfocusedStyle.Render("a") + "a", "bbbb", "[...", footerStyle.Render("9..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("aaa") + focusedStyle.Render("a"), focusedStyle.Render("aa") + unfocusedStyle.Render("aa"), unfocusedStyle.Render("a") + "a", "bbbb", "[...", footerStyle.Render("9..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(cancelFilterKeyMsg) fv, _ = fv.Update(filterKeyMsg) for _, c := range "bbb" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "aaaa", "aaaa", "aa", focusedStyle.Render("bbb") + unfocusedStyle.Render("b"), "[...", footerStyle.Render("9..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "aaaa", "aa", unfocusedStyle.Render("bbb") + focusedStyle.Render("b"), focusedStyle.Render("bb") + unfocusedStyle.Render("bb"), "[...", footerStyle.Render("9..."), }) internal.CmpStr(t, expected, fv.View()) } func TestMatchNavigationWrappedLinesWithWrappedMatches(t *testing.T) { fv := makeFilterableViewport( 4, 5, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ strings.Repeat("a", 10), strings.Repeat("a", 15), })) fv, _ = fv.Update(filterKeyMsg) for range 5 { fv, _ = fv.Update(internal.MakeKeyMsg('a')) } fv, _ = fv.Update(applyFilterKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("aaaa"), focusedStyle.Render("a") + unfocusedStyle.Render("aaa"), unfocusedStyle.Render("aa"), "[...", footerStyle.Render("5..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("aaaa"), unfocusedStyle.Render("a") + focusedStyle.Render("aaa"), focusedStyle.Render("aa"), "[...", footerStyle.Render("5..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("aa"), focusedStyle.Render("aaaa"), focusedStyle.Render("a") + unfocusedStyle.Render("aaa"), "[...", footerStyle.Render("9..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("aaaa"), unfocusedStyle.Render("a") + focusedStyle.Render("aaa"), focusedStyle.Render("aa") + unfocusedStyle.Render("aa"), "[...", footerStyle.Render("9..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("a") + unfocusedStyle.Render("aaa"), unfocusedStyle.Render("aa") + focusedStyle.Render("aa"), focusedStyle.Render("aaa"), "[...", footerStyle.Render("1..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("a") + focusedStyle.Render("aaa"), focusedStyle.Render("aa") + unfocusedStyle.Render("aa"), unfocusedStyle.Render("aaa"), "[...", footerStyle.Render("1..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("aaaa"), focusedStyle.Render("a") + unfocusedStyle.Render("aaa"), unfocusedStyle.Render("aa") + unfocusedStyle.Render("aa"), "[...", footerStyle.Render("9..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("a") + focusedStyle.Render("aaa"), focusedStyle.Render("aa"), unfocusedStyle.Render("aaaa"), "[...", footerStyle.Render("9..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("aaaa"), focusedStyle.Render("a") + unfocusedStyle.Render("aaa"), unfocusedStyle.Render("aa"), "[...", footerStyle.Render("5..."), }) internal.CmpStr(t, expected, fv.View()) // rollover fv, _ = fv.Update(prevMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("a") + unfocusedStyle.Render("aaa"), unfocusedStyle.Render("aa") + focusedStyle.Render("aa"), focusedStyle.Render("aaa"), "[...", footerStyle.Render("1..."), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("aaaa"), focusedStyle.Render("a") + unfocusedStyle.Render("aaa"), unfocusedStyle.Render("aa"), "[...", footerStyle.Render("5..."), }) internal.CmpStr(t, expected, fv.View()) } func TestMatchNavigationNoWrap(t *testing.T) { fv := makeFilterableViewport( 30, 6, []viewport.Option[object]{ viewport.WithWrapText[object](false), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "duck duck duck duck duck duck duck duck duck duck goose", "duck duck duck duck duck goose duck duck duck duck duck", "goose duck duck duck duck duck duck duck duck duck duck", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "goose" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedFirstMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "...k duck duck duck duck " + focusedStyle.Render("goose"), unfocusedStyle.Render("...se") + " duck duck duck duck duck", "...ck duck duck duck duck duck", "", "[exact] goose (1/3 matches...", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedFirstMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expectedSecondMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "...k duck duck duck duck " + unfocusedStyle.Render("goose"), focusedStyle.Render("...se") + " duck duck duck duck duck", "...ck duck duck duck duck duck", "", "[exact] goose (2/3 matches...", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedSecondMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expectedThirdMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "duck duck duck duck duck du...", "duck duck duck duck duck " + unfocusedStyle.Render("go..."), focusedStyle.Render("goose") + " duck duck duck duck d...", "", "[exact] goose (3/3 matches...", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedThirdMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) internal.CmpStr(t, expectedFirstMatch, fv.View()) } func TestMatchNavigationNoWrapPanning(t *testing.T) { fv := makeFilterableViewport( 10, 3, []viewport.Option[object]{ viewport.WithWrapText[object](false), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ strings.Repeat("a", 32), })) fv, _ = fv.Update(filterKeyMsg) for range 4 { fv, _ = fv.Update(internal.MakeKeyMsg('a')) } fv, _ = fv.Update(applyFilterKeyMsg) expectedLeftmostMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("aaaa") + unfocusedStyle.Render("aaa.") + unfocusedStyle.Render(".."), "[exact]...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedLeftmostMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("aaaa") + focusedStyle.Render("aaa.") + unfocusedStyle.Render(".."), "[exact]...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expectedTravelingRight := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("..") + unfocusedStyle.Render(".aaa") + focusedStyle.Render("a..."), "[exact]...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedTravelingRight, fv.View()) for range 4 { fv, _ = fv.Update(nextMatchKeyMsg) internal.CmpStr(t, expectedTravelingRight, fv.View()) } fv, _ = fv.Update(nextMatchKeyMsg) expectedRightmostMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("..") + unfocusedStyle.Render(".aaa") + focusedStyle.Render("aaaa"), "[exact]...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedRightmostMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("..") + focusedStyle.Render(".aaa") + unfocusedStyle.Render("aaaa"), "[exact]...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) expectedTravelingLeft := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("...a") + unfocusedStyle.Render("aaa.") + unfocusedStyle.Render(".."), "[exact]...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedTravelingLeft, fv.View()) for range 4 { fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedTravelingLeft, fv.View()) } fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedLeftmostMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedRightmostMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) internal.CmpStr(t, expectedLeftmostMatch, fv.View()) } func TestMatchNavigationNoWrapUnicode(t *testing.T) { fv := makeFilterableViewport( 32, 3, []viewport.Option[object]{ viewport.WithWrapText[object](false), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ // a (1w, 1b), 💖 (2w, 4b) "💖💖💖💖💖💖💖💖 hi aaaaaaaaaaaaaaaa", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "hi" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedFirstMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "💖💖💖💖💖💖💖💖 " + focusedStyle.Render("hi") + " aaaaaaaaa...", "[exact] hi (1/1 matches on 1...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedFirstMatch, fv.View()) } func TestMatchNavigationManyMatchesWrap(t *testing.T) { fv := makeFilterableViewport( 100, 50, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{}, ) numAs := 10000 fv.SetObjects(stringsToItems([]string{ internal.RedFg.Render(strings.Repeat("a", numAs)), })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) firstRows := []string{ focusedStyle.Render("a") + strings.Repeat(unfocusedStyle.Render("a"), fv.GetWidth()-1), } rest := make([]string, fv.GetHeight()-3) // -3 for first row, filter, footer for i := range rest { rest[i] = strings.Repeat(unfocusedStyle.Render("a"), fv.GetWidth()) } rest = append(rest, fmt.Sprintf("[exact] a (1/%d matches on 1 items)", numAs)) rest = append(rest, footerStyle.Render("99% (1/1)")) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), append(firstRows, rest...)) internal.CmpStr(t, expected, fv.View()) } func TestMatchNavigationManyMatchesWrapPerformance(t *testing.T) { runTest := func(t *testing.T) { fv := makeFilterableViewport( 100, 50, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{}, ) numAs := 5000 fv.SetObjects(stringsToItems([]string{ internal.RedFg.Render(strings.Repeat("a", numAs)), })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) firstRows := []string{ focusedStyle.Render("a") + strings.Repeat(unfocusedStyle.Render("a"), fv.GetWidth()-1), } rest := make([]string, fv.GetHeight()-3) // -3 for first row, filter, footer for i := range rest { rest[i] = strings.Repeat(unfocusedStyle.Render("a"), fv.GetWidth()) } rest = append(rest, fmt.Sprintf("[exact] a (1/%d matches on 1 items)", numAs)) rest = append(rest, footerStyle.Render("99% (1/1)")) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), append(firstRows, rest...)) internal.CmpStr(t, expected, fv.View()) numNext := 40 for range numNext { fv, _ = fv.Update(nextMatchKeyMsg) } expectedAfterNext := []string{ strings.Repeat(unfocusedStyle.Render("a"), numNext) + focusedStyle.Render("a") + strings.Repeat(unfocusedStyle.Render("a"), fv.GetWidth()-numNext-1), } restAfterNext := make([]string, fv.GetHeight()-3) // -3 for first row, filter, footer for i := range restAfterNext { restAfterNext[i] = strings.Repeat(unfocusedStyle.Render("a"), fv.GetWidth()) } restAfterNext = append(restAfterNext, fmt.Sprintf("[exact] a (%d/%d matches on 1 items)", numNext+1, numAs)) restAfterNext = append(restAfterNext, footerStyle.Render("99% (1/1)")) expectedAfterNextView := internal.Pad(fv.GetWidth(), fv.GetHeight(), append(expectedAfterNext, restAfterNext...)) internal.CmpStr(t, expectedAfterNextView, fv.View()) } internal.RunWithTimeout(t, runTest, 200*time.Millisecond) } func TestScrollingWithManyHighlightedMatchesPerformance(t *testing.T) { runTest := func(t *testing.T) { width := 80 height := 20 fv := makeFilterableViewport( width, height, []viewport.Option[object]{ viewport.WithWrapText[object](false), }, []Option[object]{}, ) numItems := height * 5 items := make([]string, numItems) for i := range items { items[i] = strings.Repeat("a", width) } fv.SetObjects(stringsToItems(items)) // everything on screen highlighted fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) firstView := fv.View() if !strings.Contains(firstView, focusedStyle.Render("a")) { t.Fatal("expected focused match in initial view") } for i := range height { fv, _ = fv.Update(downKeyMsg) view := fv.View() // after first scroll, focused match should go out of view // but unfocused matches should still be visible if i > 0 && strings.Contains(view, focusedStyle.Render("a")) { t.Errorf("focused match should be out of view after scrolling %d times", i+1) } if !strings.Contains(view, unfocusedStyle.Render("a")) { t.Errorf("unfocused matches should still be visible after scrolling %d times", i+1) } } } internal.RunWithTimeout(t, runTest, 220*time.Millisecond) } func TestScrollingWithManyHighlightedMatchesPerformanceSelectionEnabled(t *testing.T) { runTest := func(t *testing.T) { width := 80 height := 20 fv := makeFilterableViewport( width, height, []viewport.Option[object]{ viewport.WithWrapText[object](false), viewport.WithSelectionEnabled[object](true), }, []Option[object]{}, ) numItems := height * 5 items := make([]string, numItems) for i := range items { items[i] = strings.Repeat("a", width) } fv.SetObjects(stringsToItems(items)) // everything on screen highlighted fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) firstView := fv.View() if !strings.Contains(firstView, focusedStyle.Render("a")) { t.Fatal("expected focused match in initial view") } // with selection enabled, the viewport keeps the selected item (with focused match) in view // height - 2 accounts for header and footer lines, leaving content lines contentLines := height - 2 for i := range height { fv, _ = fv.Update(downKeyMsg) view := fv.View() // for first (contentLines - 1) scrolls, focused match stays in view // after that, selection scrolls past visible area if i < contentLines-1 { if !strings.Contains(view, focusedStyle.Render("a")) { t.Errorf("focused match should stay in view after moving selection down %d times", i+1) } } else { if strings.Contains(view, focusedStyle.Render("a")) { t.Errorf("focused match should be out of view after moving selection down %d times", i+1) } } // unfocused matches should always be visible if !strings.Contains(view, unfocusedStyle.Render("a")) { t.Errorf("unfocused matches should still be visible after moving selection down %d times", i+1) } } } internal.RunWithTimeout(t, runTest, 200*time.Millisecond) } func TestMatchNavigationWithSelectionEnabled(t *testing.T) { fv := makeFilterableViewport( 40, 5, []viewport.Option[object]{ viewport.WithWrapText[object](false), viewport.WithSelectionEnabled[object](true), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apple cake", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "apple" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedFirstMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple") + selectedItemStyle.Render(" pie"), "banana bread", unfocusedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("33% (1/3)"), }) internal.CmpStr(t, expectedFirstMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expectedSecondMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("apple") + " pie", "banana bread", focusedStyle.Render("apple") + selectedItemStyle.Render(" cake"), "[exact] apple (2/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedSecondMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedFirstMatch, fv.View()) } func TestFocusedIfSelectedMatchStyle(t *testing.T) { fv := makeFilterableViewport( 40, 5, []viewport.Option[object]{ viewport.WithWrapText[object](false), viewport.WithSelectionEnabled[object](true), }, []Option[object]{ WithStyles[object](Styles{ Match: MatchStyles{ Focused: focusedStyle, FocusedIfSelected: focusedIfSelectedStyle, Unfocused: unfocusedStyle, }, }), }, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apple cake", })) // start filtering for "apple" fv, _ = fv.Update(filterKeyMsg) for _, c := range "apple" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // focused match is on item 0 (selected) — should use focusedIfSelectedStyle expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedIfSelectedStyle.Render("apple") + selectedItemStyle.Render(" pie"), "banana bread", unfocusedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("33% (1/3)"), }) internal.CmpStr(t, expected, fv.View()) // navigate to next match — focused match moves to item 2 (now selected), // item 0 becomes unfocused fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("apple") + " pie", "banana bread", focusedIfSelectedStyle.Render("apple") + selectedItemStyle.Render(" cake"), "[exact] apple (2/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expected, fv.View()) // navigate back — focused match on item 0 again (selected), // uses focusedIfSelectedStyle again fv, _ = fv.Update(prevMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedIfSelectedStyle.Render("apple") + selectedItemStyle.Render(" pie"), "banana bread", unfocusedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("33% (1/3)"), }) internal.CmpStr(t, expected, fv.View()) } func TestFocusedIfSelectedWithReverseSelection(t *testing.T) { reverseStyle := lipgloss.NewStyle().Reverse(true) cyanFgStyle := lipgloss.NewStyle().Foreground(lipgloss.Cyan) reverseCyanStyle := lipgloss.NewStyle().Reverse(true).Foreground(lipgloss.Cyan) brightRedStyle := lipgloss.NewStyle().Foreground(lipgloss.BrightRed) fv := makeFilterableViewport( 40, 5, []viewport.Option[object]{ viewport.WithWrapText[object](false), viewport.WithSelectionEnabled[object](true), viewport.WithStyles[object](viewport.Styles{ SelectedItemStyle: reverseStyle, }), }, []Option[object]{ WithStyles[object](Styles{ Match: MatchStyles{ Focused: reverseCyanStyle, FocusedIfSelected: cyanFgStyle, Unfocused: brightRedStyle, }, }), }, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apple cake", })) // Apply filter fv, _ = fv.Update(filterKeyMsg) for _, c := range "apple" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // After apply: focused match (1/2) on item 0 which IS selected // FocusedIfSelected should be used for "apple", SelectedItemStyle for " pie" expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ cyanFgStyle.Render("apple") + reverseStyle.Render(" pie"), "banana bread", brightRedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", "33% (1/3)", }) internal.CmpStr(t, expected, fv.View()) // Press n — focused match moves to item 2, selection follows fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ brightRedStyle.Render("apple") + " pie", "banana bread", cyanFgStyle.Render("apple") + reverseStyle.Render(" cake"), "[exact] apple (2/2 matches on 2 items)", "100% (3/3)", }) internal.CmpStr(t, expected, fv.View()) // Move selection up — focused match stays on item 2 but selection moves to item 1, // so focused match should now use Focused (reverse+cyan) instead of FocusedIfSelected fv, _ = fv.Update(internal.MakeKeyMsg('k')) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ brightRedStyle.Render("apple") + " pie", reverseStyle.Render("banana bread"), reverseCyanStyle.Render("apple") + " cake", "[exact] apple (2/2 matches on 2 items)", "66% (2/3)", }) internal.CmpStr(t, expected, fv.View()) } func TestMatchNavigationWithSelectionEnabledWrap(t *testing.T) { fv := makeFilterableViewport( 20, 6, []viewport.Option[object]{ viewport.WithWrapText[object](true), viewport.WithSelectionEnabled[object](true), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "the quick brown fox", "jumped over the lazy dog", "the end", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "the" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedFirstMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("the") + selectedItemStyle.Render(" quick brown fox"), "jumped over " + unfocusedStyle.Render("the") + " lazy", " dog", unfocusedStyle.Render("the") + " end", "[exact] the (1/3...", footerStyle.Render("33% (1/3)"), }) internal.CmpStr(t, expectedFirstMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expectedSecondMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("the") + " quick brown fox", selectedItemStyle.Render("jumped over ") + focusedStyle.Render("the") + selectedItemStyle.Render(" lazy"), selectedItemStyle.Render(" dog"), unfocusedStyle.Render("the") + " end", "[exact] the (2/3...", footerStyle.Render("66% (2/3)"), }) internal.CmpStr(t, expectedSecondMatch, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expectedThirdMatch := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("the") + " quick brown fox", "jumped over " + unfocusedStyle.Render("the") + " lazy", " dog", focusedStyle.Render("the") + selectedItemStyle.Render(" end"), "[exact] the (3/3...", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedThirdMatch, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedSecondMatch, fv.View()) } func TestMatchNavigationWithSelectionEnabledWrapScrolling(t *testing.T) { fv := makeFilterableViewport( 5, 4, []viewport.Option[object]{ viewport.WithWrapText[object](true), viewport.WithSelectionEnabled[object](true), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "long long long long ", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "long " { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedTopFocused := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("long "), unfocusedStyle.Render("long "), "[e...", footerStyle.Render("10..."), }) internal.CmpStr(t, expectedTopFocused, fv.View()) for range 2 { fv, _ = fv.Update(nextMatchKeyMsg) expectedBottomFocused := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("long "), focusedStyle.Render("long "), "[e...", footerStyle.Render("10..."), }) internal.CmpStr(t, expectedBottomFocused, fv.View()) } fv, _ = fv.Update(prevMatchKeyMsg) internal.CmpStr(t, expectedTopFocused, fv.View()) } func TestToggleWrap(t *testing.T) { fv := makeFilterableViewport( 20, 6, []viewport.Option[object]{ viewport.WithWrapText[object](false), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "the quick brown fox jumped over the lazy dog", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "lazy" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // at first the match is in view expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "...ped over the " + focusedStyle.Render("l..."), "", "", "", "[exact] lazy (1/...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expected, fv.View()) // when we toggle wrapping here, the match happens to still be in view, but we don't force that // otherwise there would be surprising jumps if the user is scrolled away from the current match and toggles wrap fv.SetWrapText(true) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "the quick brown fox ", "jumped over the " + focusedStyle.Render("lazy"), " dog", "", "[exact] lazy (1/...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expected, fv.View()) // the match is out of view here, demonstrating the above comment fv.SetWrapText(false) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "the quick brown f...", "", "", "", "[exact] lazy (1/...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expected, fv.View()) } func TestApplyFilterScrollsToFirstMatch(t *testing.T) { fv := makeFilterableViewport( 30, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", "line 4", "line 5", "line 6", "match here", "line 8", })) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "No Filter", footerStyle.Render("37% (3/8)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(filterKeyMsg) for _, c := range "match" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 5", "line 6", focusedStyle.Render("match") + " here", "[exact] match (1/1 matches...", footerStyle.Render("87% (7/8)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(cancelFilterKeyMsg) fv, _ = fv.Update(filterKeyMsg) for _, c := range "lin" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("lin") + "e 1", unfocusedStyle.Render("lin") + "e 2", unfocusedStyle.Render("lin") + "e 3", "[exact] lin (1/7 matches o...", footerStyle.Render("37% (3/8)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("lin") + "e 1", focusedStyle.Render("lin") + "e 2", unfocusedStyle.Render("lin") + "e 3", "[exact] lin (2/7 matches o...", footerStyle.Render("37% (3/8)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('e')) fv, _ = fv.Update(applyFilterKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("line") + " 1", unfocusedStyle.Render("line") + " 2", unfocusedStyle.Render("line") + " 3", "[exact] line (1/7 matches ...", footerStyle.Render("37% (3/8)"), }) internal.CmpStr(t, expected, fv.View()) } func TestSetObjectsPreservesMatchIndex(t *testing.T) { fv := makeFilterableViewport( 30, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "match one", "match two", "match three", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "match" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) fv, _ = fv.Update(nextMatchKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("match") + " one", focusedStyle.Render("match") + " two", unfocusedStyle.Render("match") + " three", "[exact] match (2/3 matches...", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expected, fv.View()) // add a new item - should stay on match 2, now 2/4 fv.SetObjects(stringsToItems([]string{ "match one", "match new", "match two", "match three", })) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("match") + " one", focusedStyle.Render("match") + " new", unfocusedStyle.Render("match") + " two", "[exact] match (2/4 matches...", footerStyle.Render("75% (3/4)"), }) internal.CmpStr(t, expected, fv.View()) } func TestAppendObjectsPreservesMatchIndex(t *testing.T) { fv := makeFilterableViewport( 30, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "match one", "match two", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "match" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) fv, _ = fv.Update(nextMatchKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("match") + " one", focusedStyle.Render("match") + " two", "", "[exact] match (2/2 matches...", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expected, fv.View()) // append new items - should stay on match 2, now 2/4 fv.AppendObjects(stringsToItems([]string{ "match three", "match four", })) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("match") + " one", focusedStyle.Render("match") + " two", unfocusedStyle.Render("match") + " three", "[exact] match (2/4 matches...", footerStyle.Render("75% (3/4)"), }) internal.CmpStr(t, expected, fv.View()) } func TestAppendObjectsWithNil(t *testing.T) { fv := makeFilterableViewport( 30, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "item one", "item two", })) // appending nil should not crash or change objects fv.AppendObjects(nil) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "item one", "item two", "", "No Filter", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expected, fv.View()) } func TestAppendObjectsRespectsMatchLimit(t *testing.T) { fv := makeFilterableViewport( 40, 5, []viewport.Option[object]{}, []Option[object]{ WithMaxMatchLimit[object](5), }, ) fv.SetObjects(stringsToItems([]string{ "match one", "match two", "match three", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "match" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // 3 matches, under limit expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("match") + " one", unfocusedStyle.Render("match") + " two", unfocusedStyle.Render("match") + " three", "[exact] match (1/3 matches on 3 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expected, fv.View()) // append 3 more items, which will exceed the limit of 5 fv.AppendObjects(stringsToItems([]string{ "match four", "match five", "match six", })) // should now show limit exceeded message and all items expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "match one", "match two", "match three", "[exact] match (5+ matches on 6+ items)", footerStyle.Render("50% (3/6)"), }) internal.CmpStr(t, expected, fv.View()) } func TestAppendObjectsIncrementalWithMatchingItemsOnly(t *testing.T) { fv := makeFilterableViewport( 40, 6, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](true), }, ) fv.SetObjects(stringsToItems([]string{ "match one", "nothing here", "match two", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "match" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // should show only matching items expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("match") + " one", unfocusedStyle.Render("match") + " two", "", "", "[exact] match (1/2 matches on 2 item...", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expected, fv.View()) // append mixed items (some matching, some not) fv.AppendObjects(stringsToItems([]string{ "nothing", "match three", "also nothing", "match four", })) // should show only matching items, including new matches expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("match") + " one", unfocusedStyle.Render("match") + " two", unfocusedStyle.Render("match") + " three", unfocusedStyle.Render("match") + " four", "[exact] match (1/4 matches on 4 item...", footerStyle.Render("100% (4/4)"), }) internal.CmpStr(t, expected, fv.View()) } func TestVerticalPadding(t *testing.T) { fv := makeFilterableViewport( 30, 10, []viewport.Option[object]{ viewport.WithWrapText[object](false), }, []Option[object]{ WithVerticalPad[object](2), }, ) // create many items so we can test padding items := make([]string, 50) for i := range 50 { if i == 10 || i == 20 || i == 30 { items[i] = fmt.Sprintf("match item %d", i) } else { items[i] = fmt.Sprintf("item %d", i) } } fv.SetObjects(stringsToItems(items)) // apply filter to find "match" fv, _ = fv.Update(filterKeyMsg) for _, c := range "match" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // first match at item 10 should have at least 2 lines above and below // with 8 content lines and verticalPad=2, it shows items 5-12 expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "item 5", "item 6", "item 7", "item 8", "item 9", focusedStyle.Render("match") + " item 10", "item 11", "item 12", "[exact] match (1/3 matches...", footerStyle.Render("26% (13/50)"), }) internal.CmpStr(t, expectedView, fv.View()) // navigate to second match at item 20 fv, _ = fv.Update(nextMatchKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "item 15", "item 16", "item 17", "item 18", "item 19", focusedStyle.Render("match") + " item 20", "item 21", "item 22", "[exact] match (2/3 matches...", footerStyle.Render("46% (23/50)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestHorizontalPadding(t *testing.T) { fv := makeFilterableViewport( 10, 5, []viewport.Option[object]{ viewport.WithWrapText[object](false), }, []Option[object]{ WithHorizontalPad[object](3), }, ) fv.SetObjects(stringsToItems([]string{ "short goose text with some more words here", "another goose line with extra padding test", })) fv, _ = fv.Update(filterKeyMsg) for _, c := range "goose" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) // first match attempted padding of 3 on each side expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ ".." + focusedStyle.Render(".oose") + "...", "... " + unfocusedStyle.Render("goo..") + ".", "", "[exact]...", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) // second match attempted padding of 3 on each side fv, _ = fv.Update(nextMatchKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("...se") + " t...", ".." + focusedStyle.Render(".oose") + "...", "", "[exact]...", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestMatchNavigationWithVerticalPadding(t *testing.T) { h := 34 fv := makeFilterableViewport( 100, h, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{ WithVerticalPad[object](10), }, ) nItems := 50 items := make([]string, nItems) for i := range nItems { items[i] = "hi" } fv.SetObjects(stringsToItems(items)) fv, _ = fv.Update(filterKeyMsg) for _, c := range "hi" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedStrings := []string{ focusedStyle.Render("hi"), } for i := 0; i < h-3; i++ { // -3 for filter line, focused line, & footer expectedStrings = append(expectedStrings, unfocusedStyle.Render("hi")) } expectedStrings = append(expectedStrings, "[exact] hi (1/50 matches on 50 items)") expectedStrings = append(expectedStrings, footerStyle.Render("64% (32/50)")) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), expectedStrings) internal.CmpStr(t, expectedView, fv.View()) // go to bottom match, then previous match 21 times to reach the 10 padding above fv, _ = fv.Update(prevMatchKeyMsg) nPrev := 21 for range nPrev { fv, _ = fv.Update(prevMatchKeyMsg) } expectedStrings = []string{} for range 10 { expectedStrings = append(expectedStrings, unfocusedStyle.Render("hi")) } expectedStrings = append(expectedStrings, focusedStyle.Render("hi")) for i := 0; i < h-10-3; i++ { expectedStrings = append(expectedStrings, unfocusedStyle.Render("hi")) } expectedStrings = append(expectedStrings, "[exact] hi (29/50 matches on 50 items)") expectedStrings = append(expectedStrings, footerStyle.Render("100% (50/50)")) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), expectedStrings) internal.CmpStr(t, expectedView, fv.View()) // next previous match should keep 10 lines above and scroll one up fv, _ = fv.Update(prevMatchKeyMsg) expectedStrings = []string{} for range 10 { expectedStrings = append(expectedStrings, unfocusedStyle.Render("hi")) } expectedStrings = append(expectedStrings, focusedStyle.Render("hi")) for i := 0; i < h-10-3; i++ { expectedStrings = append(expectedStrings, unfocusedStyle.Render("hi")) } expectedStrings = append(expectedStrings, "[exact] hi (28/50 matches on 50 items)") expectedStrings = append(expectedStrings, footerStyle.Render("98% (49/50)")) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), expectedStrings) internal.CmpStr(t, expectedView, fv.View()) } func TestMatchNavigationRolloverWithVerticalPadding(t *testing.T) { fv := makeFilterableViewport( 100, 10, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{ WithVerticalPad[object](10), }, ) fv.SetSelectionEnabled(true) nItems := 20 items := make([]string, nItems) for i := range nItems { items[i] = "hi" } fv.SetObjects(stringsToItems(items)) fv, _ = fv.Update(filterKeyMsg) for _, c := range "hi" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), "[exact] hi (1/20 matches on 20 items)", footerStyle.Render("5% (1/20)"), }) internal.CmpStr(t, expectedView, fv.View()) // previous match (last one) fv, _ = fv.Update(prevMatchKeyMsg) expectedViewAfterScroll := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), unfocusedStyle.Render("hi"), focusedStyle.Render("hi"), "[exact] hi (20/20 matches on 20 items)", footerStyle.Render("100% (20/20)"), }) internal.CmpStr(t, expectedViewAfterScroll, fv.View()) } func stringsToItems(vals []string) []object { items := make([]object, len(vals)) for i, s := range vals { items[i] = object{item: item.NewItem(s)} } return items } func TestSelectionAndFocusedMatchAfterItemsChange(t *testing.T) { fv := makeFilterableViewport( 100, 5, []viewport.Option[object]{ viewport.WithWrapText[object](false), viewport.WithSelectionEnabled[object](true), }, []Option[object]{}, ) initialItems := []string{ "1 2", "1 2", "1 2", "1 2", "1 2", } fv.SetObjects(stringsToItems(initialItems)) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('1')) fv, _ = fv.Update(applyFilterKeyMsg) // focus second match fv, _ = fv.Update(nextMatchKeyMsg) // move selection to third item fv, _ = fv.Update(downKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("1") + " 2", focusedStyle.Render("1") + " 2", unfocusedStyle.Render("1") + selectedItemStyle.Render(" 2"), "[exact] 1 (2/5 matches on 5 items)", footerStyle.Render("60% (3/5)"), }) internal.CmpStr(t, expected, fv.View()) // add a new item initialItems = append(initialItems, "1 2") fv.SetObjects(stringsToItems(initialItems)) // neither match nor selection should change expected = strings.ReplaceAll(expected, "2/5 matches on 5", "2/6 matches on 6") expected = strings.ReplaceAll(expected, "60% (3/5)", "50% (3/6)") internal.CmpStr(t, expected, fv.View()) // changing match should change selection too fv, _ = fv.Update(nextMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("1") + " 2", unfocusedStyle.Render("1") + " 2", focusedStyle.Render("1") + selectedItemStyle.Render(" 2"), "[exact] 1 (3/6 matches on 6 items)", footerStyle.Render("50% (3/6)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(prevMatchKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("1") + " 2", focusedStyle.Render("1") + selectedItemStyle.Render(" 2"), unfocusedStyle.Render("1") + " 2", "[exact] 1 (2/6 matches on 6 items)", footerStyle.Render("33% (2/6)"), }) internal.CmpStr(t, expected, fv.View()) } func TestCurrentMatchNotCenteredAfterItemsChange(t *testing.T) { fv := makeFilterableViewport( 100, 4, []viewport.Option[object]{}, []Option[object]{}, ) initialItems := []string{ "1", "2", "3", "4", "5", "6", } fv.SetObjects(stringsToItems(initialItems)) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('1')) fv, _ = fv.Update(applyFilterKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("1"), "2", "[exact] 1 (1/1 matches on 1 items)", footerStyle.Render("33% (2/6)"), }) internal.CmpStr(t, expected, fv.View()) // scroll so focused match out of view fv, _ = fv.Update(downKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "2", "3", "[exact] 1 (1/1 matches on 1 items)", footerStyle.Render("50% (3/6)"), }) internal.CmpStr(t, expected, fv.View()) initialItems = append(initialItems, "7", "8", "9") fv.SetObjects(stringsToItems(initialItems)) newExpected := strings.ReplaceAll(expected, "50% (3/6)", "33% (3/9)") internal.CmpStr(t, newExpected, fv.View()) } func TestMaxMatchLimit(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithMaxMatchLimit[object](5), WithMatchingItemsOnly[object](true), // Should be ignored when limit exceeded }, ) items := []string{ "apple apple", "apple apple", "apple apple", "apple apple", "apple apple", "banana", } fv.SetObjects(stringsToItems(items)) fv, _ = fv.Update(filterKeyMsg) for _, c := range "app" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple apple", "apple apple", "apple apple", "apple apple", "[exact] Filter: app (5+ matches on 3+ items)", footerStyle.Render("66% (4/6)"), }) internal.CmpStr(t, expectedView, fv.View()) // view should be unchanged by navigating matches when limit exceeded fv, _ = fv.Update(nextMatchKeyMsg) internal.CmpStr(t, expectedView, fv.View()) // clear search filter fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(cancelFilterKeyMsg) if fv.matchLimitExceeded { t.Error("matchLimitExceeded should be false after clearing filter") } // filter that doesn't exceed limit fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('b')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("b") + "anana", "", "", "", "[exact] Filter: b (1/1 matches on 1 items) showing matches only", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestMaxMatchLimitWithAppendObjects(t *testing.T) { fv := makeFilterableViewport( 80, 3, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithMaxMatchLimit[object](3), }, ) items := []string{ "a", "bbb", } fv.SetObjects(stringsToItems(items)) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("a"), "[exact] Filter: a (1/1 matches on 1 items)", footerStyle.Render("50% (1/2)"), }) internal.CmpStr(t, expected, fv.View()) // append new items that cause match limit to be exceeded fv.AppendObjects(stringsToItems([]string{"aaa", "aaa"})) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "a", "[exact] Filter: a (3+ matches on 2+ items)", footerStyle.Render("25% (1/4)"), }) internal.CmpStr(t, expected, fv.View()) } func TestMaxMatchLimitUnlimited(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithMaxMatchLimit[object](0), // unlimited }, ) fv.SetObjects(stringsToItems([]string{ "apple apple", })) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("a") + "pple " + unfocusedStyle.Render("a") + "pple", "", "", "", "[exact] Filter: a (1/2 matches on 1 items)", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestToggleWrap_DoesNotJumpToMatchWhenScrolledAway(t *testing.T) { fv := makeFilterableViewport( 30, 5, []viewport.Option[object]{ viewport.WithWrapText[object](false), viewport.WithSelectionEnabled[object](true), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", "line 4", "line 5", "line 6", "match here", "line 8", })) expected := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ selectedItemStyle.Render("line 1"), "line 2", "line 3", "No Filter", footerStyle.Render("12% (1/8)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(filterKeyMsg) for _, c := range "match" { fv, _ = fv.Update(internal.MakeKeyMsg(c)) } fv, _ = fv.Update(applyFilterKeyMsg) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 5", "line 6", focusedStyle.Render("match") + selectedItemStyle.Render(" here"), "[exact] match (1/1 matches...", footerStyle.Render("87% (7/8)"), }) internal.CmpStr(t, expected, fv.View()) fv, _ = fv.Update(internal.MakeKeyMsg('g')) expected = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ selectedItemStyle.Render("line 1"), "line 2", "line 3", "[exact] match (1/1 matches...", footerStyle.Render("12% (1/8)"), }) internal.CmpStr(t, expected, fv.View()) // toggling wrap should not change view fv.SetWrapText(true) internal.CmpStr(t, expected, fv.View()) fv.SetWrapText(false) internal.CmpStr(t, expected, fv.View()) } func TestFilterLineAtBottom(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Filter line should appear just above footer, not at top expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) // Apply a filter - filter line still at bottom fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('l')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("l") + "ine 1", unfocusedStyle.Render("l") + "ine 2", unfocusedStyle.Render("l") + "ine 3", "[exact] Filter: l (1/3 matches on 3 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestEmptyTextAtBottom(t *testing.T) { fv := makeFilterableViewport( 40, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No active filter"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Empty text should appear just above footer when filter mode is off expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "No active filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePositionWithWrap(t *testing.T) { fv := makeFilterableViewport( 15, 7, []viewport.Option[object]{ viewport.WithWrapText[object](true), }, []Option[object]{ WithEmptyText[object]("None"), }, ) fv.SetObjects(stringsToItems([]string{ "short", "longer text that wraps", })) // Filter line should appear just above footer, after wrapped content expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "short", "longer text tha", "t wraps", "", "", "None", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestFilterLinePositionDuringEditing(t *testing.T) { fv := makeFilterableViewport( 50, 5, []viewport.Option[object]{}, []Option[object]{ WithPrefixText[object]("Filter:"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", })) // Enter filter editing mode fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('t')) fv, _ = fv.Update(internal.MakeKeyMsg('e')) fv, _ = fv.Update(internal.MakeKeyMsg('s')) fv, _ = fv.Update(internal.MakeKeyMsg('t')) // Cursor should appear in filter line at bottom expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", "line 2", "line 3", "[exact] Filter: test" + cursorStyle.Render(" ") + " (no matches)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestHeightConsistencyAfterRefactor(t *testing.T) { widths := []int{10, 20, 50} heights := []int{3, 5, 10, 20} for _, w := range widths { for _, h := range heights { fv := makeFilterableViewport( w, h, []viewport.Option[object]{}, []Option[object]{}, ) // Verify GetHeight returns same value as SetHeight input if got := fv.GetHeight(); got != h { t.Errorf("width=%d height=%d: GetHeight() = %d, want %d", w, h, got, h) } // Verify GetWidth returns same value if got := fv.GetWidth(); got != w { t.Errorf("width=%d height=%d: GetWidth() = %d, want %d", w, h, got, w) } // Set new dimensions and verify newH := h + 5 fv.SetHeight(newH) if got := fv.GetHeight(); got != newH { t.Errorf("after SetHeight(%d): GetHeight() = %d, want %d", newH, got, newH) } newW := w + 10 fv.SetWidth(newW) if got := fv.GetWidth(); got != newW { t.Errorf("after SetWidth(%d): GetWidth() = %d, want %d", newW, got, newW) } } } } func TestContentStartsAtTop(t *testing.T) { fv := makeFilterableViewport( 40, 6, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "line 1", "line 2", "line 3", "line 4", })) // Content should start at the very top of the viewport // not shifted down by any filter header expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "line 1", // Content starts at top "line 2", "line 3", "line 4", "No Filter", // Filter line just above footer footerStyle.Render("100% (4/4)"), // Footer at bottom }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilter_ExactMode(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apple cake", })) fv.SetFilter("apple", FilterExact) if fv.GetFilterText() != "apple" { t.Errorf("expected filter text 'apple', got '%s'", fv.GetFilterText()) } if fv.GetActiveFilterMode().Name != FilterExact { t.Errorf("expected active filter mode %q, got %q", FilterExact, fv.GetActiveFilterMode().Name) } expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple") + " pie", "banana bread", unfocusedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilter_RegexMode(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apricot tart", })) fv.SetFilter("ap.*e", FilterRegex) if fv.GetFilterText() != "ap.*e" { t.Errorf("expected filter text 'ap.*e', got '%s'", fv.GetFilterText()) } if fv.GetActiveFilterMode().Name != FilterRegex { t.Errorf("expected active filter mode %q, got %q", FilterRegex, fv.GetActiveFilterMode().Name) } // regex ap.*e matches "apple pie" (greedy match to the last 'e') expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple pie"), "banana bread", "apricot tart", "[regex] ap.*e (1/1 matches on 1 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilter_ClearsFilterWhenEmpty(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", })) // First set a filter fv.SetFilter("apple", FilterExact) if fv.GetFilterText() != "apple" { t.Errorf("expected filter text 'apple', got '%s'", fv.GetFilterText()) } // Then clear it fv.SetFilter("", "") if fv.GetFilterText() != "" { t.Errorf("expected empty filter text, got '%s'", fv.GetFilterText()) } expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple pie", "banana bread", "", "No Filter", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilter_SwitchBetweenModes(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "test123", "test456", })) // Start with exact mode fv.SetFilter("test", FilterExact) if fv.GetActiveFilterMode().Name != FilterExact { t.Errorf("expected active filter mode %q, got %q", FilterExact, fv.GetActiveFilterMode().Name) } // Switch to regex mode with same filter fv.SetFilter("test\\d+", FilterRegex) if fv.GetActiveFilterMode().Name != FilterRegex { t.Errorf("expected active filter mode %q, got %q", FilterRegex, fv.GetActiveFilterMode().Name) } if fv.GetFilterText() != "test\\d+" { t.Errorf("expected filter text 'test\\d+', got '%s'", fv.GetFilterText()) } // Both lines should match the regex expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("test123"), unfocusedStyle.Render("test456"), "", "[regex] test\\d+ (1/2 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilter_WithMatchingItemsOnly(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](true), }, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apple cake", })) fv.SetFilter("apple", FilterExact) // Only matching items should be shown expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple") + " pie", unfocusedStyle.Render("apple") + " cake", "", "[exact] apple (1/2 matches on 2 items) showing matches only", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetMatchingItemsOnly_EnableShowsOnlyMatches(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](false), // start with all items shown }, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apple cake", })) fv.SetFilter("apple", FilterExact) // Initially all items shown if fv.GetMatchingItemsOnly() { t.Error("expected GetMatchingItemsOnly to be false initially") } expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple") + " pie", "banana bread", unfocusedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) // Enable matching items only fv.SetMatchingItemsOnly(true) if !fv.GetMatchingItemsOnly() { t.Error("expected GetMatchingItemsOnly to be true after SetMatchingItemsOnly(true)") } expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple") + " pie", unfocusedStyle.Render("apple") + " cake", "", "[exact] apple (1/2 matches on 2 items) showing matches only", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetMatchingItemsOnly_DisableShowsAllItems(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](true), // start with matches only }, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apple cake", })) fv.SetFilter("apple", FilterExact) // Initially only matching items shown if !fv.GetMatchingItemsOnly() { t.Error("expected GetMatchingItemsOnly to be true initially") } expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple") + " pie", unfocusedStyle.Render("apple") + " cake", "", "[exact] apple (1/2 matches on 2 items) showing matches only", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) // Disable matching items only fv.SetMatchingItemsOnly(false) if fv.GetMatchingItemsOnly() { t.Error("expected GetMatchingItemsOnly to be false after SetMatchingItemsOnly(false)") } expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple") + " pie", "banana bread", unfocusedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetMatchingItemsOnly_ToggleBackAndForth(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", "apricot", })) fv.SetFilter("a", FilterExact) // Default is false if fv.GetMatchingItemsOnly() { t.Error("expected default GetMatchingItemsOnly to be false") } // Toggle to true fv.SetMatchingItemsOnly(true) if !fv.GetMatchingItemsOnly() { t.Error("expected GetMatchingItemsOnly to be true") } // Toggle back to false fv.SetMatchingItemsOnly(false) if fv.GetMatchingItemsOnly() { t.Error("expected GetMatchingItemsOnly to be false") } // Toggle to true again fv.SetMatchingItemsOnly(true) if !fv.GetMatchingItemsOnly() { t.Error("expected GetMatchingItemsOnly to be true") } } func TestSetMatchingItemsOnly_NoEffectWithoutFilter(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithEmptyText[object]("No Filter"), }, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", "cherry", })) // Set matching items only without a filter - all items should still show fv.SetMatchingItemsOnly(true) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "banana", "cherry", "No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilterableViewportStyles_ChangesMatchStyles(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "apple pie", "banana bread", "apple cake", })) fv.SetFilter("apple", FilterExact) // Verify initial styles are applied expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("apple") + " pie", "banana bread", unfocusedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) // Change to new styles newFocusedStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("1")).Background(lipgloss.Color("2")) newUnfocusedStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("3")).Background(lipgloss.Color("4")) fv.SetFilterableViewportStyles(Styles{ Match: MatchStyles{ Focused: newFocusedStyle, Unfocused: newUnfocusedStyle, }, }) // Verify new styles are applied expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ newFocusedStyle.Render("apple") + " pie", "banana bread", newUnfocusedStyle.Render("apple") + " cake", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilterableViewportStyles_UpdatesExistingHighlights(t *testing.T) { fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "test one", "test two", "test three", })) fv.SetFilter("test", FilterExact) // Navigate to second match fv, _ = fv.Update(nextMatchKeyMsg) // Now second match should be focused expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ unfocusedStyle.Render("test") + " one", focusedStyle.Render("test") + " two", unfocusedStyle.Render("test") + " three", "[exact] test (2/3 matches on 3 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) // Change styles - should update all highlights including the focused one newFocusedStyle := lipgloss.NewStyle().Bold(true).Underline(true) newUnfocusedStyle := lipgloss.NewStyle().Italic(true) fv.SetFilterableViewportStyles(Styles{ Match: MatchStyles{ Focused: newFocusedStyle, Unfocused: newUnfocusedStyle, }, }) // Verify new styles applied with correct focus expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ newUnfocusedStyle.Render("test") + " one", newFocusedStyle.Render("test") + " two", newUnfocusedStyle.Render("test") + " three", "[exact] test (2/3 matches on 3 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestAdjustObjectsForFilter_CalledOnFilterChange(t *testing.T) { var hookCalls []struct { filterText string mode FilterModeName } fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithAdjustObjectsForFilter[object](func(filterText string, mode FilterModeName) []object { hookCalls = append(hookCalls, struct { filterText string mode FilterModeName }{filterText, mode}) return nil // return nil to keep existing objects }), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) // Start filter mode and type fv, _ = fv.Update(filterKeyMsg) _, _ = fv.Update(internal.MakeKeyMsg('a')) if len(hookCalls) < 1 { t.Fatal("expected hook to be called at least once") } // Check last call has correct filter text and mode lastCall := hookCalls[len(hookCalls)-1] if lastCall.filterText != "a" { t.Errorf("expected filterText 'a', got %q", lastCall.filterText) } if lastCall.mode != FilterExact { t.Errorf("expected mode %q (exact), got %q", FilterExact, lastCall.mode) } } func TestAdjustObjectsForFilter_CalledWithRegexMode(t *testing.T) { var lastMode FilterModeName fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithAdjustObjectsForFilter[object](func(_ string, mode FilterModeName) []object { lastMode = mode return nil }), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) // Start regex filter mode fv, _ = fv.Update(regexFilterKeyMsg) _, _ = fv.Update(internal.MakeKeyMsg('a')) if lastMode != FilterRegex { t.Errorf("expected mode %q (regex), got %q", FilterRegex, lastMode) } } func TestAdjustObjectsForFilter_ReplacesObjects(t *testing.T) { // Hook returns a different set of objects based on filter fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](false), WithAdjustObjectsForFilter[object](func(filterText string, _ FilterModeName) []object { if filterText == "" { return stringsToItems([]string{"apple", "banana", "cherry"}) } // When filtering, return parent + matching child (like a tree) return stringsToItems([]string{"parent", "child-apple"}) }), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana", "cherry"})) // Before filter: should show original objects expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "apple", "banana", "cherry", "", "No Filter", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) // Apply filter with "a" fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // After filter: should show hook's objects with "a" highlighted expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "p" + focusedStyle.Render("a") + "rent", "child-" + unfocusedStyle.Render("a") + "pple", "", "", "[exact] a (1/2 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestAdjustObjectsForFilter_NilKeepsExistingObjects(t *testing.T) { hookCallCount := 0 fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](false), WithAdjustObjectsForFilter[object](func(_ string, _ FilterModeName) []object { hookCallCount++ return nil // explicitly return nil }), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // hook is called twice: once when mode activates (empty text), once when text changes to "a" if hookCallCount != 2 { t.Errorf("hook should have been called twice, got %d", hookCallCount) } // Original objects should still be shown, with "a" highlighted expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("a") + "pple", "b" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a") + "n" + unfocusedStyle.Render("a"), "", "[exact] a (1/4 matches on 2 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestAdjustObjectsForFilter_WithMatchingItemsOnlyTrue(t *testing.T) { // Hook provides objects, but only matching ones should be shown fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](true), WithAdjustObjectsForFilter[object](func(_ string, _ FilterModeName) []object { // Return parent + child, but only child matches "apple" return stringsToItems([]string{"parent-node", "child-apple"}) }), }, ) fv.SetObjects(stringsToItems([]string{"initial"})) fv.SetFilter("apple", FilterExact) // Only child-apple matches "apple", so only it should be shown expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "child-" + focusedStyle.Render("apple"), "", "", "[exact] apple (1/1 matches on 1 items) showing matches only", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestAdjustObjectsForFilter_WithMatchingItemsOnlyFalse(t *testing.T) { // Hook provides objects, all should be shown (matches highlighted) fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](false), WithAdjustObjectsForFilter[object](func(_ string, _ FilterModeName) []object { return stringsToItems([]string{"parent-node", "child-apple"}) }), }, ) fv.SetObjects(stringsToItems([]string{"initial"})) fv.SetFilter("apple", FilterExact) // Both should be visible, child-apple has match highlighted expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "parent-node", "child-" + focusedStyle.Render("apple"), "", "[exact] apple (1/1 matches on 1 items)", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestAdjustObjectsForFilter_MatchNavigationWorks(t *testing.T) { // Verify n/N navigation works with hook-provided objects fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithMatchingItemsOnly[object](false), WithAdjustObjectsForFilter[object](func(_ string, _ FilterModeName) []object { return stringsToItems([]string{ "first-apple", "no-match-here", "second-apple", }) }), }, ) fv.SetObjects(stringsToItems([]string{"initial"})) fv.SetFilter("apple", FilterExact) // Should show "1/2 matches" (two items contain "apple"), first match focused expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "first-" + focusedStyle.Render("apple"), "no-match-here", "second-" + unfocusedStyle.Render("apple"), "", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) // Navigate to next match fv, _ = fv.Update(nextMatchKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "first-" + unfocusedStyle.Render("apple"), "no-match-here", "second-" + focusedStyle.Render("apple"), "", "[exact] apple (2/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) // Navigate to previous match fv, _ = fv.Update(prevMatchKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "first-" + focusedStyle.Render("apple"), "no-match-here", "second-" + unfocusedStyle.Render("apple"), "", "[exact] apple (1/2 matches on 2 items)", footerStyle.Render("100% (3/3)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestAdjustObjectsForFilter_ClearFilterRestoresOriginalBehavior(t *testing.T) { callCount := 0 fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithAdjustObjectsForFilter[object](func(filterText string, _ FilterModeName) []object { callCount++ if filterText != "" { return stringsToItems([]string{"hook-provided"}) } return stringsToItems([]string{"original-a", "original-b"}) }), }, ) fv.SetObjects(stringsToItems([]string{"original-a", "original-b"})) // Apply a filter fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('x')) fv, _ = fv.Update(applyFilterKeyMsg) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "hook-provided", "", "", "[exact] x (no matches)", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) // Clear filter fv, _ = fv.Update(cancelFilterKeyMsg) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "original-a", "original-b", "", "No Filter", footerStyle.Render("100% (2/2)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilter_SelectionAtBottomWithBottomSticky(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{ viewport.WithSelectionEnabled[object](true), viewport.WithStickyBottom[object](true), }, []Option[object]{}, ) items := stringsToItems([]string{ "error: something broke", "info: all good", "info: still good", "info: yep good", "error: another problem", "info: fine", "info: ok", "info: last line", }) fv.SetObjects(items) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "error: another problem", "info: fine", "info: ok", selectedItemStyle.Render("info: last line"), "No Filter", footerStyle.Render("100% (8/8)"), }) internal.CmpStr(t, expectedView, fv.View()) // apply filter - should move selection to the first match fv.SetFilter("error", FilterExact) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("error") + selectedItemStyle.Render(": something broke"), "info: all good", "info: still good", "info: yep good", "[exact] error (1/2 matches on 2 items)", footerStyle.Render("12% (1/8)"), }) internal.CmpStr(t, expectedView, fv.View()) } func TestSetFilter_SelectionAtBottomWithBottomSticky_AppendDoesNotJump(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{ viewport.WithSelectionEnabled[object](true), viewport.WithStickyBottom[object](true), }, []Option[object]{}, ) items := stringsToItems([]string{ "error: something broke", "info: all good", "info: still good", "info: yep good", "error: another problem", "info: fine", "info: ok", "info: last line", }) fv.SetObjects(items) expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "error: another problem", "info: fine", "info: ok", selectedItemStyle.Render("info: last line"), "No Filter", footerStyle.Render("100% (8/8)"), }) internal.CmpStr(t, expectedView, fv.View()) // apply filter while selection is at bottom fv.SetFilter("error", FilterExact) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("error") + selectedItemStyle.Render(": something broke"), "info: all good", "info: still good", "info: yep good", "[exact] error (1/2 matches on 2 items)", footerStyle.Render("12% (1/8)"), }) internal.CmpStr(t, expectedView, fv.View()) // append new logs - selection should stay at the first match, not jump to bottom fv.AppendObjects(stringsToItems([]string{ "error: whoops", })) expectedView = internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ focusedStyle.Render("error") + selectedItemStyle.Render(": something broke"), "info: all good", "info: still good", "info: yep good", "[exact] error (1/3 matches on 3 items)", footerStyle.Render("11% (1/9)"), }) internal.CmpStr(t, expectedView, fv.View()) } // TestCustomFilterMode verifies that a custom filter mode with a custom MatchFunc works correctly. func TestCustomFilterMode(t *testing.T) { // Custom filter mode: matches only lines that start with the filter text prefixMode := FilterMode{ Name: "prefix", Key: key.NewBinding( key.WithKeys("p"), key.WithHelp("p", "prefix filter"), ), Label: "[prefix]", GetMatchFunc: func(filterText string) (MatchFunc, error) { return func(content string) []item.ByteRange { if strings.HasPrefix(content, filterText) { return []item.ByteRange{{Start: 0, End: len(filterText)}} } return nil }, nil }, } fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithFilterModes[object]([]FilterMode{prefixMode}), }, ) fv.SetObjects(stringsToItems([]string{ "alpha one", "beta two alpha", "alpha three", })) // Activate custom mode with 'p' fv, _ = fv.Update(internal.MakeKeyMsg('p')) if fv.GetActiveFilterMode().Name != "prefix" { t.Fatalf("expected active mode 'prefix', got %q", fv.GetActiveFilterMode().Name) } if fv.GetActiveFilterMode().Label != "[prefix]" { t.Fatalf("expected label '[prefix]', got %q", fv.GetActiveFilterMode().Label) } // Type "alpha" for _, ch := range "alpha" { fv, _ = fv.Update(internal.MakeKeyMsg(ch)) } fv, _ = fv.Update(applyFilterKeyMsg) // Should have 2 matches (alpha one and alpha three) if fv.totalMatchesOnAllItems != 2 { t.Errorf("expected 2 total matches, got %d", fv.totalMatchesOnAllItems) } if fv.numMatchingItems != 2 { t.Errorf("expected 2 matching items, got %d", fv.numMatchingItems) } } // TestCustomFilterModeWithError verifies that a custom filter mode returning an error shows no matches. func TestCustomFilterModeWithError(t *testing.T) { errorMode := FilterMode{ Name: "error", Key: key.NewBinding( key.WithKeys("e"), key.WithHelp("e", "error filter"), ), Label: "[error]", GetMatchFunc: func(_ string) (MatchFunc, error) { return nil, fmt.Errorf("always fails") }, } fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithFilterModes[object]([]FilterMode{errorMode}), }, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", })) fv, _ = fv.Update(internal.MakeKeyMsg('e')) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // Error mode should result in 0 matches if fv.totalMatchesOnAllItems != 0 { t.Errorf("expected 0 matches with error mode, got %d", fv.totalMatchesOnAllItems) } } func TestFuzzyFilterMode(t *testing.T) { fuzzyMode := FuzzyFilterMode(key.NewBinding( key.WithKeys("f"), key.WithHelp("f", "fuzzy filter"), )) fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithFilterModes[object]([]FilterMode{fuzzyMode}), }, ) fv.SetObjects(stringsToItems([]string{ "hello world", "help wanted", "goodbye", "hxexlxlxo", })) // Activate fuzzy mode fv, _ = fv.Update(internal.MakeKeyMsg('f')) if fv.GetActiveFilterMode().Label != "[fuzzy]" { t.Fatalf("expected label '[fuzzy]', got %q", fv.GetActiveFilterMode().Label) } // Type "hlo" — should match "hello world" (h-e-l-l-o), "hxexlxlxo" (h-x-e-x-l-x-l-x-o) // but not "help wanted" (no 'o' after 'l') or "goodbye" (no 'h') for _, ch := range "hlo" { fv, _ = fv.Update(internal.MakeKeyMsg(ch)) } fv, _ = fv.Update(applyFilterKeyMsg) if fv.numMatchingItems != 2 { t.Errorf("expected 2 matching items, got %d", fv.numMatchingItems) } } func TestFuzzyFilterModeNoMatch(t *testing.T) { fuzzyMode := FuzzyFilterMode(key.NewBinding( key.WithKeys("f"), key.WithHelp("f", "fuzzy filter"), )) fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithFilterModes[object]([]FilterMode{fuzzyMode}), }, ) fv.SetObjects(stringsToItems([]string{ "abc", "def", })) fv, _ = fv.Update(internal.MakeKeyMsg('f')) for _, ch := range "xyz" { fv, _ = fv.Update(internal.MakeKeyMsg(ch)) } fv, _ = fv.Update(applyFilterKeyMsg) if fv.numMatchingItems != 0 { t.Errorf("expected 0 matching items, got %d", fv.numMatchingItems) } } func TestFuzzyFilterModeCaseInsensitive(t *testing.T) { fuzzyMode := FuzzyFilterMode(key.NewBinding( key.WithKeys("f"), key.WithHelp("f", "fuzzy filter"), )) fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithFilterModes[object]([]FilterMode{fuzzyMode}), }, ) fv.SetObjects(stringsToItems([]string{ "Hello World", "HELLO", "goodbye", })) fv, _ = fv.Update(internal.MakeKeyMsg('f')) for _, ch := range "helo" { fv, _ = fv.Update(internal.MakeKeyMsg(ch)) } fv, _ = fv.Update(applyFilterKeyMsg) // Should match "Hello World" and "HELLO" (case-insensitive) if fv.numMatchingItems != 2 { t.Errorf("expected 2 matching items, got %d", fv.numMatchingItems) } } func TestFuzzyFilterModeEmptyFilter(t *testing.T) { mode := FuzzyFilterMode(key.NewBinding(key.WithKeys("f"))) matchFn, err := mode.GetMatchFunc("") if err != nil { t.Fatalf("unexpected error: %v", err) } // Empty filter should return nil (no matches highlighted) ranges := matchFn("hello") if ranges != nil { t.Errorf("expected nil for empty filter, got %+v", ranges) } } func TestFuzzyFilterModeHighlightRanges(t *testing.T) { mode := FuzzyFilterMode(key.NewBinding(key.WithKeys("f"))) matchFn, err := mode.GetMatchFunc("hlo") if err != nil { t.Fatalf("unexpected error: %v", err) } // "hello world" — h(0) to o(4), single span [0, 5) ranges := matchFn("hello world") if len(ranges) != 1 { t.Fatalf("expected 1 range, got %d", len(ranges)) } if ranges[0] != (item.ByteRange{Start: 0, End: 5}) { t.Errorf("expected {0, 5}, got %+v", ranges[0]) } // No match ranges = matchFn("goodbye") if ranges != nil { t.Errorf("expected nil for non-matching content, got %+v", ranges) } } func TestFuzzyFilterModeUnicode(t *testing.T) { mode := FuzzyFilterMode(key.NewBinding(key.WithKeys("f"))) matchFn, err := mode.GetMatchFunc("über") if err != nil { t.Fatalf("unexpected error: %v", err) } // "ü--b--e--r" — ü is 2 bytes, so total is 11 bytes; span from ü(0) to r(10-11) ranges := matchFn("ü--b--e--r") if len(ranges) != 1 { t.Fatalf("expected 1 range, got %d", len(ranges)) } if ranges[0] != (item.ByteRange{Start: 0, End: 11}) { t.Errorf("expected {0, 11}, got %+v", ranges[0]) } } // TestModeSwitching verifies that switching between filter modes preserves the filter text // and re-evaluates matches. func TestModeSwitching(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "Hello World", "hello world", "HELLO WORLD", })) // Activate exact mode and type "hello" fv, _ = fv.Update(filterKeyMsg) // '/' for _, ch := range "hello" { fv, _ = fv.Update(internal.MakeKeyMsg(ch)) } fv, _ = fv.Update(applyFilterKeyMsg) if fv.GetActiveFilterMode().Name != FilterExact { t.Fatalf("expected exact mode, got %q", fv.GetActiveFilterMode().Name) } // Exact match should find only "hello world" (case-sensitive) exactMatchCount := fv.totalMatchesOnAllItems if exactMatchCount != 1 { t.Fatalf("expected 1 exact match, got %d", exactMatchCount) } // Cancel and switch to case-insensitive mode fv, _ = fv.Update(cancelFilterKeyMsg) fv, _ = fv.Update(caseInsensitiveFilterKeyMsg) // 'i' if fv.GetActiveFilterMode().Name != FilterCaseInsensitive { t.Fatalf("expected case-insensitive mode, got %q", fv.GetActiveFilterMode().Name) } // Type "hello" again for _, ch := range "hello" { fv, _ = fv.Update(internal.MakeKeyMsg(ch)) } fv, _ = fv.Update(applyFilterKeyMsg) // Case-insensitive should match all 3 items if fv.totalMatchesOnAllItems != 3 { t.Errorf("expected 3 case-insensitive matches, got %d", fv.totalMatchesOnAllItems) } // Cancel and switch to regex mode fv, _ = fv.Update(cancelFilterKeyMsg) fv, _ = fv.Update(regexFilterKeyMsg) // 'r' if fv.GetActiveFilterMode().Name != FilterRegex { t.Fatalf("expected regex mode, got %q", fv.GetActiveFilterMode().Name) } // Type regex pattern for _, ch := range `^[hH]ello` { fv, _ = fv.Update(internal.MakeKeyMsg(ch)) } fv, _ = fv.Update(applyFilterKeyMsg) // Should match "Hello World" and "hello world" but not "HELLO WORLD" if fv.totalMatchesOnAllItems != 2 { t.Errorf("expected 2 regex matches for ^[hH]ello, got %d", fv.totalMatchesOnAllItems) } } // TestSetFilterWithVariousModes verifies that SetFilter works with different filter modes. func TestSetFilterWithVariousModes(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "Hello World", "hello world", "HELLO WORLD", })) // SetFilter with exact mode fv.SetFilter("hello", FilterExact) if fv.GetActiveFilterMode().Name != FilterExact { t.Errorf("expected mode %q, got %q", FilterExact, fv.GetActiveFilterMode().Name) } if fv.GetFilterText() != "hello" { t.Errorf("expected filter text 'hello', got %q", fv.GetFilterText()) } if fv.totalMatchesOnAllItems != 1 { t.Errorf("expected 1 exact match, got %d", fv.totalMatchesOnAllItems) } // SetFilter with regex mode fv.SetFilter("HELLO", FilterRegex) if fv.GetActiveFilterMode().Name != FilterRegex { t.Errorf("expected mode %q, got %q", FilterRegex, fv.GetActiveFilterMode().Name) } if fv.totalMatchesOnAllItems != 1 { t.Errorf("expected 1 regex match for 'HELLO', got %d", fv.totalMatchesOnAllItems) } // SetFilter with case-insensitive mode fv.SetFilter("hello", FilterCaseInsensitive) if fv.GetActiveFilterMode().Name != FilterCaseInsensitive { t.Errorf("expected mode %q, got %q", FilterCaseInsensitive, fv.GetActiveFilterMode().Name) } if fv.totalMatchesOnAllItems != 3 { t.Errorf("expected 3 case-insensitive matches, got %d", fv.totalMatchesOnAllItems) } // SetFilter with empty string clears filter fv.SetFilter("", "") if fv.GetActiveFilterMode() != nil { t.Errorf("expected nil active filter mode after empty filter, got %q", fv.GetActiveFilterMode().Name) } if fv.filterMode != filterModeOff { t.Errorf("expected filterModeOff after empty filter, got %d", fv.filterMode) } // SetFilter with unknown mode name should be ignored (keeps current mode) fv.SetFilter("test", "nonexistent") if fv.GetActiveFilterMode() != nil { t.Errorf("expected nil active filter mode for unknown mode, got %q", fv.GetActiveFilterMode().Name) } } // TestFilterModesAccessor verifies the FilterModes() accessor. func TestFilterModesAccessor(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{}, ) modes := fv.FilterModes() if len(modes) != 3 { t.Fatalf("expected 3 default filter modes, got %d", len(modes)) } if modes[0].Name != FilterExact || modes[0].Label != "[exact]" { t.Errorf("expected first mode Name=%q Label='[exact]', got Name=%q Label=%q", FilterExact, modes[0].Name, modes[0].Label) } if modes[1].Name != FilterRegex || modes[1].Label != "[regex]" { t.Errorf("expected second mode Name=%q Label='[regex]', got Name=%q Label=%q", FilterRegex, modes[1].Name, modes[1].Label) } if modes[2].Name != FilterCaseInsensitive || modes[2].Label != "[iregex]" { t.Errorf("expected third mode Name=%q Label='[iregex]', got Name=%q Label=%q", FilterCaseInsensitive, modes[2].Name, modes[2].Label) } } // TestGetActiveFilterModeNil verifies GetActiveFilterMode returns nil when no mode is active. func TestGetActiveFilterModeNil(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{}, ) if fv.GetActiveFilterMode() != nil { t.Errorf("expected nil active filter mode initially") } // Activate mode fv, _ = fv.Update(filterKeyMsg) if fv.GetActiveFilterMode() == nil { t.Errorf("expected non-nil active filter mode after activation") } // Cancel fv, _ = fv.Update(cancelFilterKeyMsg) if fv.GetActiveFilterMode() != nil { t.Errorf("expected nil active filter mode after cancel") } } // TestWithFilterModesCustom verifies WithFilterModes overrides defaults. func TestWithFilterModesCustom(t *testing.T) { customMode := FilterMode{ Name: "custom", Key: key.NewBinding( key.WithKeys("x"), key.WithHelp("x", "custom"), ), Label: "[custom]", GetMatchFunc: func(filterText string) (MatchFunc, error) { return func(content string) []item.ByteRange { // Simple: match everything if len(content) > 0 && filterText != "" { return []item.ByteRange{{Start: 0, End: len(content)}} } return nil }, nil }, } fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{ WithFilterModes[object]([]FilterMode{customMode}), }, ) modes := fv.FilterModes() if len(modes) != 1 { t.Fatalf("expected 1 custom filter mode, got %d", len(modes)) } if modes[0].Label != "[custom]" { t.Errorf("expected label '[custom]', got %q", modes[0].Label) } // Default filter key '/' should not activate anything since we replaced modes fv.SetObjects(stringsToItems([]string{"hello"})) fv, _ = fv.Update(filterKeyMsg) // '/' — should not match any mode key if fv.GetActiveFilterMode() != nil { t.Errorf("expected no mode activation from '/', got %q", fv.GetActiveFilterMode().Name) } // Custom key 'x' should work fv, _ = fv.Update(internal.MakeKeyMsg('x')) if fv.GetActiveFilterMode().Name != "custom" { t.Errorf("expected mode 'custom' after 'x', got %q", fv.GetActiveFilterMode().Name) } } // TestAdjustObjectsForFilter_ModeNonEmptyOnClear verifies that the callback // always receives a valid (non-empty) mode name, even when clearing the filter. func TestAdjustObjectsForFilter_ModeNonEmptyOnClear(t *testing.T) { var receivedModes []FilterModeName fv := makeFilterableViewport( 80, 5, []viewport.Option[object]{}, []Option[object]{ WithAdjustObjectsForFilter[object](func(_ string, mode FilterModeName) []object { receivedModes = append(receivedModes, mode) if mode == "" { t.Fatalf("adjustObjectsForFilter received empty mode name") } return nil }), }, ) fv.SetObjects(stringsToItems([]string{"apple", "banana"})) // Activate filter, type, apply fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // Clear filter — this sets activeFilterModeName to "" internally, // but the callback should still receive a valid (non-empty) mode name _, _ = fv.Update(cancelFilterKeyMsg) if len(receivedModes) == 0 { t.Fatal("expected adjustObjectsForFilter to be called at least once") } for i, mode := range receivedModes { if mode == "" { t.Errorf("call %d: received empty mode name", i) } } } func TestModeSwitchAfterCancel(t *testing.T) { fv := makeFilterableViewport( 80, 6, []viewport.Option[object]{}, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ "apple", "banana", })) // Activate exact mode, type, apply fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('a')) fv, _ = fv.Update(applyFilterKeyMsg) // "apple" has 1 'a', "banana" has 3 'a's = 4 total matches if fv.totalMatchesOnAllItems != 4 { t.Fatalf("expected 4 matches for 'a', got %d", fv.totalMatchesOnAllItems) } // Cancel filter fv, _ = fv.Update(cancelFilterKeyMsg) if fv.GetActiveFilterMode() != nil { t.Errorf("expected nil active filter mode after cancel, got %q", fv.GetActiveFilterMode().Name) } if fv.filterMode != filterModeOff { t.Errorf("expected filterModeOff after cancel") } // Switch to regex mode fv, _ = fv.Update(regexFilterKeyMsg) if fv.GetActiveFilterMode().Name != FilterRegex { t.Errorf("expected mode %q (regex), got %q", FilterRegex, fv.GetActiveFilterMode().Name) } // Filter text should be empty (was cleared on cancel) if fv.GetFilterText() != "" { t.Errorf("expected empty filter text after cancel+mode switch, got %q", fv.GetFilterText()) } } func TestDuplicateFilterModeNamePanics(t *testing.T) { defer func() { r := recover() if r == nil { t.Fatal("expected panic for duplicate FilterModeName, got none") } msg := fmt.Sprint(r) if !strings.Contains(msg, "duplicate FilterModeName") { t.Errorf("expected panic message about duplicate FilterModeName, got: %s", msg) } }() vp := viewport.New[object](80, 6) New[object](vp, WithFilterModes[object]([]FilterMode{ ExactFilterMode(key.NewBinding(key.WithKeys("/"))), ExactFilterMode(key.NewBinding(key.WithKeys("f"))), // same Name: "exact" }), ) } func TestNoFilterModesPanics(t *testing.T) { defer func() { r := recover() if r == nil { t.Fatal("expected panic for no filter modes, got none") } msg := fmt.Sprint(r) if !strings.Contains(msg, "no filter modes set") { t.Errorf("expected panic message about no filter modes, got: %s", msg) } }() vp := viewport.New[object](80, 6) New[object](vp, WithFilterModes[object]([]FilterMode{}), ) } func TestEmptyFilterModeNamePanics(t *testing.T) { defer func() { r := recover() if r == nil { t.Fatal("expected panic for empty FilterMode Name, got none") } msg := fmt.Sprint(r) if !strings.Contains(msg, "empty Name") { t.Errorf("expected panic message about empty Name, got: %s", msg) } }() vp := viewport.New[object](80, 6) New[object](vp, WithFilterModes[object]([]FilterMode{ {Key: key.NewBinding(key.WithKeys("x")), Label: "[x]", GetMatchFunc: func(_ string) (MatchFunc, error) { return nil, nil }}, }), ) } func TestNoMatchesResetsXOffsetWhenUnwrapped(t *testing.T) { fv := makeFilterableViewport( 10, 3, []viewport.Option[object]{ viewport.WithWrapText[object](false), }, []Option[object]{}, ) fv.SetObjects(stringsToItems([]string{ strings.Repeat("a", 32), })) // filter for "a" and navigate to a right-side match so xOffset > 0 fv, _ = fv.Update(filterKeyMsg) for range 4 { fv, _ = fv.Update(internal.MakeKeyMsg('a')) } fv, _ = fv.Update(applyFilterKeyMsg) fv, _ = fv.Update(nextMatchKeyMsg) fv, _ = fv.Update(nextMatchKeyMsg) if fv.vp.GetXOffsetWidth() == 0 { t.Fatal("expected xOffset > 0 after navigating to right-side match") } // cancel filter and start a new one that produces no matches fv, _ = fv.Update(cancelFilterKeyMsg) fv, _ = fv.Update(filterKeyMsg) fv, _ = fv.Update(internal.MakeKeyMsg('z')) fv, _ = fv.Update(applyFilterKeyMsg) if fv.vp.GetXOffsetWidth() != 0 { t.Fatalf("expected xOffset=0 when no matches and unwrapped, got %d", fv.vp.GetXOffsetWidth()) } expectedView := internal.Pad(fv.GetWidth(), fv.GetHeight(), []string{ "aaaaaaa...", "[exact]...", footerStyle.Render("100% (1/1)"), }) internal.CmpStr(t, expectedView, fv.View()) } ================================================ FILE: modules/viewport/filterableviewport/filtermode.go ================================================ package filterableviewport import ( "regexp" "strings" "charm.land/bubbles/v2/key" "github.com/antgroup/hugescm/modules/viewport/internal/fuzzy" "github.com/antgroup/hugescm/modules/viewport/item" ) // FilterModeName identifies a filter mode programmatically. // Built-in names are provided as package constants. // Define your own for custom filter modes. type FilterModeName string const ( // FilterExact identifies the built-in exact substring filter mode. FilterExact FilterModeName = "exact" // FilterRegex identifies the built-in regex filter mode. FilterRegex FilterModeName = "regex" // FilterCaseInsensitive identifies the built-in case-insensitive regex filter mode. FilterCaseInsensitive FilterModeName = "iregex" // FilterFuzzy identifies the built-in fuzzy filter mode. FilterFuzzy FilterModeName = "fuzzy" ) // MatchFunc extracts match byte ranges from ANSI-stripped item content. // Called once per item during a filter scan. type MatchFunc func(content string) []item.ByteRange // FilterMode defines a user-configurable filter type. type FilterMode struct { // Name is a stable programmatic identifier for this filter mode (e.g. FilterExact, FilterRegex). // Must be unique across all modes in a filterable viewport. Name FilterModeName // Key activates this filter mode Key key.Binding // Label shown in filter line, e.g. "[exact]" Label string // GetMatchFunc is called once when the filter text changes. It returns a MatchFunc // used for each item, or an error (e.g. invalid regex) to show no matches. GetMatchFunc func(filterText string) (MatchFunc, error) } // Matches reports whether content matches the given query according to this // filter mode's matching logic. It is a convenience wrapper around // GetMatchFunc for callers that only need a boolean result. func (fm FilterMode) Matches(query, content string) bool { if query == "" { return true } matchFn, err := fm.GetMatchFunc(query) if err != nil { return false } return len(matchFn(content)) > 0 } // ExactFilterMode returns a FilterMode that performs exact substring matching. func ExactFilterMode(k key.Binding) FilterMode { return FilterMode{ Name: FilterExact, Key: k, Label: "[exact]", GetMatchFunc: func(filterText string) (MatchFunc, error) { return func(content string) []item.ByteRange { if filterText == "" { return nil } var ranges []item.ByteRange startIndex := 0 for { foundIndex := strings.Index(content[startIndex:], filterText) if foundIndex == -1 { break } actualStart := startIndex + foundIndex end := actualStart + len(filterText) ranges = append(ranges, item.ByteRange{Start: actualStart, End: end}) startIndex = end } return ranges }, nil }, } } // RegexFilterMode returns a FilterMode that performs regex matching. func RegexFilterMode(k key.Binding) FilterMode { return FilterMode{ Name: FilterRegex, Key: k, Label: "[regex]", GetMatchFunc: func(filterText string) (MatchFunc, error) { re, err := regexp.Compile(filterText) if err != nil { return nil, err } return func(content string) []item.ByteRange { regexMatches := re.FindAllStringIndex(content, -1) if len(regexMatches) == 0 { return nil } ranges := make([]item.ByteRange, 0, len(regexMatches)) for _, rm := range regexMatches { ranges = append(ranges, item.ByteRange{Start: rm[0], End: rm[1]}) } return ranges }, nil }, } } // CaseInsensitiveFilterMode returns a FilterMode that performs case-insensitive // regex matching. The (?i) prefix is added internally — the user never sees it // in the text input. func CaseInsensitiveFilterMode(k key.Binding) FilterMode { return FilterMode{ Name: FilterCaseInsensitive, Key: k, Label: "[iregex]", GetMatchFunc: func(filterText string) (MatchFunc, error) { re, err := regexp.Compile("(?i)" + filterText) if err != nil { return nil, err } return func(content string) []item.ByteRange { regexMatches := re.FindAllStringIndex(content, -1) if len(regexMatches) == 0 { return nil } ranges := make([]item.ByteRange, 0, len(regexMatches)) for _, rm := range regexMatches { ranges = append(ranges, item.ByteRange{Start: rm[0], End: rm[1]}) } return ranges }, nil }, } } // FuzzyFilterMode returns a FilterMode that performs fuzzy matching similar to fzf. // Characters in the query must appear in order in the content but need not be contiguous. // Matching is case-insensitive. The highlighted range spans from the first to the last // matched character. func FuzzyFilterMode(k key.Binding) FilterMode { return FilterMode{ Name: FilterFuzzy, Key: k, Label: "[fuzzy]", GetMatchFunc: func(filterText string) (MatchFunc, error) { return func(content string) []item.ByteRange { if filterText == "" { return nil } matches := fuzzy.Find([]string{content}, filterText) if len(matches) == 0 { return nil } fuzzyRanges := matches[0].MatchedByteRanges() if len(fuzzyRanges) == 0 { return nil } // Single span from first matched char to last matched char. return []item.ByteRange{{ Start: fuzzyRanges[0].Start, End: fuzzyRanges[len(fuzzyRanges)-1].End, }} }, nil }, } } // DefaultFilterModes returns the default set of filter modes: // exact (/), regex (r), case-insensitive (i). func DefaultFilterModes() []FilterMode { return []FilterMode{ ExactFilterMode(key.NewBinding( key.WithKeys("/"), key.WithHelp("/", "filter"), )), RegexFilterMode(key.NewBinding( key.WithKeys("r"), key.WithHelp("r", "regex filter"), )), CaseInsensitiveFilterMode(key.NewBinding( key.WithKeys("i"), key.WithHelp("i", "case insensitive filter"), )), } } ================================================ FILE: modules/viewport/filterableviewport/keymap.go ================================================ package filterableviewport import ( "charm.land/bubbles/v2/key" ) // KeyMap defines the key bindings for the filterable viewport. // Filter mode activation keys (exact, regex, case-insensitive) are defined on // each FilterMode.Key — see DefaultFilterModes() and WithFilterModes(). type KeyMap struct { ApplyFilterKey key.Binding CancelFilterKey key.Binding ToggleMatchingItemsOnlyKey key.Binding NextMatchKey key.Binding PrevMatchKey key.Binding SearchHistoryPrevKey key.Binding SearchHistoryNextKey key.Binding } // DefaultKeyMap returns a default keymap for the filterable viewport func DefaultKeyMap() KeyMap { return KeyMap{ ApplyFilterKey: key.NewBinding( key.WithKeys("enter"), key.WithHelp("enter", "apply filter"), ), CancelFilterKey: key.NewBinding( key.WithKeys("esc"), key.WithHelp("esc", "cancel filter"), ), ToggleMatchingItemsOnlyKey: key.NewBinding( key.WithKeys("o"), key.WithHelp("o", "toggle matches only"), ), NextMatchKey: key.NewBinding( key.WithKeys("n"), key.WithHelp("n", "next match"), ), PrevMatchKey: key.NewBinding( key.WithKeys("N"), key.WithHelp("N", "previous match"), ), SearchHistoryPrevKey: key.NewBinding( key.WithKeys("up"), key.WithHelp("↑", "previous search"), ), SearchHistoryNextKey: key.NewBinding( key.WithKeys("down"), key.WithHelp("↓", "next search"), ), } } ================================================ FILE: modules/viewport/filterableviewport/styles.go ================================================ package filterableviewport import ( "charm.land/lipgloss/v2" ) // Styles contains styling configuration for the filterable viewport type Styles struct { Match MatchStyles } // MatchStyles contains styles for matches in the filterable viewport type MatchStyles struct { Focused lipgloss.Style FocusedIfSelected lipgloss.Style // used when the focused match is on the selected item Unfocused lipgloss.Style } // DefaultMatchStyles returns a set of default styles for matches. // Uses only reverse video and safe ANSI colors — no 256-color or true-color values. func DefaultMatchStyles() MatchStyles { return MatchStyles{ Focused: lipgloss.NewStyle().Reverse(true).Foreground(lipgloss.Cyan), FocusedIfSelected: lipgloss.NewStyle().Reverse(true).Foreground(lipgloss.Cyan), Unfocused: lipgloss.NewStyle().Reverse(true).Foreground(lipgloss.BrightRed), } } // DefaultStyles returns a set of default styles for the filterable viewport func DefaultStyles() Styles { return Styles{ Match: DefaultMatchStyles(), } } ================================================ FILE: modules/viewport/highlight.go ================================================ package viewport import ( "github.com/antgroup/hugescm/modules/viewport/item" ) // Highlight represents a specific position and style to highlight type Highlight struct { ItemIndex int // index of the item ItemHighlight item.Highlight } ================================================ FILE: modules/viewport/internal/fuzzy/fuzzy.go ================================================ // Package fuzzy provides fuzzy string matching. // // A query matches a string when every character in the query appears in the // string in order, but the characters need not be contiguous. Matching is // case-insensitive by default. // // Adapted from github.com/koki-develop/go-fzf. package fuzzy import ( "sort" "strings" "unicode/utf8" ) // Match describes a single successful fuzzy match. type Match struct { // Str is the original (unmodified) string that was matched. Str string // Index is the position of this string in the input slice. Index int // MatchedIndexes holds the rune indexes (0-based) of each query character // that was matched inside Str. MatchedIndexes []int } // MatchedByteRanges converts the rune-based MatchedIndexes into byte ranges // within Str. Each returned ByteRange covers exactly one matched rune. func (m Match) MatchedByteRanges() []ByteRange { if len(m.MatchedIndexes) == 0 { return nil } // Build a rune-index → byte-offset map for only the rune indexes we need. // We walk the string once, keeping a running rune counter. needed := make(map[int]struct{}, len(m.MatchedIndexes)) for _, ri := range m.MatchedIndexes { needed[ri] = struct{}{} } type runePos struct { byteOffset int byteLen int } found := make(map[int]runePos, len(needed)) runeIdx := 0 byteIdx := 0 for byteIdx < len(m.Str) && len(found) < len(needed) { _, size := utf8.DecodeRuneInString(m.Str[byteIdx:]) if _, ok := needed[runeIdx]; ok { found[runeIdx] = runePos{byteOffset: byteIdx, byteLen: size} } byteIdx += size runeIdx++ } ranges := make([]ByteRange, 0, len(m.MatchedIndexes)) for _, ri := range m.MatchedIndexes { rp := found[ri] ranges = append(ranges, ByteRange{Start: rp.byteOffset, End: rp.byteOffset + rp.byteLen}) } return ranges } // ByteRange represents a half-open byte range [Start, End). type ByteRange struct { Start int End int } // Matches is a sortable slice of Match values. // The default sort order ranks matches with fewer matched indexes first (shorter // queries matched sooner), breaking ties by matched-index position // (left-biased), then by original index. type Matches []Match func (m Matches) Len() int { return len(m) } func (m Matches) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m Matches) Less(i, j int) bool { mi, mj := m[i].MatchedIndexes, m[j].MatchedIndexes li, lj := len(mi), len(mj) if li != lj { return li < lj } for k := range li { if mi[k] != mj[k] { return mi[k] < mj[k] } } return m[i].Index < m[j].Index } // Option configures a fuzzy search. type Option func(*option) type option struct { caseSensitive bool } // WithCaseSensitive enables or disables case-sensitive matching. // The default is case-insensitive. func WithCaseSensitive(v bool) Option { return func(o *option) { o.caseSensitive = v } } // Find performs a fuzzy search of query against each string in items, // returning only the matches, sorted by quality. func Find(items []string, query string, opts ...Option) Matches { var o option for _, fn := range opts { fn(&o) } if !o.caseSensitive { query = strings.ToLower(query) } var result Matches for i, s := range items { if m, ok := match(s, query, o); ok { m.Index = i result = append(result, m) } } sort.Sort(result) return result } // match checks whether query fuzzy-matches str and returns the Match if so. // It uses a two-pass approach to find the tightest (shortest-span) match: // 1. Forward pass: greedily match left-to-right to confirm all query chars exist in order. // 2. Backward pass: from the end of the string, match query chars in reverse to find the // rightmost possible match. // 3. Forward pass over that window to tighten and record exact matched indexes. func match(str, query string, o option) (Match, bool) { normalizedStr := str if !o.caseSensitive { normalizedStr = strings.ToLower(str) } runes := []rune(normalizedStr) queryRunes := []rune(query) n := len(runes) qn := len(queryRunes) if qn == 0 { return Match{Str: str, MatchedIndexes: []int{}}, true } // Forward pass: confirm a match exists. qi := 0 for i := 0; i < n && qi < qn; i++ { if runes[i] == queryRunes[qi] { qi++ } } if qi < qn { return Match{}, false } // Backward pass: from the end of the string, match query chars in reverse. // This finds the rightmost end and the latest possible start. qi = qn - 1 endIdx := -1 startIdx := 0 for i := n - 1; i >= 0 && qi >= 0; i-- { if runes[i] == queryRunes[qi] { if qi == qn-1 { endIdx = i } if qi == 0 { startIdx = i } qi-- } } // Forward pass from startIdx to endIdx to tighten and collect matched indexes. matchedIndexes := make([]int, 0, qn) qi = 0 for i := startIdx; i <= endIdx && qi < qn; i++ { if runes[i] == queryRunes[qi] { matchedIndexes = append(matchedIndexes, i) qi++ } } return Match{Str: str, MatchedIndexes: matchedIndexes}, true } ================================================ FILE: modules/viewport/internal/fuzzy/fuzzy_test.go ================================================ package fuzzy import ( "fmt" "testing" ) func TestMatch(t *testing.T) { tests := []struct { str string query string matchedIndexes []int // nil means no match expected }{ // Basic ASCII {str: "abc", query: "", matchedIndexes: []int{}}, {str: "abc", query: "a", matchedIndexes: []int{0}}, {str: "abc", query: "ab", matchedIndexes: []int{0, 1}}, {str: "abc", query: "ac", matchedIndexes: []int{0, 2}}, {str: "abc", query: "abc", matchedIndexes: []int{0, 1, 2}}, {str: "abc", query: "b", matchedIndexes: []int{1}}, {str: "abc", query: "bc", matchedIndexes: []int{1, 2}}, {str: "abc", query: "c", matchedIndexes: []int{2}}, // Non-matches {str: "abc", query: "cba"}, {str: "abc", query: "d"}, {str: "abc", query: "abcd"}, // With gaps {str: "xaxbxc", query: "a", matchedIndexes: []int{1}}, {str: "xaxbxc", query: "ab", matchedIndexes: []int{1, 3}}, {str: "xaxbxc", query: "ac", matchedIndexes: []int{1, 5}}, {str: "xaxbxc", query: "abc", matchedIndexes: []int{1, 3, 5}}, {str: "xaxbxc", query: "b", matchedIndexes: []int{3}}, {str: "xaxbxc", query: "bc", matchedIndexes: []int{3, 5}}, {str: "xaxbxc", query: "c", matchedIndexes: []int{5}}, {str: "xaxbxc", query: "cba"}, {str: "xaxbxc", query: "d"}, {str: "xaxbxc", query: "abcd"}, // Unicode {str: "こんにちは", query: "こ", matchedIndexes: []int{0}}, {str: "こんにちは", query: "こん", matchedIndexes: []int{0, 1}}, {str: "こんにちは", query: "こには", matchedIndexes: []int{0, 2, 4}}, {str: "こんにちは", query: "こんにちは", matchedIndexes: []int{0, 1, 2, 3, 4}}, } for i, tt := range tests { t.Run(fmt.Sprintf("#%d_%s/%s", i, tt.str, tt.query), func(t *testing.T) { m, ok := match(tt.str, tt.query, option{}) if tt.matchedIndexes == nil { if ok { t.Fatalf("expected no match, got %+v", m) } return } if !ok { t.Fatalf("expected match with indexes %v, got no match", tt.matchedIndexes) } if len(m.MatchedIndexes) != len(tt.matchedIndexes) { t.Fatalf("expected %d matched indexes, got %d: %v", len(tt.matchedIndexes), len(m.MatchedIndexes), m.MatchedIndexes) } for j := range tt.matchedIndexes { if m.MatchedIndexes[j] != tt.matchedIndexes[j] { t.Errorf("index %d: expected %d, got %d", j, tt.matchedIndexes[j], m.MatchedIndexes[j]) } } if m.Str != tt.str { t.Errorf("expected Str=%q, got %q", tt.str, m.Str) } }) } } func TestMatchCaseSensitive(t *testing.T) { tests := []struct { str string query string matchedIndexes []int }{ {str: "abc", query: "abc", matchedIndexes: []int{0, 1, 2}}, {str: "abc", query: "Abc"}, {str: "abc", query: "ABC"}, {str: "Abc", query: "abc"}, {str: "Abc", query: "Abc", matchedIndexes: []int{0, 1, 2}}, {str: "Abc", query: "ABC"}, {str: "ABC", query: "abc"}, {str: "ABC", query: "Abc"}, {str: "ABC", query: "ABC", matchedIndexes: []int{0, 1, 2}}, } for i, tt := range tests { t.Run(fmt.Sprintf("#%d_%s/%s", i, tt.str, tt.query), func(t *testing.T) { m, ok := match(tt.str, tt.query, option{caseSensitive: true}) if tt.matchedIndexes == nil { if ok { t.Fatalf("expected no match, got %+v", m) } return } if !ok { t.Fatalf("expected match, got no match") } for j := range tt.matchedIndexes { if m.MatchedIndexes[j] != tt.matchedIndexes[j] { t.Errorf("index %d: expected %d, got %d", j, tt.matchedIndexes[j], m.MatchedIndexes[j]) } } }) } } func TestMatchCaseInsensitiveDefault(t *testing.T) { // Default (case-insensitive): "Hello" should match query "hello" m, ok := match("Hello World", "hello", option{}) if !ok { t.Fatal("expected case-insensitive match") } expected := []int{0, 1, 2, 3, 4} if len(m.MatchedIndexes) != len(expected) { t.Fatalf("expected %d indexes, got %d", len(expected), len(m.MatchedIndexes)) } for i, v := range expected { if m.MatchedIndexes[i] != v { t.Errorf("index %d: expected %d, got %d", i, v, m.MatchedIndexes[i]) } } } func TestFind(t *testing.T) { items := []string{ "apple", "banana", "application", "grape", "pineapple", } results := Find(items, "apl") // Should match: "apple" (0), "application" (2), "pineapple" (4) if len(results) != 3 { t.Fatalf("expected 3 matches, got %d", len(results)) } // Results should be sorted: "apple" and "application" have matched indexes // [0,2,3] and [0,3,4] so apple comes first; pineapple has [4,6,7] if results[0].Str != "apple" { t.Errorf("expected first match to be 'apple', got %q", results[0].Str) } } func TestFindCaseSensitive(t *testing.T) { items := []string{"Apple", "apple", "APPLE"} results := Find(items, "apple", WithCaseSensitive(true)) if len(results) != 1 { t.Fatalf("expected 1 match, got %d", len(results)) } if results[0].Str != "apple" { t.Errorf("expected 'apple', got %q", results[0].Str) } } func TestFindEmpty(t *testing.T) { items := []string{"abc", "def"} results := Find(items, "") // Empty query matches everything if len(results) != 2 { t.Fatalf("expected 2 matches for empty query, got %d", len(results)) } } func TestFindNoMatches(t *testing.T) { items := []string{"abc", "def"} results := Find(items, "xyz") if len(results) != 0 { t.Fatalf("expected 0 matches, got %d", len(results)) } } func TestMatchesSorting(t *testing.T) { items := []string{ "xxaxxbxxc", // indexes: [2, 5, 8] - spread out "abcxxxxxx", // indexes: [0, 1, 2] - tightest, leftmost "xabcxxxxx", // indexes: [1, 2, 3] - tight but later } results := Find(items, "abc") if len(results) != 3 { t.Fatalf("expected 3 matches, got %d", len(results)) } // Sort order: by matched indexes position (leftmost first) // "abcxxxxxx" [0,1,2] < "xabcxxxxx" [1,2,3] < "xxaxxbxxc" [2,5,8] if results[0].Str != "abcxxxxxx" { t.Errorf("expected first result 'abcxxxxxx', got %q", results[0].Str) } if results[1].Str != "xabcxxxxx" { t.Errorf("expected second result 'xabcxxxxx', got %q", results[1].Str) } if results[2].Str != "xxaxxbxxc" { t.Errorf("expected third result 'xxaxxbxxc', got %q", results[2].Str) } } func TestMatchedByteRanges(t *testing.T) { m := Match{ Str: "hello", MatchedIndexes: []int{0, 2, 4}, } ranges := m.MatchedByteRanges() expected := []ByteRange{ {Start: 0, End: 1}, // h {Start: 2, End: 3}, // l {Start: 4, End: 5}, // o } if len(ranges) != len(expected) { t.Fatalf("expected %d ranges, got %d", len(expected), len(ranges)) } for i, r := range ranges { if r != expected[i] { t.Errorf("range %d: expected %+v, got %+v", i, expected[i], r) } } } func TestMatchedByteRangesUnicode(t *testing.T) { // "über" — ü is 2 bytes in UTF-8 m := Match{ Str: "über", MatchedIndexes: []int{0, 2}, // ü and e } ranges := m.MatchedByteRanges() expected := []ByteRange{ {Start: 0, End: 2}, // ü (2 bytes) {Start: 3, End: 4}, // e (1 byte, after ü(2) + b(1)) } if len(ranges) != len(expected) { t.Fatalf("expected %d ranges, got %d", len(expected), len(ranges)) } for i, r := range ranges { if r != expected[i] { t.Errorf("range %d: expected %+v, got %+v", i, expected[i], r) } } } func TestMatchTightestSpan(t *testing.T) { tests := []struct { str string query string matchedIndexes []int }{ // "b" appears early in "foobar", but the tightest match for // "bar-baz" is the suffix starting at rune index 7. { str: "foobar-bar-baz", query: "bar-baz", matchedIndexes: []int{7, 8, 9, 10, 11, 12, 13}, }, // Should prefer the later, tighter "a_b" over the early spread "a...b". { str: "a____b____a_b", query: "ab", matchedIndexes: []int{10, 12}, }, // Contiguous match at end preferred over spread match from start. { str: "xaxbxcxabc", query: "abc", matchedIndexes: []int{7, 8, 9}, }, } for i, tt := range tests { t.Run(fmt.Sprintf("#%d_%s/%s", i, tt.str, tt.query), func(t *testing.T) { m, ok := match(tt.str, tt.query, option{}) if !ok { t.Fatalf("expected match, got no match") } if len(m.MatchedIndexes) != len(tt.matchedIndexes) { t.Fatalf("expected %d matched indexes, got %d: %v", len(tt.matchedIndexes), len(m.MatchedIndexes), m.MatchedIndexes) } for j := range tt.matchedIndexes { if m.MatchedIndexes[j] != tt.matchedIndexes[j] { t.Errorf("index %d: expected %d, got %d (full: %v)", j, tt.matchedIndexes[j], m.MatchedIndexes[j], m.MatchedIndexes) } } }) } } func TestMatchedByteRangesEmpty(t *testing.T) { m := Match{Str: "hello", MatchedIndexes: []int{}} ranges := m.MatchedByteRanges() if ranges != nil { t.Errorf("expected nil for empty MatchedIndexes, got %+v", ranges) } } ================================================ FILE: modules/viewport/internal/test_util.go ================================================ package internal import ( "fmt" "os" "runtime" "slices" "strings" "testing" "time" "unicode" tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" ) // Test helper colors and styles var ( Blue = lipgloss.Color("#0000FF") BlueBg = lipgloss.NewStyle().Background(Blue) BlueFg = lipgloss.NewStyle().Foreground(Blue) Green = lipgloss.Color("#00FF00") GreenBg = lipgloss.NewStyle().Background(Green) GreenFg = lipgloss.NewStyle().Foreground(Green) Red = lipgloss.Color("#FF0000") RedBg = lipgloss.NewStyle().Background(Red) RedFg = lipgloss.NewStyle().Foreground(Red) ) // CmpStr compares two strings and fails the test if they are not equal func CmpStr(t *testing.T, expected, actual string, extra ...string) { _, file, line, _ := runtime.Caller(1) testName := t.Name() diff := cmp.Diff(expected, actual) if len(expected) > 80 { diff = cmp.Diff(expected, actual, cmpopts.AcyclicTransformer("SplitLines", func(s string) []string { return strings.Split(s, "\n") })) } if diff != "" { t.Errorf("\nTest %q failed at %s:%d\nDiff (-expected +actual):\n%s%s", testName, file, line, diff, strings.Join(extra, "\n")) } } // RunWithTimeout runs a test function with a timeout. func RunWithTimeout(t *testing.T, runTest func(t *testing.T), timeout time.Duration) { t.Helper() // warmup runs for range 3 { runTest(t) } // actual measured runs var durations []time.Duration for range 3 { done := make(chan struct{}) var testErr error start := time.Now() go func() { defer func() { if r := recover(); r != nil { testErr = fmt.Errorf("test panicked: %v", r) } close(done) }() subT := &testing.T{} runTest(subT) if subT.Failed() { testErr = fmt.Errorf("test failed in goroutine") } }() select { case <-done: if testErr != nil { t.Fatal(testErr) } durations = append(durations, time.Since(start)) case <-time.After(timeout): if os.Getenv("CI") != "" { t.Logf("Test took too long (%v) but not failing in CI", timeout) return } t.Fatalf("Test took too long: %v", timeout) } runtime.GC() time.Sleep(time.Millisecond * 10) } slices.Sort(durations) median := durations[len(durations)/2] t.Logf("Test timing: median=%v min=%v max=%v", median, durations[0], durations[len(durations)-1]) } // Pad pads the given lines to the specified width and height func Pad(width, height int, lines []string) string { var res []string for _, line := range lines { resLine := line numSpaces := width - lipgloss.Width(line) if numSpaces > 0 { resLine += strings.Repeat(" ", numSpaces) } res = append(res, resLine) } numEmptyLines := height - len(lines) for range numEmptyLines { res = append(res, strings.Repeat(" ", width)) } return strings.Join(res, "\n") } // MakeKeyMsg creates a tea.KeyPressMsg for the given rune. // For uppercase letters, it sets the shift modifier and uses the lowercase code. func MakeKeyMsg(k rune) tea.KeyPressMsg { if unicode.IsUpper(k) { return tea.KeyPressMsg{Code: unicode.ToLower(k), Text: string(k), Mod: tea.ModShift} } return tea.KeyPressMsg{Code: k, Text: string(k)} } ================================================ FILE: modules/viewport/item/ansi.go ================================================ package item import ( "strings" "unicode/utf8" "charm.land/lipgloss/v2" ) // StripAnsi removes all ANSI escape sequences from the input string. func StripAnsi(input string) string { ranges := findAnsiByteRanges(input) if len(ranges) == 0 { return input } totalAnsiLen := 0 for _, r := range ranges { totalAnsiLen += int(r[1] - r[0]) } finalLen := len(input) - totalAnsiLen var builder strings.Builder builder.Grow(finalLen) lastPos := 0 for _, r := range ranges { builder.WriteString(input[lastPos:int(r[0])]) lastPos = int(r[1]) } builder.WriteString(input[lastPos:]) return builder.String() } // highlightRange represents a highlight with start/end positions and style type highlightRange struct { startByte int endByte int style lipgloss.Style } // RST is the ansi escape sequence for resetting styles // lipgloss v2 uses the short form "\x1b[m" const RST = "\x1b[m" // isResetCode checks if a code is an ANSI reset sequence // Both "\x1b[0m" and "\x1b[m" are valid reset codes func isResetCode(code string) bool { return code == "\x1b[0m" || code == "\x1b[m" } // reapplyAnsi reconstructs ANSI escape sequences in a truncated string based on their positions in the original. // It ensures that any active text formatting (colors, styles) from the original string is correctly maintained // in the truncated output, and adds proper reset codes where needed. // // Parameters: // - original: the source string containing ANSI escape sequences // - truncated: the truncated version of the string, without ANSI sequences // - truncByteOffset: byte offset in the original string where truncation started // - ansiCodeIndexes: pairs of start/end byte positions of ANSI codes in the original string // // Returns a string with ANSI escape sequences reapplied at appropriate positions, // maintaining the original text formatting while preserving proper UTF-8 encoding. func reapplyAnsi(original, truncated string, truncByteOffset int, ansiCodeIndexes [][]uint32) string { var result strings.Builder result.Grow(len(truncated)) var lenAnsiAdded int isReset := true for i := 0; i < len(truncated); { // collect all ansi codes that should be applied immediately before the current runes var ansisToAdd []string for len(ansiCodeIndexes) > 0 { candidateAnsi := ansiCodeIndexes[0] codeStart, codeEnd := int(candidateAnsi[0]), int(candidateAnsi[1]) originalByteIdx := truncByteOffset + i + lenAnsiAdded if codeStart <= originalByteIdx { code := original[codeStart:codeEnd] isReset = isResetCode(code) ansisToAdd = append(ansisToAdd, code) lenAnsiAdded += codeEnd - codeStart ansiCodeIndexes = ansiCodeIndexes[1:] } else { break } } for _, ansi := range simplifyAnsiCodes(ansisToAdd) { result.WriteString(ansi) } // add the bytes of the current rune _, size := utf8.DecodeRuneInString(truncated[i:]) result.WriteString(truncated[i : i+size]) i += size } if !isReset { result.WriteString(RST) } return result.String() } // getNonAnsiBytes extracts a substring of specified length from the input string, excluding ANSI escape sequences. // It reads from the given start position until it has collected the requested number of non-ANSI bytes. // // Parameters: // - s: The input string that may contain ANSI escape sequences // - startIdx: The byte position in the input to start reading from // - numBytes: The number of non-ANSI bytes to collect // // Returns a string containing bytesToExtract bytes of the input with ANSI sequences removed. If the input text ends // before collecting bytesToExtract bytes, returns all available non-ANSI bytes. func getNonAnsiBytes(s string, startIdx, numBytes int) string { var result strings.Builder currentPos := startIdx bytesCollected := 0 for currentPos < len(s) && bytesCollected < numBytes { if strings.HasPrefix(s[currentPos:], "\x1b[") { escEnd := currentPos + strings.Index(s[currentPos:], "m") + 1 currentPos = escEnd continue } result.WriteByte(s[currentPos]) bytesCollected++ currentPos++ } return result.String() } // highlightString applies highlighting to a segment of text while handling cases where the highlight // might overflow the segment boundaries. It preserves any existing ANSI styling in the segment. // // Parameters: // - styledSegment: the text segment to highlight, which may contain ANSI codes // - highlights: a list of Highlight structs defining the styledLine byte offsets and styles to apply // NOTE: highlight byte ranges should not overlap // - plainLineSegmentStartByte: byte offset where styledSegment starts in full line without ansi codes // - plainLineSegmentEndByte: byte offset where styledSegment ends in full line without ansi codes // // Returns the segment with highlighting applied, preserving original ANSI codes. func highlightString( styledSegment string, highlights []Highlight, plainLineSegmentStartByte int, plainLineSegmentEndByte int, ) string { if len(highlights) == 0 { return styledSegment } var applicableHighlights []highlightRange for _, highlight := range highlights { unstyledByteRange := highlight.ByteRangeUnstyledContent if unstyledByteRange.Start < plainLineSegmentEndByte && unstyledByteRange.End > plainLineSegmentStartByte { startByte := max(unstyledByteRange.Start, plainLineSegmentStartByte) - plainLineSegmentStartByte endByte := min(unstyledByteRange.End, plainLineSegmentEndByte) - plainLineSegmentStartByte applicableHighlights = append(applicableHighlights, highlightRange{ startByte: startByte, endByte: endByte, style: highlight.Style, }) } } if len(applicableHighlights) == 0 { return styledSegment } // sort highlights by start position for i := 0; i < len(applicableHighlights); i++ { for j := i + 1; j < len(applicableHighlights); j++ { if applicableHighlights[j].startByte < applicableHighlights[i].startByte { applicableHighlights[i], applicableHighlights[j] = applicableHighlights[j], applicableHighlights[i] } } } var result strings.Builder // pre-allocation based on highlight density (~50 bytes per highlight for styling) estimatedSize := len(styledSegment) + len(applicableHighlights)*50 result.Grow(estimatedSize) var activeStyles []string nonAnsiBytes := 0 highlightIdx := 0 inAnsi := false i := 0 for i < len(styledSegment) { // handle ansi sequences if strings.HasPrefix(styledSegment[i:], "\x1b[") { inAnsi = true ansiLen := strings.Index(styledSegment[i:], "m") if ansiLen != -1 { escEnd := i + ansiLen + 1 ansi := styledSegment[i:escEnd] if isResetCode(ansi) { activeStyles = []string{} // reset } else { activeStyles = append(activeStyles, ansi) // add new active style } result.WriteString(ansi) i = escEnd inAnsi = false continue } } if !inAnsi { // check if need to start a highlight at this position highlighted := false for highlightIdx < len(applicableHighlights) && applicableHighlights[highlightIdx].startByte == nonAnsiBytes { highlight := applicableHighlights[highlightIdx] // reset current styles if any if len(activeStyles) > 0 { result.WriteString(RST) } // extract and apply highlight text plainText := getNonAnsiBytes(styledSegment, i, highlight.endByte-highlight.startByte) result.WriteString(highlight.style.Render(plainText)) // restore previous styles if any if len(activeStyles) > 0 { for _, style := range activeStyles { result.WriteString(style) } } // skip highlighted text count := 0 for count < len(plainText) && i < len(styledSegment) { if strings.HasPrefix(styledSegment[i:], "\x1b[") { escEnd := i + strings.Index(styledSegment[i:], "m") + 1 result.WriteString(styledSegment[i:escEnd]) i = escEnd continue } i++ count++ } nonAnsiBytes += len(plainText) highlightIdx++ // skip to next highlight that doesn't overlap for highlightIdx < len(applicableHighlights) && applicableHighlights[highlightIdx].startByte < nonAnsiBytes { highlightIdx++ } highlighted = true continue } if highlighted { continue } } // regular character if i < len(styledSegment) { result.WriteByte(styledSegment[i]) if !inAnsi { nonAnsiBytes++ } } i++ } return removeEmptyAnsiSequences(result.String()) } func simplifyAnsiCodes(ansis []string) []string { if len(ansis) == 0 { return []string{} } // if there's just a bunch of reset sequences, compress it to one allReset := true for _, ansi := range ansis { if !isResetCode(ansi) { allReset = false break } } if allReset { return []string{RST} } // return all ansis to the right of the rightmost reset seq for i := len(ansis) - 1; i >= 0; i-- { if isResetCode(ansis[i]) { result := ansis[i+1:] // keep reset at the start if present if isResetCode(ansis[0]) { return append([]string{RST}, result...) } return result } } return ansis } func runesHaveAnsiPrefix(runes []rune) bool { return len(runes) >= 2 && runes[0] == '\x1b' && runes[1] == '[' } func findAnsiByteRanges(s string) [][]uint32 { // pre-count to allocate exact size count := strings.Count(s, "\x1b[") if count == 0 { return nil } allRanges := make([]uint32, count*2) ranges := make([][]uint32, count) for i := range count { ranges[i] = allRanges[i*2 : i*2+2] } rangeIdx := 0 for i := 0; i < len(s); { if i+1 < len(s) && s[i] == '\x1b' && s[i+1] == '[' { start := i i += 2 // skip \x1b[ // find the 'm' that ends this sequence for i < len(s) && s[i] != 'm' { i++ } if i < len(s) && s[i] == 'm' { allRanges[rangeIdx*2] = clampIntToUint32(start) allRanges[rangeIdx*2+1] = clampIntToUint32(i + 1) rangeIdx++ i++ continue } } i++ } return ranges[:rangeIdx] } func findAnsiRuneRanges(s string) [][]uint32 { // pre-count to allocate exact size count := strings.Count(s, "\x1b[") if count == 0 { return nil } allRanges := make([]uint32, count*2) ranges := make([][]uint32, count) for i := range count { ranges[i] = allRanges[i*2 : i*2+2] } rangeIdx := 0 runes := []rune(s) for i := 0; i < len(runes); { if i+1 < len(runes) && runes[i] == '\x1b' && runes[i+1] == '[' { start := i i += 2 // skip \x1b[ // find the 'm' that ends this sequence for i < len(runes) && runes[i] != 'm' { i++ } if i < len(runes) && runes[i] == 'm' { allRanges[rangeIdx*2] = clampIntToUint32(start) allRanges[rangeIdx*2+1] = clampIntToUint32(i + 1) rangeIdx++ i++ continue } } i++ } return ranges[:rangeIdx] } // stripNonSGR removes all non-SGR ANSI escape sequences from the input string. // SGR sequences (\x1b[...m) are preserved. all other escape sequences (CSI non-SGR, // OSC, Fe, Fp, nF, SS2, SS3) are stripped. uses lazy allocation so lines containing // only SGR sequences (the common case) incur zero allocations. func stripNonSGR(line string) string { if !strings.Contains(line, "\x1b") { return line } var b strings.Builder allocated := false lastCopied := 0 i := 0 for i < len(line) { if line[i] != '\x1b' { i++ continue } seqStart := i i++ // past \x1b if i >= len(line) { // bare \x1b at end of string — keep it break } next := line[i] switch { case next == '[': // CSI sequence: \x1b[ + params (0x30-0x3F) + intermediates (0x20-0x2F) + final (0x40-0x7E) i++ // past [ // consume parameter bytes for i < len(line) && line[i] >= 0x30 && line[i] <= 0x3F { i++ } // consume intermediate bytes for i < len(line) && line[i] >= 0x20 && line[i] <= 0x2F { i++ } // final byte if i >= len(line) { // truncated CSI — keep as literal text i = seqStart + 1 continue } finalByte := line[i] i++ // past final byte if finalByte < 0x40 || finalByte > 0x7E { // malformed — keep as literal text i = seqStart + 1 continue } if finalByte == 'm' { // SGR — keep continue } // non-SGR CSI — strip if !allocated { b.Grow(len(line)) allocated = true } b.WriteString(line[lastCopied:seqStart]) lastCopied = i case next == ']': // OSC sequence: \x1b] ... terminated by BEL (\x07) or ST (\x1b\\) i++ // past ] for i < len(line) { if line[i] == '\x07' { i++ // past BEL break } if line[i] == '\x1b' && i+1 < len(line) && line[i+1] == '\\' { i += 2 // past ST break } i++ } // strip (including unterminated OSC at end of string) if !allocated { b.Grow(len(line)) allocated = true } b.WriteString(line[lastCopied:seqStart]) lastCopied = i case next == 'N' || next == 'O': // SS2 or SS3: \x1b + N/O + one designated character i++ // past N or O if i < len(line) { i++ // past designated character } if !allocated { b.Grow(len(line)) allocated = true } b.WriteString(line[lastCopied:seqStart]) lastCopied = i case next >= 0x40 && next <= 0x5F: // Fe sequence (excluding [, ], N, O handled above): \x1b + Fe byte i++ // past Fe byte if !allocated { b.Grow(len(line)) allocated = true } b.WriteString(line[lastCopied:seqStart]) lastCopied = i case next >= 0x30 && next <= 0x3F: // Fp (DEC private): \x1b + byte in 0x30-0x3F (ESC-7, ESC-8, ESC-=, ESC->) i++ // past Fp byte if !allocated { b.Grow(len(line)) allocated = true } b.WriteString(line[lastCopied:seqStart]) lastCopied = i case next >= 0x20 && next <= 0x2F: // nF sequence: \x1b + intermediates (0x20-0x2F) + final (0x30-0x7E) i++ // past first intermediate for i < len(line) && line[i] >= 0x20 && line[i] <= 0x2F { i++ } if i < len(line) && line[i] >= 0x30 && line[i] <= 0x7E { i++ // past final byte } if !allocated { b.Grow(len(line)) allocated = true } b.WriteString(line[lastCopied:seqStart]) lastCopied = i default: // bare \x1b followed by unrecognized byte — keep as literal continue } } if !allocated { return line } b.WriteString(line[lastCopied:]) return b.String() } func removeEmptyAnsiSequences(s string) string { if len(s) == 0 { return s } var result strings.Builder result.Grow(len(s)) i := 0 for i < len(s) { if i < len(s)-4 && s[i:i+2] == "\x1b[" { // find the end of this ansi sequence end := i + 2 for end < len(s) && s[end] != 'm' { end++ } if end < len(s) { end++ // include the 'm' ansiSeq := s[i:end] // check if this is followed immediately by a reset sequence if end < len(s)-2 && s[end:end+2] == "\x1b[" { resetEnd := end + 2 for resetEnd < len(s) && s[resetEnd] != 'm' { resetEnd++ } if resetEnd < len(s) { resetEnd++ // include the 'm' resetSeq := s[end:resetEnd] // if this is a reset sequence (\x1b[0m or \x1b[m), skip both sequences if resetSeq == "\x1b[0m" || resetSeq == "\x1b[m" { i = resetEnd continue } } } // not followed by reset, keep the sequence result.WriteString(ansiSeq) i = end continue } } result.WriteByte(s[i]) i++ } return result.String() } ================================================ FILE: modules/viewport/item/ansi_test.go ================================================ package item import ( "regexp" "testing" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport/internal" ) func TestAnsi_reapplyAnsi(t *testing.T) { tests := []struct { name string original string truncated string truncByteOffset int expected string }{ { name: "no ansi, no offset", original: "1234567890123456789012345", truncated: "12345", truncByteOffset: 0, expected: "12345", }, { name: "no ansi, offset", original: "1234567890123456789012345", truncated: "2345", truncByteOffset: 1, expected: "2345", }, { name: "multi ansi, no offset", original: "\x1b[38;2;255;0;0m1" + RST + "\x1b[38;2;0;0;255m2" + RST + "\x1b[38;2;255;0;0m3" + RST + "45", truncated: "123", truncByteOffset: 0, expected: "\x1b[38;2;255;0;0m1" + RST + "\x1b[38;2;0;0;255m2" + RST + "\x1b[38;2;255;0;0m3" + RST, }, { name: "surrounding ansi, no offset", original: "\x1b[38;2;255;0;0m12345" + RST, truncated: "123", truncByteOffset: 0, expected: "\x1b[38;2;255;0;0m123" + RST, }, { name: "surrounding ansi, offset", original: "\x1b[38;2;255;0;0m12345" + RST, truncated: "234", truncByteOffset: 1, expected: "\x1b[38;2;255;0;0m234" + RST, }, { name: "left ansi, no offset", original: "\x1b[38;2;255;0;0m1" + RST + "2345", truncated: "123", truncByteOffset: 0, expected: "\x1b[38;2;255;0;0m1" + RST + "23", }, { name: "left ansi, offset", original: "\x1b[38;2;255;0;0m12" + RST + "345", truncated: "234", truncByteOffset: 1, expected: "\x1b[38;2;255;0;0m2" + RST + "34", }, { name: "right ansi, no offset", original: "1" + "\x1b[38;2;255;0;0m2345" + RST, truncated: "123", truncByteOffset: 0, expected: "1" + "\x1b[38;2;255;0;0m23" + RST, }, { name: "right ansi, offset", original: "12" + "\x1b[38;2;255;0;0m345" + RST, truncated: "234", truncByteOffset: 1, expected: "2" + "\x1b[38;2;255;0;0m34" + RST, }, { name: "left and right ansi, no offset", original: "\x1b[38;2;255;0;0m1" + RST + "2" + "\x1b[38;2;255;0;0m345" + RST, truncated: "123", truncByteOffset: 0, expected: "\x1b[38;2;255;0;0m1" + RST + "2" + "\x1b[38;2;255;0;0m3" + RST, }, { name: "left and right ansi, offset", original: "\x1b[38;2;255;0;0m12" + RST + "3" + "\x1b[38;2;255;0;0m45" + RST, truncated: "234", truncByteOffset: 1, expected: "\x1b[38;2;255;0;0m2" + RST + "3" + "\x1b[38;2;255;0;0m4" + RST, }, { name: "truncated right ansi, no offset", original: "\x1b[38;2;255;0;0m1" + RST + "234" + "\x1b[38;2;255;0;0m5" + RST, truncated: "123", truncByteOffset: 0, expected: "\x1b[38;2;255;0;0m1" + RST + "23", }, { name: "truncated right ansi, offset", original: "\x1b[38;2;255;0;0m12" + RST + "34" + "\x1b[38;2;255;0;0m5" + RST, truncated: "234", truncByteOffset: 1, expected: "\x1b[38;2;255;0;0m2" + RST + "34", }, { name: "truncated left ansi, offset", original: "\x1b[38;2;255;0;0m1" + RST + "23" + "\x1b[38;2;255;0;0m45" + RST, truncated: "234", truncByteOffset: 1, expected: "23" + "\x1b[38;2;255;0;0m4" + RST, }, { name: "nested color sequences", original: "\x1b[31m1\x1b[32m2\x1b[33m3" + RST + RST + RST + "45", truncated: "123", truncByteOffset: 0, expected: "\x1b[31m1\x1b[32m2\x1b[33m3" + RST, }, { name: "nested color sequences with offset", original: "\x1b[31m1\x1b[32m2\x1b[33m3" + RST + RST + RST + "45", truncated: "234", truncByteOffset: 1, expected: "\x1b[31m\x1b[32m2\x1b[33m3" + RST + "4", }, { name: "nested style sequences", original: "\x1b[1m1\x1b[4m2\x1b[3m3" + RST + RST + RST + "45", truncated: "123", truncByteOffset: 0, expected: "\x1b[1m1\x1b[4m2\x1b[3m3" + RST, }, { name: "mixed nested sequences", original: "\x1b[31m1\x1b[1m2\x1b[4;32m3" + RST + RST + RST + "45", truncated: "234", truncByteOffset: 1, expected: "\x1b[31m\x1b[1m2\x1b[4;32m3" + RST + "4", }, { name: "deeply nested sequences", original: "\x1b[31m1\x1b[1m2\x1b[4m3\x1b[32m4" + RST + RST + RST + RST + "5", truncated: "123", truncByteOffset: 0, expected: "\x1b[31m1\x1b[1m2\x1b[4m3" + RST, }, { name: "partial nested sequences", original: "1\x1b[31m2\x1b[1m3\x1b[4m4" + RST + RST + RST + "5", truncated: "234", truncByteOffset: 1, expected: "\x1b[31m2\x1b[1m3\x1b[4m4" + RST, }, { name: "overlapping nested sequences", original: "\x1b[31m1\x1b[1m2" + RST + "3\x1b[4m4" + RST + "5", truncated: "234", truncByteOffset: 1, expected: "\x1b[31m\x1b[1m2" + RST + "3\x1b[4m4" + RST, }, { name: "complex RGB nested sequences", original: "\x1b[38;2;255;0;0m1\x1b[1m2\x1b[38;2;0;255;0m3" + RST + RST + "45", truncated: "123", truncByteOffset: 0, expected: "\x1b[38;2;255;0;0m1\x1b[1m2\x1b[38;2;0;255;0m3" + RST, }, { name: "nested sequences with background colors", original: "\x1b[31;44m1\x1b[1m2\x1b[32;45m3" + RST + RST + "45", truncated: "234", truncByteOffset: 1, expected: "\x1b[31;44m\x1b[1m2\x1b[32;45m3" + RST + "4", }, { name: "emoji basic", original: "1️⃣2️⃣3️⃣4️⃣5️⃣", truncated: "1️⃣2️⃣3️⃣", truncByteOffset: 0, expected: "1️⃣2️⃣3️⃣", }, { name: "emoji with ansi", original: "\x1b[31m1️⃣\x1b[32m2️⃣\x1b[33m3️⃣" + RST, truncated: "1️⃣2️⃣", truncByteOffset: 0, expected: "\x1b[31m1️⃣\x1b[32m2️⃣" + RST, }, { name: "chinese characters", original: "你好世界星星", truncated: "你好世", truncByteOffset: 0, expected: "你好世", }, { name: "simple with ansi and offset", original: "\x1b[31ma\x1b[32mb\x1b[33mc" + RST + "de", truncated: "bcd", truncByteOffset: 1, expected: "\x1b[31m\x1b[32mb\x1b[33mc" + RST + "d", }, { name: "chinese with ansi and offset", original: "\x1b[31m你\x1b[32m好\x1b[33m世" + RST + "界星", truncated: "好世界", truncByteOffset: 3, // 你 is 3 bytes expected: "\x1b[31m\x1b[32m好\x1b[33m世" + RST + "界", }, { name: "lots of leading empty ansi", original: "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST + "\x1b[38;2;255;0;0mr" + RST, truncated: "r", truncByteOffset: 10, expected: "\x1b[38;2;255;0;0mr" + RST, }, { name: "complex ansi, no offset", original: "\x1b[38;2;0;0;255msome " + RST + "\x1b[38;2;255;0;0mred" + RST + "\x1b[38;2;0;0;255m t" + RST, truncated: "some red t", truncByteOffset: 0, expected: "\x1b[38;2;0;0;255msome " + RST + "\x1b[38;2;255;0;0mred" + RST + "\x1b[38;2;0;0;255m t" + RST, }, { name: "unicode with ansi", original: internal.RedBg.Render("A💖") + "中é", truncated: "A💖中é", truncByteOffset: 0, expected: internal.RedBg.Render("A💖") + "中é", }, } ansiRegex := regexp.MustCompile("\x1b\\[[0-9;]*m") toUInt32 := func(indexes [][]int) [][]uint32 { uint32Indexes := make([][]uint32, len(indexes)) for i, idx := range indexes { uint32Indexes[i] = []uint32{clampIntToUint32(idx[0]), clampIntToUint32(idx[1])} } return uint32Indexes } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ansiCodeIndexes := toUInt32(ansiRegex.FindAllStringIndex(tt.original, -1)) actual := reapplyAnsi(tt.original, tt.truncated, tt.truncByteOffset, ansiCodeIndexes) internal.CmpStr(t, tt.expected, actual) }) } } func TestHighlightString(t *testing.T) { for _, tt := range []struct { name string plainLine string // used for extracting highlights styledSegment string // line segment with ANSI codes toHighlight string // unstyled text to highlight in segment highlightStyle lipgloss.Style plainLineSegmentStartByte int // byte offset in plainLine where segment starts plainLineSegmentEndByte int // byte offset in plainLine where segment ends expected string }{ { name: "empty", plainLine: "", styledSegment: "", toHighlight: "", highlightStyle: internal.RedFg, plainLineSegmentStartByte: 0, plainLineSegmentEndByte: 0, expected: "", }, { name: "no highlight", plainLine: "hello", styledSegment: "hello", toHighlight: "", highlightStyle: internal.RedFg, plainLineSegmentStartByte: 0, plainLineSegmentEndByte: 5, expected: "hello", }, { name: "simple highlight", plainLine: "hello", styledSegment: "hello", toHighlight: "ell", highlightStyle: internal.RedFg, plainLineSegmentStartByte: 0, plainLineSegmentEndByte: 5, expected: "h" + internal.RedFg.Render("ell") + "o", }, { name: "highlight with existing style", plainLine: "first line", styledSegment: internal.RedFg.Render("first line"), toHighlight: "first", highlightStyle: internal.BlueFg, plainLineSegmentStartByte: 0, plainLineSegmentEndByte: 10, expected: internal.BlueFg.Render("first") + internal.RedFg.Render(" line"), }, { name: "left overflow", plainLine: "hello world", styledSegment: "ello world", toHighlight: "hello", highlightStyle: internal.RedFg, plainLineSegmentStartByte: 1, plainLineSegmentEndByte: 11, expected: internal.RedFg.Render("ello") + " world", }, { name: "right overflow", plainLine: "hello world", styledSegment: "hello wo", toHighlight: "world", highlightStyle: internal.RedFg, plainLineSegmentStartByte: 0, plainLineSegmentEndByte: 8, expected: "hello " + internal.RedFg.Render("wo"), }, { name: "both overflow with existing style", plainLine: "hello world", styledSegment: internal.RedFg.Render("ello wor"), toHighlight: "hello world", highlightStyle: internal.BlueFg, plainLineSegmentStartByte: 1, plainLineSegmentEndByte: 9, expected: internal.BlueFg.Render("ello wor"), }, { name: "no match in segment", plainLine: "outside middle outside", styledSegment: "middle", toHighlight: "outside", highlightStyle: internal.RedFg, plainLineSegmentStartByte: 8, plainLineSegmentEndByte: 14, expected: "middle", }, { name: "across ansi styles", plainLine: "hello world", styledSegment: internal.RedBg.Render("hello") + " " + internal.BlueBg.Render("world"), toHighlight: "lo wo", highlightStyle: internal.GreenBg, plainLineSegmentStartByte: 0, plainLineSegmentEndByte: 11, expected: internal.RedBg.Render("hel") + internal.GreenBg.Render("lo wo") + internal.BlueBg.Render("rld"), }, { name: "unicode", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b), A (1w, 1b) plainLine: "A💖中éA", styledSegment: internal.RedFg.Render("💖中éA"), toHighlight: "💖中", highlightStyle: internal.GreenBg, plainLineSegmentStartByte: 1, plainLineSegmentEndByte: 12, expected: internal.GreenBg.Render("💖中") + internal.RedFg.Render("éA"), }, } { t.Run(tt.name, func(t *testing.T) { matches := NewItem(tt.plainLine).ExtractExactMatches(tt.toHighlight) highlights := toHighlights(matches, tt.highlightStyle) result := highlightString( tt.styledSegment, highlights, tt.plainLineSegmentStartByte, tt.plainLineSegmentEndByte, ) internal.CmpStr(t, tt.expected, result) }) } } func TestAnsi_getNonAnsiBytes(t *testing.T) { tests := []struct { name string s string startByteIdx int numBytes int expected string shouldPanic bool }{ { name: "empty", s: "", startByteIdx: 0, numBytes: 0, expected: "", }, { name: "negative start panics", s: "a", startByteIdx: -1, numBytes: 1, shouldPanic: true, }, { name: "zero bytes", s: "abc", startByteIdx: 1, numBytes: 0, expected: "", }, { name: "negative bytes", s: "abc", startByteIdx: 1, numBytes: -1, expected: "", }, { name: "all from start", s: "abc", startByteIdx: 0, numBytes: 3, expected: "abc", }, { name: "some from start", s: "abc", startByteIdx: 0, numBytes: 2, expected: "ab", }, { name: "rest from offset", s: "abc", startByteIdx: 1, numBytes: 2, expected: "bc", }, { name: "some from offset", s: "abc", startByteIdx: 1, numBytes: 1, expected: "b", }, { name: "ignore ansi", s: "abc" + internal.RedBg.Render("def") + "ghi", startByteIdx: 1, numBytes: 7, expected: "bcdefgh", }, { name: "unicode", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖中é", startByteIdx: 1, numBytes: 7, expected: "💖中", }, { name: "unicode with ansi", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖" + internal.RedBg.Render("中") + "é", startByteIdx: 0, numBytes: 11, expected: "A💖中é", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if tt.shouldPanic { assertPanic(t, func() { getNonAnsiBytes(tt.s, tt.startByteIdx, tt.numBytes) }) return } if r := getNonAnsiBytes(tt.s, tt.startByteIdx, tt.numBytes); r != tt.expected { t.Errorf("expected %q, got %q", tt.expected, r) } }) } } func TestStripNonSGR(t *testing.T) { tests := []struct { name string input string expected string }{ { name: "empty string", input: "", expected: "", }, { name: "no escape sequences", input: "hello world", expected: "hello world", }, { name: "sgr only preserved", input: "\x1b[31mhello\x1b[m", expected: "\x1b[31mhello\x1b[m", }, { name: "complex sgr preserved", input: "\x1b[38;2;255;0;0mhi\x1b[m", expected: "\x1b[38;2;255;0;0mhi\x1b[m", }, { name: "cursor up stripped", input: "\x1b[Ahello", expected: "hello", }, { name: "cursor down stripped", input: "\x1b[Bhello", expected: "hello", }, { name: "cursor forward stripped", input: "\x1b[Chello", expected: "hello", }, { name: "cursor back stripped", input: "\x1b[Dhello", expected: "hello", }, { name: "cursor position stripped", input: "\x1b[10;20Hhello", expected: "hello", }, { name: "erase display stripped", input: "\x1b[2Jhello", expected: "hello", }, { name: "erase line 1K stripped", input: "\x1b[1Khello", expected: "hello", }, { name: "erase line 2K stripped", input: "\x1b[2Khello", expected: "hello", }, { name: "scroll up stripped", input: "\x1b[Shello", expected: "hello", }, { name: "scroll down stripped", input: "\x1b[Thello", expected: "hello", }, { name: "device status stripped", input: "\x1b[6nhello", expected: "hello", }, { name: "private marker stripped", input: "\x1b[?25hhello", expected: "hello", }, { name: "sgr mixed with csi", input: "\x1b[31m\x1b[2Jhello\x1b[m", expected: "\x1b[31mhello\x1b[m", }, { name: "osc bel terminated stripped", input: "\x1b]0;title\x07hello", expected: "hello", }, { name: "osc st terminated stripped", input: "\x1b]0;title\x1b\\hello", expected: "hello", }, { name: "osc hyperlink stripped", input: "\x1b]8;;https://example.com\x1b\\click\x1b]8;;\x1b\\", expected: "click", }, { name: "esc-M reverse index stripped", input: "\x1bMhello", expected: "hello", }, { name: "esc-D index stripped", input: "\x1bDhello", expected: "hello", }, { name: "esc-7 dec save cursor stripped", input: "\x1b7hello", expected: "hello", }, { name: "esc-8 dec restore cursor stripped", input: "\x1b8hello", expected: "hello", }, { name: "ss2 stripped", input: "\x1bNA hello", expected: " hello", }, { name: "ss3 stripped", input: "\x1bOA hello", expected: " hello", }, { name: "nf designate charset stripped", input: "\x1b(Bhello", expected: "hello", }, { name: "multiple non-sgr stripped", input: "\x1b[31m\x1b[2J\x1b[Hhello\x1b[m", expected: "\x1b[31mhello\x1b[m", }, { name: "bare esc at end preserved", input: "hello\x1b", expected: "hello\x1b", }, { name: "truncated csi preserved", input: "hello\x1b[", expected: "hello\x1b[", }, { name: "truncated csi params preserved", input: "hello\x1b[31", expected: "hello\x1b[31", }, { name: "unicode with non-sgr", input: "\x1b[2J世界\x1b[31m星\x1b[m", expected: "世界\x1b[31m星\x1b[m", }, { name: "all stripped leaves empty", input: "\x1b[2J\x1b[H", expected: "", }, { name: "unterminated osc stripped to end", input: "hello\x1b]0;title", expected: "hello", }, { name: "esc followed by lowercase kept", input: "\x1b" + "ahello", expected: "\x1b" + "ahello", }, // additional CSI variants { name: "csi with intermediate bytes stripped", input: "\x1b[ q hello", expected: " hello", }, { name: "csi insert line stripped", input: "\x1b[3Lhello", expected: "hello", }, { name: "csi delete line stripped", input: "\x1b[3Mhello", expected: "hello", }, { name: "csi delete char stripped", input: "\x1b[Phello", expected: "hello", }, { name: "csi erase char stripped", input: "\x1b[Xhello", expected: "hello", }, { name: "csi set mode stripped", input: "\x1b[4hhello", expected: "hello", }, { name: "csi reset mode stripped", input: "\x1b[4lhello", expected: "hello", }, { name: "csi cursor save stripped", input: "\x1b[shello", expected: "hello", }, { name: "csi cursor restore stripped", input: "\x1b[uhello", expected: "hello", }, { name: "csi sgr with intermediate preserved", input: "\x1b[1;31mhello\x1b[m", expected: "\x1b[1;31mhello\x1b[m", }, { name: "csi 256-color sgr preserved", input: "\x1b[38;5;196mhello\x1b[m", expected: "\x1b[38;5;196mhello\x1b[m", }, { name: "sgr reset 0m preserved", input: "\x1b[0mhello", expected: "\x1b[0mhello", }, { name: "sgr bare m preserved", input: "\x1b[mhello", expected: "\x1b[mhello", }, // more OSC variants { name: "osc with numeric param and bel", input: "\x1b]2;my window\x07text", expected: "text", }, { name: "osc empty payload bel terminated", input: "\x1b]\x07text", expected: "text", }, { name: "osc with embedded semicolons", input: "\x1b]8;id=link;https://example.com\x1b\\click here\x1b]8;;\x1b\\", expected: "click here", }, { name: "osc between text", input: "before\x1b]0;title\x07after", expected: "beforeafter", }, // more Fe sequences { name: "esc-E next line stripped", input: "\x1bEhello", expected: "hello", }, { name: "esc-H set tab stop stripped", input: "\x1bHhello", expected: "hello", }, { name: "esc-P (DCS) stripped as Fe", input: "\x1bPhello", expected: "hello", }, // more Fp sequences { name: "esc-= keypad application mode stripped", input: "\x1b=hello", expected: "hello", }, { name: "esc-> keypad numeric mode stripped", input: "\x1b>hello", expected: "hello", }, // nF variants { name: "nf G0 designate stripped", input: "\x1b(0hello", expected: "hello", }, { name: "nf G1 designate stripped", input: "\x1b)Bhello", expected: "hello", }, { name: "nf multi-intermediate stripped", input: "\x1b$ Bhello", expected: "hello", }, { name: "nf truncated at end stripped", input: "hello\x1b(", expected: "hello", }, // SS2/SS3 edge cases { name: "ss2 at end of string", input: "hello\x1bN", expected: "hello", }, { name: "ss3 at end of string", input: "hello\x1bO", expected: "hello", }, // complex mixed sequences { name: "sgr surrounded by many non-sgr", input: "\x1b[2J\x1b[H\x1b7\x1b[31mhello\x1b[m\x1b8\x1b[?25h", expected: "\x1b[31mhello\x1b[m", }, { name: "alternating sgr and non-sgr", input: "\x1b[1mA\x1b[HB\x1b[32mC\x1b[2JD\x1b[m", expected: "\x1b[1mAB\x1b[32mCD\x1b[m", }, { name: "non-sgr between text preserves all text", input: "one\x1b[Htwo\x1b[2Jthree", expected: "onetwothree", }, { name: "consecutive non-sgr sequences stripped", input: "\x1b[A\x1b[B\x1b[C\x1b[Dhello", expected: "hello", }, { name: "sgr only no alloc returns same string", input: "\x1b[1m\x1b[31m\x1b[42mhello\x1b[m", expected: "\x1b[1m\x1b[31m\x1b[42mhello\x1b[m", }, // unicode and wide chars { name: "emoji with non-sgr stripped", input: "\x1b[H🎉\x1b[31m🌍\x1b[m", expected: "🎉\x1b[31m🌍\x1b[m", }, { name: "cjk wide chars with non-sgr stripped", input: "\x1b[2J你好\x1b[31m世界\x1b[m", expected: "你好\x1b[31m世界\x1b[m", }, // malformed sequences { name: "csi with invalid final byte kept as text", input: "hello\x1b[\x10world", expected: "hello\x1b[\x10world", }, { name: "multiple bare esc preserved", input: "\x1b\x1b\x1bhello", expected: "\x1b\x1b\x1bhello", }, { name: "esc followed by space is nf stripped", input: "\x1b Fhello", expected: "hello", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { internal.CmpStr(t, tt.expected, stripNonSGR(tt.input)) }) } } // testing helper func assertPanic(t *testing.T, f func()) { defer func() { if r := recover(); r == nil { t.Error("did not panic as expected") } }() f() } ================================================ FILE: modules/viewport/item/concat.go ================================================ package item import ( "regexp" "strings" ) // ConcatItem implements Item by wrapping multiple SingleItem's without extra memory allocation // It is useful for e.g. prefixing content on an Item without needing to recompute that entire Item. type ConcatItem struct { items []SingleItem totalWidth int // cached total width across all items contentNoAnsi string // cached concatenated content without ANSI escape codes pinnedCount int // number of items to pin on the left (0 = no pinning) pinnedWidth int // cached total width of pinned items } // type assertion that ConcatItem implements Item var _ Item = ConcatItem{} // type assertion that *ConcatItem implements Item var _ Item = (*ConcatItem)(nil) // NewConcat creates a new ConcatItem from the given items func NewConcat(items ...SingleItem) ConcatItem { return NewConcatWithPinned(0, items...) } // NewConcatWithPinned creates a new ConcatItem with the first pinnedCount items pinned to the left. // Pinned items are not affected by horizontal panning (widthToLeft) in Take(). func NewConcatWithPinned(pinnedCount int, items ...SingleItem) ConcatItem { if len(items) == 0 { return ConcatItem{} } if pinnedCount < 0 { pinnedCount = 0 } if pinnedCount > len(items) { pinnedCount = len(items) } totalWidth := 0 pinnedWidth := 0 for i, item := range items { w := item.Width() totalWidth += w if i < pinnedCount { pinnedWidth += w } } return ConcatItem{ items: items, totalWidth: totalWidth, pinnedCount: pinnedCount, pinnedWidth: pinnedWidth, } } // Width returns the total width across all items. func (m ConcatItem) Width() int { return m.totalWidth } // Content returns the concatenated content of all items. func (m ConcatItem) Content() string { if len(m.items) == 0 { return "" } if len(m.items) == 1 { return m.items[0].Content() } totalLen := 0 for _, items := range m.items { totalLen += len(items.Content()) } var builder strings.Builder builder.Grow(totalLen) for _, item := range m.items { builder.WriteString(item.Content()) } return builder.String() } // ContentNoAnsi returns the concatenated content of all items without ANSI escape codes that style the string func (m ConcatItem) ContentNoAnsi() string { if m.contentNoAnsi != "" { return m.contentNoAnsi } if len(m.items) == 0 { return "" } if len(m.items) == 1 { m.contentNoAnsi = m.items[0].ContentNoAnsi() return m.contentNoAnsi } // make a single allocation for the concatenated string totalLen := 0 for _, items := range m.items { totalLen += len(items.ContentNoAnsi()) } var builder strings.Builder builder.Grow(totalLen) for _, item := range m.items { builder.WriteString(item.ContentNoAnsi()) } m.contentNoAnsi = builder.String() return m.contentNoAnsi } // Take returns a substring of the item that fits within the specified width. // If pinnedCount > 0, the first pinnedCount items are rendered at offset 0 (ignoring widthToLeft), // and the remaining items are rendered with widthToLeft applied in the remaining viewport width. func (m ConcatItem) Take( widthToLeft, takeWidth int, continuation string, highlights []Highlight, ) (string, int) { if len(m.items) == 0 { return "", 0 } // for single item with no pinning, delegate directly if len(m.items) == 1 && m.pinnedCount == 0 { return m.items[0].Take(widthToLeft, takeWidth, continuation, highlights) } // if no pinned items, use standard logic if m.pinnedCount == 0 { return m.takeUnpinned(widthToLeft, takeWidth, continuation, highlights) } // handle pinned items (including single item that is pinned) return m.takePinned(widthToLeft, takeWidth, continuation, highlights) } // takeUnpinned is used when no items are pinned func (m ConcatItem) takeUnpinned( widthToLeft, takeWidth int, continuation string, highlights []Highlight, ) (string, int) { if widthToLeft >= m.totalWidth { return "", 0 } // find which item contains our start position skippedWidth := 0 skippedBytes := 0 firstItemIdx := 0 firstByteIdx := 0 startWidthFirstItem := widthToLeft for i := range m.items { itemWidth := m.items[i].Width() if skippedWidth+itemWidth > widthToLeft { firstItemIdx = i startWidthFirstItem = widthToLeft - skippedWidth runeIdx := m.items[i].findRuneIndexWithWidthToLeft(startWidthFirstItem) var firstItemByteIdx int if runeIdx < m.items[i].numNoAnsiRunes { firstItemByteIdx = int(m.items[i].getByteOffsetAtRuneIdx(runeIdx)) } else { firstItemByteIdx = len(m.items[i].line) } firstByteIdx = skippedBytes + firstItemByteIdx break } skippedWidth += itemWidth skippedBytes += len(m.items[i].lineNoAnsi) startWidthFirstItem -= itemWidth } // take from first item res, takenWidth := m.items[firstItemIdx].Take(startWidthFirstItem, takeWidth, "", []Highlight{}) remainingTotalWidth := takeWidth - takenWidth // if we have more width to take and more items available, continue currentItemIdx := firstItemIdx + 1 for remainingTotalWidth > 0 && currentItemIdx < len(m.items) { nextPart, partWidth := m.items[currentItemIdx].Take(0, remainingTotalWidth, "", []Highlight{}) if partWidth == 0 { break } res += nextPart remainingTotalWidth -= partWidth currentItemIdx++ } res = highlightString( res, highlights, firstByteIdx, firstByteIdx+len(StripAnsi(res)), ) // apply continuation indicators if needed if len(continuation) > 0 { contentToLeft := widthToLeft > 0 contentToRight := m.totalWidth-widthToLeft > takeWidth-remainingTotalWidth if contentToLeft || contentToRight { continuationRunes := []rune(continuation) if contentToLeft { res = replaceStartWithContinuation(res, continuationRunes) } if contentToRight { res = replaceEndWithContinuation(res, continuationRunes) } } } res = removeEmptyAnsiSequences(res) return res, takeWidth - remainingTotalWidth } // takePinned handles rendering when there are pinned items func (m ConcatItem) takePinned( widthToLeft, takeWidth int, continuation string, highlights []Highlight, ) (string, int) { // edge case: pinned width >= takeWidth (pinned items fill entire viewport) if m.pinnedWidth >= takeWidth { return m.takePinnedOnly(takeWidth, continuation, highlights) } // calculate available width for non-pinned content nonPinnedTakeWidth := takeWidth - m.pinnedWidth // render pinned items at offset 0 pinnedResult, pinnedTaken := m.takePinnedItems(m.pinnedWidth, highlights) // render non-pinned items with the original widthToLeft nonPinnedResult, nonPinnedTaken := m.takeNonPinnedItems( widthToLeft, nonPinnedTakeWidth, continuation, highlights, ) return pinnedResult + nonPinnedResult, pinnedTaken + nonPinnedTaken } // takePinnedItems renders just the pinned items at offset 0 func (m ConcatItem) takePinnedItems(takeWidth int, highlights []Highlight) (string, int) { if m.pinnedCount == 0 || takeWidth <= 0 { return "", 0 } // take from pinned items var result strings.Builder remainingWidth := takeWidth for i := 0; i < m.pinnedCount && remainingWidth > 0; i++ { part, partWidth := m.items[i].Take(0, remainingWidth, "", []Highlight{}) if partWidth == 0 { break } result.WriteString(part) remainingWidth -= partWidth } res := result.String() // calculate end byte for highlights (byte offset at end of pinned items) endByteIdx := 0 for i := 0; i < m.pinnedCount; i++ { endByteIdx += len(m.items[i].lineNoAnsi) } // apply highlights to pinned section res = highlightString( res, highlights, 0, min(endByteIdx, len(StripAnsi(res))), ) return res, takeWidth - remainingWidth } // takeNonPinnedItems renders items after the pinned ones with the given offset func (m ConcatItem) takeNonPinnedItems( widthToLeft, takeWidth int, continuation string, highlights []Highlight, ) (string, int) { if m.pinnedCount >= len(m.items) || takeWidth <= 0 { return "", 0 } // calculate the byte offset where non-pinned content starts pinnedByteOffset := 0 for i := 0; i < m.pinnedCount; i++ { pinnedByteOffset += len(m.items[i].lineNoAnsi) } // calculate total width of non-pinned items nonPinnedTotalWidth := m.totalWidth - m.pinnedWidth // if widthToLeft exceeds non-pinned content, return empty if widthToLeft >= nonPinnedTotalWidth { return "", 0 } // find starting item and position within non-pinned items skippedWidth := 0 skippedBytes := pinnedByteOffset firstItemIdx := m.pinnedCount startWidthFirstItem := widthToLeft for i := m.pinnedCount; i < len(m.items); i++ { itemWidth := m.items[i].Width() if skippedWidth+itemWidth > widthToLeft { firstItemIdx = i startWidthFirstItem = widthToLeft - skippedWidth runeIdx := m.items[i].findRuneIndexWithWidthToLeft(startWidthFirstItem) var firstItemByteIdx int if runeIdx < m.items[i].numNoAnsiRunes { firstItemByteIdx = int(m.items[i].getByteOffsetAtRuneIdx(runeIdx)) } else { firstItemByteIdx = len(m.items[i].line) } skippedBytes += firstItemByteIdx break } skippedWidth += itemWidth skippedBytes += len(m.items[i].lineNoAnsi) startWidthFirstItem -= itemWidth } firstByteIdx := skippedBytes // take from first non-pinned item res, takenWidth := m.items[firstItemIdx].Take(startWidthFirstItem, takeWidth, "", []Highlight{}) remainingTotalWidth := takeWidth - takenWidth // continue with subsequent items currentItemIdx := firstItemIdx + 1 for remainingTotalWidth > 0 && currentItemIdx < len(m.items) { nextPart, partWidth := m.items[currentItemIdx].Take(0, remainingTotalWidth, "", []Highlight{}) if partWidth == 0 { break } res += nextPart remainingTotalWidth -= partWidth currentItemIdx++ } // apply highlights res = highlightString( res, highlights, firstByteIdx, firstByteIdx+len(StripAnsi(res)), ) // apply continuation indicators for non-pinned section if len(continuation) > 0 { contentToLeft := widthToLeft > 0 contentToRight := nonPinnedTotalWidth-widthToLeft > takeWidth-remainingTotalWidth if contentToLeft || contentToRight { continuationRunes := []rune(continuation) if contentToLeft { res = replaceStartWithContinuation(res, continuationRunes) } if contentToRight { res = replaceEndWithContinuation(res, continuationRunes) } } } return res, takeWidth - remainingTotalWidth } // takePinnedOnly handles case where pinned width >= viewport width func (m ConcatItem) takePinnedOnly(takeWidth int, continuation string, highlights []Highlight) (string, int) { // render only pinned items, applying continuation if they overflow var result strings.Builder remainingWidth := takeWidth for i := 0; i < m.pinnedCount && remainingWidth > 0; i++ { part, partWidth := m.items[i].Take(0, remainingWidth, "", []Highlight{}) if partWidth == 0 { break } result.WriteString(part) remainingWidth -= partWidth } res := result.String() // calculate byte range for highlights endByteIdx := 0 for i := 0; i < m.pinnedCount; i++ { endByteIdx += len(m.items[i].lineNoAnsi) } res = highlightString(res, highlights, 0, min(endByteIdx, len(StripAnsi(res)))) // apply continuation if pinned items overflow viewport if len(continuation) > 0 && m.pinnedWidth > takeWidth { res = replaceEndWithContinuation(res, []rune(continuation)) } return res, takeWidth - remainingWidth } // NumWrappedLines returns the number of wrapped lines given a wrap width func (m ConcatItem) NumWrappedLines(wrapWidth int) int { if wrapWidth <= 0 { return 0 } else if m.totalWidth == 0 { return 1 } return (m.totalWidth + wrapWidth - 1) / wrapWidth } // LineBrokenItems returns a slice containing just this item (single-line). func (m ConcatItem) LineBrokenItems() []Item { return []Item{m} } // Repr returns a string representation of the ConcatItem for debugging. func (m ConcatItem) repr() string { var v strings.Builder v.WriteString("Concat(") for i := range m.items { if i > 0 { v.WriteString(", ") } v.WriteString(m.items[i].repr()) } v.WriteString(")") return v.String() } // ByteRangesToMatches converts byte ranges in the concatenated ANSI-stripped content to Matches. func (m ConcatItem) ByteRangesToMatches(byteRanges []ByteRange) []Match { if len(m.items) == 0 || len(byteRanges) == 0 { return nil } if len(m.items) == 1 { return m.items[0].ByteRangesToMatches(byteRanges) } itemByteOffsets, itemWidthOffsets := m.computeItemOffsets() matches := make([]Match, 0, len(byteRanges)) for _, br := range byteRanges { startWidth, endWidth := m.concatByteRangeToWidthRange(br.Start, br.End, itemByteOffsets, itemWidthOffsets) matches = append(matches, Match{ ByteRange: br, WidthRange: WidthRange{Start: startWidth, End: endWidth}, }) } return matches } // concatByteRangeToWidthRange converts a byte range in the concatenated content to a // width range using precomputed item offsets. func (m ConcatItem) concatByteRangeToWidthRange( startByte, endByte int, itemByteOffsets, itemWidthOffsets []int, ) (startWidth, endWidth int) { startItemIdx, startLocalByteOffset := m.findItemForByteOffset(startByte, itemByteOffsets) endItemIdx, endLocalByteOffset := m.findItemForByteOffset(endByte, itemByteOffsets) if startItemIdx >= 0 && startItemIdx < len(m.items) { startRuneIdx := m.items[startItemIdx].getRuneIndexAtByteOffset(startLocalByteOffset) if startRuneIdx > 0 { startWidth = int(m.items[startItemIdx].getCumulativeWidthAtRuneIdx(startRuneIdx - 1)) } startWidth += itemWidthOffsets[startItemIdx] } if endItemIdx >= 0 && endItemIdx < len(m.items) { endRuneIdx := m.items[endItemIdx].getRuneIndexAtByteOffset(endLocalByteOffset) if endRuneIdx > 0 { endWidth = int(m.items[endItemIdx].getCumulativeWidthAtRuneIdx(endRuneIdx - 1)) } endWidth += itemWidthOffsets[endItemIdx] } return } // computeItemOffsets precomputes cumulative byte and width offsets for each item. func (m ConcatItem) computeItemOffsets() (itemByteOffsets, itemWidthOffsets []int) { itemByteOffsets = make([]int, len(m.items)+1) itemWidthOffsets = make([]int, len(m.items)+1) for i, it := range m.items { itemByteOffsets[i+1] = itemByteOffsets[i] + len(it.ContentNoAnsi()) itemWidthOffsets[i+1] = itemWidthOffsets[i] + it.Width() } return } // ExtractExactMatches extracts exact matches from the item's content without ANSI codes func (m ConcatItem) ExtractExactMatches(exactMatch string) []Match { if len(m.items) == 0 || exactMatch == "" { return []Match{} } if len(m.items) == 1 { return m.items[0].ExtractExactMatches(exactMatch) } concatenated := m.ContentNoAnsi() var byteRanges []ByteRange startIndex := 0 for { foundIndex := strings.Index(concatenated[startIndex:], exactMatch) if foundIndex == -1 { break } actualStartIndex := startIndex + foundIndex endIndex := actualStartIndex + len(exactMatch) byteRanges = append(byteRanges, ByteRange{Start: actualStartIndex, End: endIndex}) startIndex = endIndex } return m.ByteRangesToMatches(byteRanges) } // findItemForByteOffset finds which item contains the given byte offset in concatenated content // Returns (itemIndex, localByteOffset) where localByteOffset is the offset within that item func (m ConcatItem) findItemForByteOffset(byteOffset int, itemByteOffsets []int) (int, int) { // binary search to find the item containing this byte offset left, right := 0, len(m.items)-1 for left <= right { mid := left + (right-left)/2 if byteOffset >= itemByteOffsets[mid] && byteOffset < itemByteOffsets[mid+1] { return mid, byteOffset - itemByteOffsets[mid] } else if byteOffset < itemByteOffsets[mid] { right = mid - 1 } else { left = mid + 1 } } // if not found within items, handle edge cases if byteOffset >= itemByteOffsets[len(m.items)] { // past the end - return last item with offset at end lastItemIdx := len(m.items) - 1 return lastItemIdx, len(m.items[lastItemIdx].ContentNoAnsi()) } // before the beginning return 0, 0 } // ExtractRegexMatches extracts regex matches from the item's content without ANSI codes func (m ConcatItem) ExtractRegexMatches(regex *regexp.Regexp) []Match { if len(m.items) == 0 { return []Match{} } if len(m.items) == 1 { return m.items[0].ExtractRegexMatches(regex) } concatenated := m.ContentNoAnsi() regexMatches := regex.FindAllStringIndex(concatenated, -1) if len(regexMatches) == 0 { return []Match{} } byteRanges := make([]ByteRange, 0, len(regexMatches)) for _, rm := range regexMatches { byteRanges = append(byteRanges, ByteRange{Start: rm[0], End: rm[1]}) } return m.ByteRangesToMatches(byteRanges) } ================================================ FILE: modules/viewport/item/concat_test.go ================================================ package item import ( "fmt" "regexp" "testing" "github.com/antgroup/hugescm/modules/viewport/internal" "charm.land/lipgloss/v2" ) func getEquivalentItems() map[string][]Item { return map[string][]Item{ "none": {}, "hello world": { NewItem("hello world"), NewConcat(NewItem("hello world")), NewConcat( NewItem("hello"), NewItem(" world"), ), NewConcat( NewItem("hel"), NewItem("lo "), NewItem("wo"), NewItem("rld"), ), NewConcat( NewItem("h"), NewItem("e"), NewItem("l"), NewItem("l"), NewItem("o"), NewItem(" "), NewItem("w"), NewItem("o"), NewItem("r"), NewItem("l"), NewItem("d"), ), }, "ansi": { NewItem(internal.RedBg.Render("hello") + " " + internal.BlueBg.Render("world")), NewConcat(NewItem(internal.RedBg.Render("hello") + " " + internal.BlueBg.Render("world"))), NewConcat( NewItem(internal.RedBg.Render("hello")+" "), NewItem(internal.BlueBg.Render("world")), ), NewConcat( NewItem(internal.RedBg.Render("hello")), NewItem(" "), NewItem(internal.BlueBg.Render("world")), ), }, "unicode_ansi": { // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) = 6w, 11b NewItem(internal.RedBg.Render("A💖") + "中é"), NewConcat(NewItem(internal.RedBg.Render("A💖") + "中é")), NewConcat( NewItem(internal.RedBg.Render("A💖")), NewItem("中"), NewItem("é"), ), }} } func TestConcatItem_Width(t *testing.T) { for _, eq := range getEquivalentItems() { for _, item := range eq { if item.Width() != eq[0].Width() { t.Errorf("expected %d, got %d for item %s", eq[0].Width(), item.Width(), item.repr()) } } } } func TestConcatItem_Content(t *testing.T) { for _, eq := range getEquivalentItems() { for _, item := range eq { if item.Content() != eq[0].Content() { t.Errorf("expected %q, got %q for item %s", eq[0].Content(), item.Content(), item.repr()) } } } } func TestConcatItem_Take(t *testing.T) { tests := []struct { name string key string widthToLeft int takeWidth int continuation string toHighlight string highlightStyle lipgloss.Style expected string }{ { name: "hello world start at 0", key: "hello world", widthToLeft: 0, takeWidth: 7, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "hello w", }, { name: "hello world start at 1", key: "hello world", widthToLeft: 1, takeWidth: 7, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "ello wo", }, { name: "hello world end", key: "hello world", widthToLeft: 10, takeWidth: 3, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "d", }, { name: "hello world past end", key: "hello world", widthToLeft: 11, takeWidth: 3, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "", }, { name: "hello world with continuation at end", key: "hello world", widthToLeft: 0, takeWidth: 7, continuation: "...", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "hell...", }, { name: "hello world with continuation at start", key: "hello world", widthToLeft: 4, takeWidth: 7, continuation: "...", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "...orld", }, { name: "hello world with continuation both ends", key: "hello world", widthToLeft: 2, takeWidth: 7, continuation: "...", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "... ...", }, { name: "hello world with highlight whole word", key: "hello world", widthToLeft: 0, takeWidth: 11, continuation: "", toHighlight: "hello", highlightStyle: internal.RedBg, expected: internal.RedBg.Render("hello") + " world", }, { name: "hello world with highlight across boundary", key: "hello world", widthToLeft: 3, takeWidth: 6, continuation: "", toHighlight: "lo wo", highlightStyle: internal.RedBg, expected: internal.RedBg.Render("lo wo") + "r", }, { name: "hello world with highlight and middle continuation", key: "hello world", widthToLeft: 1, takeWidth: 7, continuation: "..", toHighlight: "lo ", highlightStyle: internal.RedBg, expected: ".." + internal.RedBg.Render("lo ") + "..", }, { name: "hello world with highlight and overlapping continuation", key: "hello world", widthToLeft: 1, takeWidth: 7, continuation: "...", toHighlight: "lo ", highlightStyle: internal.RedBg, expected: "..\x1b[48;2;255;0;0m.o." + RST + "..", }, { name: "ansi start at 0", key: "ansi", widthToLeft: 0, takeWidth: 7, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render("hello") + " " + internal.BlueBg.Render("w"), }, { name: "ansi start at 1", key: "ansi", widthToLeft: 1, takeWidth: 7, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render("ello") + " " + internal.BlueBg.Render("wo"), }, { name: "ansi end", key: "ansi", widthToLeft: 10, takeWidth: 3, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.BlueBg.Render("d"), }, { name: "ansi past end", key: "ansi", widthToLeft: 11, takeWidth: 3, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "", }, { name: "ansi with continuation at end", key: "ansi", widthToLeft: 0, takeWidth: 7, continuation: "...", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render("hell.") + "." + internal.BlueBg.Render("."), }, { name: "ansi with continuation at start", key: "ansi", widthToLeft: 4, takeWidth: 7, continuation: "...", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render(".") + "." + internal.BlueBg.Render(".orld"), }, { name: "ansi with continuation both ends", key: "ansi", widthToLeft: 2, takeWidth: 7, continuation: "...", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render("...") + " " + internal.BlueBg.Render("..."), }, { name: "ansi with highlight whole word", key: "ansi", widthToLeft: 0, takeWidth: 11, continuation: "", toHighlight: "hello", highlightStyle: internal.GreenBg, expected: internal.GreenBg.Render("hello") + " " + internal.BlueBg.Render("world"), }, { name: "ansi with highlight partial word", key: "ansi", widthToLeft: 0, takeWidth: 11, continuation: "", toHighlight: "ell", highlightStyle: internal.GreenBg, expected: internal.RedBg.Render("h") + internal.GreenBg.Render("ell") + internal.RedBg.Render("o") + " " + internal.BlueBg.Render("world"), }, { name: "ansi with highlight across boundary", key: "ansi", widthToLeft: 0, takeWidth: 11, continuation: "", toHighlight: "lo wo", highlightStyle: internal.GreenBg, expected: internal.RedBg.Render("hel") + internal.GreenBg.Render("lo wo") + internal.BlueBg.Render("rld"), }, { name: "ansi with highlight and middle continuation", key: "ansi", widthToLeft: 1, takeWidth: 7, continuation: "..", toHighlight: "lo ", highlightStyle: internal.GreenBg, expected: internal.RedBg.Render("..") + internal.GreenBg.Render("lo ") + internal.BlueBg.Render(".."), }, { name: "ansi with highlight and overlapping continuation", key: "ansi", widthToLeft: 1, takeWidth: 7, continuation: "...", toHighlight: "lo ", highlightStyle: internal.GreenBg, expected: internal.RedBg.Render("..") + internal.GreenBg.Render(".o.") + internal.BlueBg.Render(".."), }, { name: "unicode_ansi start at 0", key: "unicode_ansi", widthToLeft: 0, takeWidth: 6, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render("A💖") + "中é", }, { name: "unicode_ansi start at 1", key: "unicode_ansi", widthToLeft: 1, takeWidth: 5, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render("💖") + "中é", }, { name: "unicode_ansi end", key: "unicode_ansi", widthToLeft: 5, takeWidth: 1, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "é", }, { name: "unicode_ansi past end", key: "unicode_ansi", widthToLeft: 6, takeWidth: 3, continuation: "", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: "", }, { name: "unicode_ansi with continuation at end", key: "unicode_ansi", widthToLeft: 0, takeWidth: 5, continuation: "...", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render("A💖") + "..", // bit of an edge cases, seems fine }, { name: "unicode_ansi with continuation at start", key: "unicode_ansi", widthToLeft: 1, takeWidth: 5, continuation: "...", toHighlight: "", highlightStyle: lipgloss.NewStyle(), expected: internal.RedBg.Render("..") + "中é", }, { name: "unicode_ansi with highlight whole word", key: "unicode_ansi", widthToLeft: 0, takeWidth: 6, continuation: "", toHighlight: "A💖", highlightStyle: internal.GreenBg, expected: internal.GreenBg.Render("A💖") + "中é", }, { name: "unicode_ansi with highlight partial word", key: "unicode_ansi", widthToLeft: 0, takeWidth: 6, continuation: "", toHighlight: "A", highlightStyle: internal.GreenBg, expected: internal.GreenBg.Render("A") + internal.RedBg.Render("💖") + "中é", }, { name: "unicode_ansi with highlight across boundary", key: "unicode_ansi", widthToLeft: 0, takeWidth: 6, continuation: "", toHighlight: "💖中", highlightStyle: internal.GreenBg, expected: internal.RedBg.Render("A") + internal.GreenBg.Render("💖中") + "é", }, { name: "unicode_ansi with highlight and overlapping continuation", key: "unicode_ansi", widthToLeft: 1, takeWidth: 5, continuation: "..", toHighlight: "💖", highlightStyle: internal.GreenBg, expected: internal.GreenBg.Render("..") + "中é", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, eq := range getEquivalentItems()[tt.key] { byteRanges := eq.ExtractExactMatches(tt.toHighlight) highlights := toHighlights(byteRanges, tt.highlightStyle) actual, _ := eq.Take(tt.widthToLeft, tt.takeWidth, tt.continuation, highlights) internal.CmpStr(t, tt.expected, actual, fmt.Sprintf("for %s", eq.repr())) } }) } } func TestConcatItem_TakeWithPinned(t *testing.T) { tests := []struct { name string items []SingleItem pinnedCount int widthToLeft int takeWidth int continuation string toHighlight string highlightStyle lipgloss.Style expected string }{ { name: "single pinned item, no pan", items: []SingleItem{NewItem("123"), NewItem("hello world")}, pinnedCount: 1, widthToLeft: 0, takeWidth: 14, expected: "123hello world", }, { name: "single pinned item, panned right", items: []SingleItem{NewItem("123"), NewItem("hello world")}, pinnedCount: 1, widthToLeft: 6, // pans "hello " off screen takeWidth: 8, // 3 for "123" + 5 for "world" expected: "123world", }, { name: "pinned item with continuation on non-pinned left and right", items: []SingleItem{NewItem("123"), NewItem("hello world")}, pinnedCount: 1, widthToLeft: 3, // pans "hel" off screen takeWidth: 10, continuation: "...", // non-pinned takeWidth = 10-3 = 7, "hello world" skips "hel" -> "lo world" (8 chars) // take 7 -> "lo worl", contentToLeft=true, contentToRight=true // replaceStart -> "...worl", replaceEnd -> "...w..." expected: "123...w...", }, { name: "pinned item with continuation on non-pinned right only", items: []SingleItem{NewItem("123"), NewItem("hello world")}, pinnedCount: 1, widthToLeft: 0, takeWidth: 8, continuation: "...", // non-pinned takeWidth = 8-3 = 5, "hello world" take 5 -> "hello" // contentToLeft=false, contentToRight=true (11 > 5) // replaceEnd -> "he..." expected: "123he...", }, { name: "pinned width equals viewport", items: []SingleItem{NewItem("12345"), NewItem("hello")}, pinnedCount: 1, widthToLeft: 0, takeWidth: 5, expected: "12345", }, { name: "pinned width exceeds viewport", items: []SingleItem{NewItem("1234567890"), NewItem("hello")}, pinnedCount: 1, widthToLeft: 0, takeWidth: 5, continuation: "...", expected: "12...", }, { name: "all items pinned ignores widthToLeft", items: []SingleItem{NewItem("abc"), NewItem("def")}, pinnedCount: 2, widthToLeft: 5, // should be ignored takeWidth: 6, expected: "abcdef", }, { name: "panned past non-pinned content returns only pinned", items: []SingleItem{NewItem("123"), NewItem("hi")}, pinnedCount: 1, widthToLeft: 10, // past "hi" takeWidth: 5, expected: "123", // only pinned content }, { name: "zero pinned count behaves like regular Take", items: []SingleItem{NewItem("abc"), NewItem("def")}, pinnedCount: 0, widthToLeft: 2, takeWidth: 3, expected: "cde", }, { name: "highlight in pinned section", items: []SingleItem{NewItem("123"), NewItem("hello")}, pinnedCount: 1, widthToLeft: 0, takeWidth: 8, toHighlight: "12", highlightStyle: internal.RedBg, expected: internal.RedBg.Render("12") + "3hello", }, { name: "highlight in non-pinned section", items: []SingleItem{NewItem("123"), NewItem("hello")}, pinnedCount: 1, widthToLeft: 0, takeWidth: 8, toHighlight: "ell", highlightStyle: internal.RedBg, expected: "123h" + internal.RedBg.Render("ell") + "o", }, { name: "pinned item with ANSI", items: []SingleItem{NewItem(internal.RedBg.Render("123")), NewItem("hello")}, pinnedCount: 1, widthToLeft: 2, // pans "he" off takeWidth: 6, // 3 for "123" + 3 for "llo" expected: internal.RedBg.Render("123") + "llo", }, { name: "two pinned items", items: []SingleItem{NewItem("A"), NewItem("B"), NewItem("hello world")}, pinnedCount: 2, widthToLeft: 6, // pans "hello " off takeWidth: 7, // 2 for "AB" + 5 for "world" expected: "ABworld", }, { name: "pinned with unicode non-pinned", items: []SingleItem{NewItem("12"), NewItem("A💖中é")}, // 💖 is 2w, 中 is 2w, é is 1w = 6 total pinnedCount: 1, widthToLeft: 1, // skip "A" (1w) takeWidth: 7, // 2 for "12" + 5 for "💖中é" expected: "12💖中é", }, { name: "empty items", items: []SingleItem{}, pinnedCount: 1, widthToLeft: 0, takeWidth: 10, expected: "", }, { name: "single item pinned", items: []SingleItem{NewItem("hello")}, pinnedCount: 1, widthToLeft: 2, // should be ignored for single item takeWidth: 5, expected: "hello", }, { name: "pinnedCount clamped to len", items: []SingleItem{NewItem("ab"), NewItem("cd")}, pinnedCount: 10, // exceeds 2 items, should clamp to 2 (all items pinned) widthToLeft: 5, // panning should have no effect when all items pinned takeWidth: 4, expected: "abcd", }, { name: "negative pinnedCount clamped to zero", items: []SingleItem{NewItem("ab"), NewItem("cd")}, pinnedCount: -5, // should clamp to 0 (no pinning) widthToLeft: 2, // with no pinning, panning 2 chars should skip "ab" takeWidth: 2, expected: "cd", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { concat := NewConcatWithPinned(tt.pinnedCount, tt.items...) var highlights []Highlight if tt.toHighlight != "" { matches := concat.ExtractExactMatches(tt.toHighlight) highlights = toHighlights(matches, tt.highlightStyle) } actual, _ := concat.Take(tt.widthToLeft, tt.takeWidth, tt.continuation, highlights) internal.CmpStr(t, tt.expected, actual, fmt.Sprintf("for pinnedCount=%d", tt.pinnedCount)) }) } } func TestConcatItem_NumWrappedLines(t *testing.T) { tests := []struct { name string key string wrapWidth int expected int }{ { name: "none no width", key: "none", wrapWidth: 0, expected: 0, }, { name: "none with width", key: "none", wrapWidth: 5, expected: 1, }, { name: "hello world negative width", key: "hello world", // 11 width wrapWidth: -1, expected: 0, }, { name: "hello world zero width", key: "hello world", // 11 width wrapWidth: 0, expected: 0, }, { name: "hello world wrap 1", key: "hello world", // 11 width wrapWidth: 1, expected: 11, }, { name: "hello world wrap 5", key: "hello world", // 11 width wrapWidth: 5, expected: 3, }, { name: "hello world wrap 11", key: "hello world", // 11 width wrapWidth: 11, expected: 1, }, { name: "hello world wrap 12", key: "hello world", // 11 width wrapWidth: 12, expected: 1, }, { name: "ansi wrap 5", key: "ansi", // 11 width wrapWidth: 5, expected: 3, }, { name: "unicode_ansi wrap 3", key: "unicode_ansi", // 6 width wrapWidth: 3, expected: 2, }, { name: "unicode_ansi wrap 6", key: "unicode_ansi", // 6 width wrapWidth: 6, expected: 1, }, { name: "unicode_ansi wrap 7", key: "unicode_ansi", // 6 width wrapWidth: 7, expected: 1, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, eq := range getEquivalentItems()[tt.key] { actual := eq.NumWrappedLines(tt.wrapWidth) if actual != tt.expected { t.Errorf("expected %d, got %d for item %s with wrap width %d", tt.expected, actual, eq.repr(), tt.wrapWidth) } } }) } } func TestConcatItem_ExtractExactMatches(t *testing.T) { tests := []struct { name string key string exactMatch string expected []Match }{ { name: "hello world empty exact match", key: "hello world", exactMatch: "", expected: []Match{}, }, { name: "hello world no matches", key: "hello world", exactMatch: "xyz", expected: []Match{}, }, { name: "hello world single match hello", key: "hello world", exactMatch: "hello", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 5, }, }, }, }, { name: "hello world single match world", key: "hello world", exactMatch: "world", expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, }, }, { name: "hello world match full content", key: "hello world", exactMatch: "hello world", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 11, }, WidthRange: WidthRange{ Start: 0, End: 11, }, }, }, }, { name: "hello world partial match lo wo", key: "hello world", exactMatch: "lo wo", expected: []Match{ { ByteRange: ByteRange{ Start: 3, End: 8, }, WidthRange: WidthRange{ Start: 3, End: 8, }, }, }, }, { name: "hello world single character match l", key: "hello world", exactMatch: "l", expected: []Match{ { ByteRange: ByteRange{ Start: 2, End: 3, }, WidthRange: WidthRange{ Start: 2, End: 3, }, }, { ByteRange: ByteRange{ Start: 3, End: 4, }, WidthRange: WidthRange{ Start: 3, End: 4, }, }, { ByteRange: ByteRange{ Start: 9, End: 10, }, WidthRange: WidthRange{ Start: 9, End: 10, }, }, }, }, { name: "hello world overlapping matches ll", key: "hello world", exactMatch: "ll", expected: []Match{ { ByteRange: ByteRange{ Start: 2, End: 4, }, WidthRange: WidthRange{ Start: 2, End: 4, }, }, }, }, { name: "hello world case sensitive Hello", key: "hello world", exactMatch: "Hello", expected: []Match{}, }, { name: "ansi match hello", key: "ansi", exactMatch: "hello", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 5, }, }, }, }, { name: "ansi match world", key: "ansi", exactMatch: "world", expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, }, }, { name: "ansi match across boundary lo wo", key: "ansi", exactMatch: "lo wo", expected: []Match{ { ByteRange: ByteRange{ Start: 3, End: 8, }, WidthRange: WidthRange{ Start: 3, End: 8, }, }, }, }, { name: "unicode_ansi match A💖", key: "unicode_ansi", exactMatch: "A💖", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 3, }, }, }, }, { name: "unicode_ansi match 中é", key: "unicode_ansi", exactMatch: "中é", expected: []Match{ { ByteRange: ByteRange{ Start: 5, End: 11, }, WidthRange: WidthRange{ Start: 3, End: 6, }, }, }, }, { name: "unicode_ansi match single character A", key: "unicode_ansi", exactMatch: "A", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 1, }, WidthRange: WidthRange{ Start: 0, End: 1, }, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, eq := range getEquivalentItems()[tt.key] { matches := eq.ExtractExactMatches(tt.exactMatch) if len(matches) != len(tt.expected) { t.Errorf("for item %s: expected %d matches, got %d", eq.repr(), len(tt.expected), len(matches)) return } for i, expected := range tt.expected { match := matches[i] if match.ByteRange.Start != expected.ByteRange.Start || match.ByteRange.End != expected.ByteRange.End { t.Errorf("for item %s, match %d: expected byte range Start=%d End=%d, got Start=%d End=%d", eq.repr(), i, expected.ByteRange.Start, expected.ByteRange.End, match.ByteRange.Start, match.ByteRange.End) } if match.WidthRange.Start != expected.WidthRange.Start || match.WidthRange.End != expected.WidthRange.End { t.Errorf("for item %s, match %d: expected width range Start=%d End=%d, got Start=%d End=%d", eq.repr(), i, expected.WidthRange.Start, expected.WidthRange.End, match.WidthRange.Start, match.WidthRange.End) } } } }) } } func TestConcatItem_ExtractRegexMatches(t *testing.T) { tests := []struct { name string key string regexPattern string expected []Match expectError bool }{ { name: "hello world no matches", key: "hello world", regexPattern: "xyz", expected: []Match{}, }, { name: "hello world simple word match", key: "hello world", regexPattern: "world", expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, }, }, { name: "hello world word boundary match", key: "hello world", regexPattern: `\bworld\b`, expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, }, }, { name: "hello world character class match l", key: "hello world", regexPattern: `l`, expected: []Match{ { ByteRange: ByteRange{ Start: 2, End: 3, }, WidthRange: WidthRange{ Start: 2, End: 3, }, }, { ByteRange: ByteRange{ Start: 3, End: 4, }, WidthRange: WidthRange{ Start: 3, End: 4, }, }, { ByteRange: ByteRange{ Start: 9, End: 10, }, WidthRange: WidthRange{ Start: 9, End: 10, }, }, }, }, { name: "hello world case insensitive pattern", key: "hello world", regexPattern: `(?i)HELLO`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 5, }, }, }, }, { name: "hello world across boundary lo wo", key: "hello world", regexPattern: `lo wo`, expected: []Match{ { ByteRange: ByteRange{ Start: 3, End: 8, }, WidthRange: WidthRange{ Start: 3, End: 8, }, }, }, }, { name: "hello world capturing groups", key: "hello world", regexPattern: `(hello) (world)`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 11, }, WidthRange: WidthRange{ Start: 0, End: 11, }, }, }, }, { name: "hello world dot metacharacter", key: "hello world", regexPattern: `l.o`, expected: []Match{ { ByteRange: ByteRange{ Start: 2, End: 5, }, WidthRange: WidthRange{ Start: 2, End: 5, }, }, }, }, { name: "hello world anchored pattern start", key: "hello world", regexPattern: `^hello`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 5, }, }, }, }, { name: "hello world anchored pattern end", key: "hello world", regexPattern: `world$`, expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, }, }, { name: "ansi match hello", key: "ansi", regexPattern: "hello", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 5, }, }, }, }, { name: "ansi match across boundary", key: "ansi", regexPattern: "lo wo", expected: []Match{ { ByteRange: ByteRange{ Start: 3, End: 8, }, WidthRange: WidthRange{ Start: 3, End: 8, }, }, }, }, { name: "unicode_ansi match A with unicode", key: "unicode_ansi", regexPattern: "A💖", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 3, }, }, }, }, { name: "unicode_ansi match 中é", key: "unicode_ansi", regexPattern: "中é", expected: []Match{ { ByteRange: ByteRange{ Start: 5, End: 11, }, WidthRange: WidthRange{ Start: 3, End: 6, }, }, }, }, { name: "unicode_ansi match unicode across boundary", key: "unicode_ansi", regexPattern: "💖中", expected: []Match{ { ByteRange: ByteRange{ Start: 1, End: 8, }, WidthRange: WidthRange{ Start: 1, End: 5, }, }, }, }, { name: "unicode_ansi wildcard match", key: "unicode_ansi", regexPattern: ".💖", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 3, }, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { regex, err := regexp.Compile(tt.regexPattern) if tt.expectError { if err == nil { t.Errorf("expected error but got none") } return } if err != nil { t.Errorf("unexpected error compiling regex: %v", err) return } for _, eq := range getEquivalentItems()[tt.key] { matches := eq.ExtractRegexMatches(regex) if len(matches) != len(tt.expected) { t.Errorf("for item %s: expected %d matches, got %d", eq.repr(), len(tt.expected), len(matches)) return } for i, expected := range tt.expected { match := matches[i] if match.ByteRange.Start != expected.ByteRange.Start || match.ByteRange.End != expected.ByteRange.End { t.Errorf("for item %s, match %d: expected byte range Start=%d End=%d, got Start=%d End=%d", eq.repr(), i, expected.ByteRange.Start, expected.ByteRange.End, match.ByteRange.Start, match.ByteRange.End) } if match.WidthRange.Start != expected.WidthRange.Start || match.WidthRange.End != expected.WidthRange.End { t.Errorf("for item %s, match %d: expected width range Start=%d End=%d, got Start=%d End=%d", eq.repr(), i, expected.WidthRange.Start, expected.WidthRange.End, match.WidthRange.Start, match.WidthRange.End) } } } }) } } func toHighlights(matches []Match, style lipgloss.Style) []Highlight { var highlights []Highlight for _, match := range matches { highlights = append(highlights, Highlight{ ByteRangeUnstyledContent: match.ByteRange, Style: style, }) } return highlights } func TestConcatItem_ByteRangesToMatches(t *testing.T) { tests := []struct { name string items []SingleItem byteRanges []ByteRange expected []Match }{ { name: "nil byte ranges", items: []SingleItem{NewItem("hello"), NewItem(" world")}, byteRanges: nil, expected: nil, }, { name: "empty byte ranges", items: []SingleItem{NewItem("hello"), NewItem(" world")}, byteRanges: []ByteRange{}, expected: nil, }, { name: "empty items", items: []SingleItem{}, byteRanges: []ByteRange{{Start: 0, End: 5}}, expected: nil, }, { name: "single item delegates to SingleItem", items: []SingleItem{NewItem("hello world")}, byteRanges: []ByteRange{ {Start: 6, End: 11}, }, expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 6, End: 11}, }, }, }, { name: "range in first item", items: []SingleItem{NewItem("hello"), NewItem(" world")}, byteRanges: []ByteRange{ {Start: 0, End: 5}, // "hello" }, expected: []Match{ { ByteRange: ByteRange{Start: 0, End: 5}, WidthRange: WidthRange{Start: 0, End: 5}, }, }, }, { name: "range in second item", items: []SingleItem{NewItem("hello"), NewItem(" world")}, byteRanges: []ByteRange{ {Start: 6, End: 11}, // "world" }, expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 6, End: 11}, }, }, }, { name: "range spanning two items", items: []SingleItem{NewItem("hello"), NewItem(" world")}, byteRanges: []ByteRange{ {Start: 3, End: 8}, // "lo wo" }, expected: []Match{ { ByteRange: ByteRange{Start: 3, End: 8}, WidthRange: WidthRange{Start: 3, End: 8}, }, }, }, { name: "multiple ranges across items", items: []SingleItem{NewItem("hello"), NewItem(" "), NewItem("world")}, byteRanges: []ByteRange{ {Start: 0, End: 5}, // "hello" {Start: 6, End: 11}, // "world" }, expected: []Match{ { ByteRange: ByteRange{Start: 0, End: 5}, WidthRange: WidthRange{Start: 0, End: 5}, }, { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 6, End: 11}, }, }, }, { name: "unicode across items", // A (1w, 1b), 💖 (2w, 4b) | 中 (2w, 3b), é (1w, 3b) items: []SingleItem{NewItem("A💖"), NewItem("中é")}, byteRanges: []ByteRange{ {Start: 1, End: 8}, // 💖中 }, expected: []Match{ { ByteRange: ByteRange{Start: 1, End: 8}, WidthRange: WidthRange{Start: 1, End: 5}, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { concat := NewConcat(tt.items...) actual := concat.ByteRangesToMatches(tt.byteRanges) if len(actual) != len(tt.expected) { t.Fatalf("expected %d matches, got %d", len(tt.expected), len(actual)) } for i, expected := range tt.expected { match := actual[i] if match.ByteRange != expected.ByteRange { t.Errorf("match %d: expected byte range %+v, got %+v", i, expected.ByteRange, match.ByteRange) } if match.WidthRange != expected.WidthRange { t.Errorf("match %d: expected width range %+v, got %+v", i, expected.WidthRange, match.WidthRange) } } }) } } // TestConcatItem_ByteRangesToMatches_EquivalentItems verifies that ByteRangesToMatches // produces consistent results across equivalent items with different item boundaries. func TestConcatItem_ByteRangesToMatches_EquivalentItems(t *testing.T) { for key, items := range getEquivalentItems() { if key == "none" || len(items) == 0 { continue } // Find some byte ranges to test with content := items[0].ContentNoAnsi() if len(content) < 2 { continue } byteRanges := []ByteRange{ {Start: 0, End: min(3, len(content))}, } if len(content) > 5 { byteRanges = append(byteRanges, ByteRange{Start: len(content) - 3, End: len(content)}) } // All equivalent items should produce the same matches reference := items[0].ByteRangesToMatches(byteRanges) for _, eq := range items[1:] { actual := eq.ByteRangesToMatches(byteRanges) if len(actual) != len(reference) { t.Errorf("[%s] %s: expected %d matches, got %d", key, eq.repr(), len(reference), len(actual)) continue } for i := range reference { if actual[i] != reference[i] { t.Errorf("[%s] %s: match %d: expected %+v, got %+v", key, eq.repr(), i, reference[i], actual[i]) } } } } } ================================================ FILE: modules/viewport/item/item.go ================================================ package item import "regexp" // Item defines the interface for item implementations type Item interface { // Width returns the total width in terminal cells Width() int // Content returns the underlying complete string Content() string // ContentNoAnsi returns the underlying complete string without ANSI escape codes that style the string ContentNoAnsi() string // Take takes a substring (line) of the content with a specified widthToLeft and taking takeWidth. // continuation replaces the start and end if the content exceeds the bounds. // highlights is a list of highlights to apply to the taken content. // Returns the line and the actual width taken Take( widthToLeft, takeWidth int, continuation string, highlights []Highlight, ) (string, int) // NumWrappedLines returns the number of wrapped lines given a wrap width NumWrappedLines(wrapWidth int) int // ExtractExactMatches extracts exact matches from the item's content without ANSI codes ExtractExactMatches(exactMatch string) []Match // ExtractRegexMatches extracts regex matches from the item's content without ANSI codes ExtractRegexMatches(regex *regexp.Regexp) []Match // ByteRangesToMatches converts byte ranges in the ANSI-stripped content to Matches // with both byte ranges and width ranges populated. ByteRangesToMatches(byteRanges []ByteRange) []Match // LineBrokenItems returns the sub-items of this item, each rendered on a separate line. // For single-line items, returns a slice containing just self. // For multi-line items, returns one item per content line with line breaks between them. LineBrokenItems() []Item // repr returns a representation of the object as a string for debugging repr() string } ================================================ FILE: modules/viewport/item/item_bench_test.go ================================================ package item import ( "strings" "testing" ) // To run benchmarks: // - All: go test -bench=. -benchmem -run=^$ ./viewport/item // - Plain text only: go test -bench=BenchmarkNew_Plain -benchmem -run=^$ ./viewport/item // - ANSI only: go test -bench=BenchmarkNew_ANSI -benchmem -run=^$ ./viewport/item // - Unicode only: go test -bench=BenchmarkNew_Unicode -benchmem -run=^$ ./viewport/item // // Example of interpreting benchmark output: // BenchmarkNew_Plain_1000-8 156124 7883 ns/op 8448 B/op 3 allocs/op // - 156124: benchmark ran 156,124 iterations to get a stable measurement // - 7883 ns/op: each call to NewItem() takes about 7.9 microseconds // - 8448 B/op: each operation allocates about 8.4KB of memory // - 3 allocs/op: each call to NewItem() makes 3 distinct memory allocations // BenchmarkNew_Plain benchmarks NewItem() with plain text strings of various sizes func BenchmarkNew_Plain_10(b *testing.B) { baseString := strings.Repeat("h", 10) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_Plain_100(b *testing.B) { baseString := strings.Repeat("h", 100) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_Plain_1000(b *testing.B) { baseString := strings.Repeat("h", 1000) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_Plain_10000(b *testing.B) { baseString := strings.Repeat("h", 10000) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } // BenchmarkNew_ANSI benchmarks NewItem() with ANSI-styled strings of various sizes func BenchmarkNew_ANSI_10(b *testing.B) { baseString := strings.Repeat("\x1b[31mh"+RST+"", 10) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_ANSI_100(b *testing.B) { baseString := strings.Repeat("\x1b[31mh"+RST+"", 100) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_ANSI_1000(b *testing.B) { baseString := strings.Repeat("\x1b[31mh"+RST+"", 1000) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_ANSI_10000(b *testing.B) { baseString := strings.Repeat("\x1b[31mh"+RST+"", 10000) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } // BenchmarkNew_Unicode benchmarks NewItem() with Unicode strings of various sizes func BenchmarkNew_Unicode_10(b *testing.B) { baseString := strings.Repeat("世", 10) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_Unicode_100(b *testing.B) { baseString := strings.Repeat("世", 100) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_Unicode_1000(b *testing.B) { baseString := strings.Repeat("世", 1000) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } func BenchmarkNew_Unicode_10000(b *testing.B) { baseString := strings.Repeat("世", 10000) b.ReportAllocs() for i := 0; i < b.N; i++ { _ = NewItem(baseString) } } ================================================ FILE: modules/viewport/item/model.go ================================================ package item import ( "charm.land/lipgloss/v2" ) // ByteRange represents a range of bytes type ByteRange struct { Start, End int } // WidthRange represents a range of character widths in terminal cells type WidthRange struct { Start, End int } // Match represents a range of bytes and their according start and end width in an item type Match struct { ByteRange ByteRange WidthRange WidthRange } // Highlight represents a range and style to highlight type Highlight struct { Style lipgloss.Style ByteRangeUnstyledContent ByteRange } ================================================ FILE: modules/viewport/item/multiline.go ================================================ package item import ( "fmt" "regexp" "strings" ) // MultiLineItem implements Item by wrapping multiple SingleItems, rendered with line breaks between them. // Each individual SingleItem may span multiple terminal lines if it wraps, but the MultiLineItem itself does not // concatenate and wrap content across items (for that, see ConcatItem). // Take() must not be called on a MultiLineItem — callers should use Take() on individual items returned // by LineBrokenItems() instead. type MultiLineItem struct { items []SingleItem totalWidth int // sum of all item widths content string // cached: item content joined with \n (with ANSI) noAnsi string // cached: item content joined with \n (no ANSI) } // type assertion that MultiLineItem implements Item var _ Item = MultiLineItem{} // type assertion that *MultiLineItem implements Item var _ Item = (*MultiLineItem)(nil) // NewMultiLineItem creates a new MultiLineItem from the given items. func NewMultiLineItem(items ...SingleItem) MultiLineItem { if len(items) == 0 { return MultiLineItem{} } totalWidth := 0 for _, it := range items { totalWidth += it.Width() } return MultiLineItem{ items: items, totalWidth: totalWidth, } } // Width returns the total width in cells across all line-broken items. func (m MultiLineItem) Width() int { return m.totalWidth } // Content returns the content of all items joined with newlines. func (m MultiLineItem) Content() string { if m.content != "" { return m.content } if len(m.items) == 0 { return "" } if len(m.items) == 1 { return m.items[0].Content() } totalLen := 0 for _, it := range m.items { totalLen += len(it.Content()) } totalLen += len(m.items) - 1 // newline separators var builder strings.Builder builder.Grow(totalLen) for i, it := range m.items { if i > 0 { builder.WriteByte('\n') } builder.WriteString(it.Content()) } m.content = builder.String() return m.content } // ContentNoAnsi returns the content of all items joined with newlines, without ANSI codes. func (m MultiLineItem) ContentNoAnsi() string { if m.noAnsi != "" { return m.noAnsi } if len(m.items) == 0 { return "" } if len(m.items) == 1 { return m.items[0].ContentNoAnsi() } totalLen := 0 for _, it := range m.items { totalLen += len(it.ContentNoAnsi()) } totalLen += len(m.items) - 1 var builder strings.Builder builder.Grow(totalLen) for i, it := range m.items { if i > 0 { builder.WriteByte('\n') } builder.WriteString(it.ContentNoAnsi()) } m.noAnsi = builder.String() return m.noAnsi } // NumWrappedLines returns the total number of terminal lines needed to render all // line-broken items, where each item wraps independently. func (m MultiLineItem) NumWrappedLines(wrapWidth int) int { if wrapWidth <= 0 { return 0 } if len(m.items) == 0 { return 1 } total := 0 for _, it := range m.items { total += it.NumWrappedLines(wrapWidth) } return total } // Take must not be called on a MultiLineItem. Callers should render // individual items returned by LineBrokenItems() instead. func (m MultiLineItem) Take( _, _ int, _ string, _ []Highlight, ) (string, int) { panic("Take() called on MultiLineItem — use LineBrokenItems() to render individual lines") } // LineBrokenItems returns the individual items, each rendered on a separate line. func (m MultiLineItem) LineBrokenItems() []Item { // convert MultiLineItem to Item items := make([]Item, len(m.items)) for i := range m.items { items[i] = m.items[i] } return items } // repr returns a string representation of the MultiLineItem for debugging. func (m MultiLineItem) repr() string { var v strings.Builder v.WriteString("MultiLine(") for i := range m.items { if i > 0 { v.WriteString(", ") } v.WriteString(m.items[i].repr()) } v.WriteString(")") return v.String() } // ByteRangesToMatches converts byte ranges in the concatenated ANSI-stripped content to Matches. func (m MultiLineItem) ByteRangesToMatches(byteRanges []ByteRange) []Match { if len(m.items) == 0 || len(byteRanges) == 0 { return nil } if len(m.items) == 1 { return m.items[0].ByteRangesToMatches(byteRanges) } lineByteOffsets, lineWidthOffsets := m.computeOffsets() matches := make([]Match, 0, len(byteRanges)) for _, br := range byteRanges { startWidth, endWidth := m.byteRangeToWidthRange(br.Start, br.End, lineByteOffsets, lineWidthOffsets) matches = append(matches, Match{ ByteRange: br, WidthRange: WidthRange{Start: startWidth, End: endWidth}, }) } return matches } // ExtractExactMatches extracts exact matches from the concatenated content. // Byte ranges are relative to ContentNoAnsi(). Width ranges are cumulative across items. func (m MultiLineItem) ExtractExactMatches(exactMatch string) []Match { if len(m.items) == 0 || exactMatch == "" { return nil } if len(m.items) == 1 { return m.items[0].ExtractExactMatches(exactMatch) } concatenated := m.ContentNoAnsi() var byteRanges []ByteRange startIndex := 0 for { foundIndex := strings.Index(concatenated[startIndex:], exactMatch) if foundIndex == -1 { break } actualStartIndex := startIndex + foundIndex endIndex := actualStartIndex + len(exactMatch) byteRanges = append(byteRanges, ByteRange{Start: actualStartIndex, End: endIndex}) startIndex = endIndex } return m.ByteRangesToMatches(byteRanges) } // ExtractRegexMatches extracts regex matches from the concatenated content. func (m MultiLineItem) ExtractRegexMatches(regex *regexp.Regexp) []Match { if len(m.items) == 0 { return nil } if len(m.items) == 1 { return m.items[0].ExtractRegexMatches(regex) } concatenated := m.ContentNoAnsi() regexMatches := regex.FindAllStringIndex(concatenated, -1) if len(regexMatches) == 0 { return nil } byteRanges := make([]ByteRange, 0, len(regexMatches)) for _, rm := range regexMatches { byteRanges = append(byteRanges, ByteRange{Start: rm[0], End: rm[1]}) } return m.ByteRangesToMatches(byteRanges) } // computeOffsets returns cumulative byte offsets and width offsets for each line-broken item. // Byte offsets account for the \n separators between items in the concatenated content. func (m MultiLineItem) computeOffsets() (lineByteOffsets, lineWidthOffsets []int) { lineByteOffsets = make([]int, len(m.items)+1) lineWidthOffsets = make([]int, len(m.items)+1) for i, it := range m.items { lineByteOffsets[i+1] = lineByteOffsets[i] + len(it.ContentNoAnsi()) if i < len(m.items)-1 { lineByteOffsets[i+1]++ // \n separator } lineWidthOffsets[i+1] = lineWidthOffsets[i] + it.Width() } return } // findLineForByteOffset finds which line-broken item contains the given byte offset // in the concatenated content. Returns (lineIndex, localByteOffset). func (m MultiLineItem) findLineForByteOffset(byteOffset int, lineByteOffsets []int) (int, int) { for i := 0; i < len(m.items); i++ { lineStart := lineByteOffsets[i] lineEnd := lineByteOffsets[i] + len(m.items[i].ContentNoAnsi()) if byteOffset >= lineStart && byteOffset < lineEnd { return i, byteOffset - lineStart } // byteOffset falls on the \n separator — attribute to the next line if i < len(m.items)-1 && byteOffset == lineEnd { return i + 1, 0 } } // past the end lastIdx := len(m.items) - 1 return lastIdx, len(m.items[lastIdx].ContentNoAnsi()) } // byteRangeToWidthRange converts a byte range in the concatenated content to a // cumulative width range across line-broken items. func (m MultiLineItem) byteRangeToWidthRange( startByte, endByte int, lineByteOffsets, lineWidthOffsets []int, ) (startWidth, endWidth int) { startLineIdx, startLocalByte := m.findLineForByteOffset(startByte, lineByteOffsets) endLineIdx, endLocalByte := m.findLineForByteOffset(endByte, lineByteOffsets) if startLineIdx >= 0 && startLineIdx < len(m.items) { startRuneIdx := m.items[startLineIdx].getRuneIndexAtByteOffset(startLocalByte) if startRuneIdx > 0 { startWidth = int(m.items[startLineIdx].getCumulativeWidthAtRuneIdx(startRuneIdx - 1)) } startWidth += lineWidthOffsets[startLineIdx] } if endLineIdx >= 0 && endLineIdx < len(m.items) { endRuneIdx := m.items[endLineIdx].getRuneIndexAtByteOffset(endLocalByte) if endRuneIdx > 0 { endWidth = int(m.items[endLineIdx].getCumulativeWidthAtRuneIdx(endRuneIdx - 1)) } endWidth += lineWidthOffsets[endLineIdx] } return } // NumLineBrokenItems returns the number of line-broken items. func (m MultiLineItem) NumLineBrokenItems() int { return len(m.items) } // LineBrokenItem returns the line-broken item at the given index. func (m MultiLineItem) LineBrokenItem(idx int) SingleItem { return m.items[idx] } // String returns the content for fmt.Stringer compatibility. func (m MultiLineItem) String() string { return fmt.Sprintf("MultiLineItem{lines=%d, width=%d}", len(m.items), m.totalWidth) } ================================================ FILE: modules/viewport/item/multiline_test.go ================================================ package item import ( "reflect" "regexp" "strings" "testing" ) func TestMultiLineItem_Width(t *testing.T) { tests := []struct { name string items []SingleItem expected int }{ { name: "empty", items: nil, expected: 0, }, { name: "single item", items: []SingleItem{NewItem("hello")}, expected: 5, }, { name: "two items", items: []SingleItem{NewItem("hello"), NewItem("world")}, expected: 10, }, { name: "item with empty line", items: []SingleItem{NewItem("hello"), NewItem(""), NewItem("world")}, expected: 10, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { m := NewMultiLineItem(tt.items...) if actual := m.Width(); actual != tt.expected { t.Errorf("expected width %d, got %d", tt.expected, actual) } }) } } func TestMultiLineItem_Content(t *testing.T) { tests := []struct { name string items []SingleItem expected string }{ { name: "empty", items: nil, expected: "", }, { name: "single item", items: []SingleItem{NewItem("hello")}, expected: "hello", }, { name: "two items joined with newline", items: []SingleItem{NewItem("hello"), NewItem("world")}, expected: "hello\nworld", }, { name: "three items with empty middle", items: []SingleItem{NewItem("a"), NewItem(""), NewItem("b")}, expected: "a\n\nb", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { m := NewMultiLineItem(tt.items...) if actual := m.Content(); actual != tt.expected { t.Errorf("expected content %q, got %q", tt.expected, actual) } if actual := m.ContentNoAnsi(); actual != tt.expected { t.Errorf("expected contentNoAnsi %q, got %q", tt.expected, actual) } }) } } func TestMultiLineItem_NumWrappedLines(t *testing.T) { tests := []struct { name string items []SingleItem wrapWidth int expected int }{ { name: "empty items", items: nil, wrapWidth: 10, expected: 1, }, { name: "single short item", items: []SingleItem{NewItem("hello")}, wrapWidth: 10, expected: 1, }, { name: "single item wraps", items: []SingleItem{NewItem("hello world")}, wrapWidth: 5, expected: 3, }, { name: "two items no wrapping", items: []SingleItem{NewItem("hello"), NewItem("world")}, wrapWidth: 10, expected: 2, }, { name: "two items both wrap", items: []SingleItem{NewItem("hello world"), NewItem("foo bar baz")}, wrapWidth: 5, expected: 6, // 3 + 3 }, { name: "item with empty line", items: []SingleItem{NewItem("hello"), NewItem(""), NewItem("world")}, wrapWidth: 10, expected: 3, // 1 + 1 (empty) + 1 }, { name: "zero wrap width", items: []SingleItem{NewItem("hello")}, wrapWidth: 0, expected: 0, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { m := NewMultiLineItem(tt.items...) if actual := m.NumWrappedLines(tt.wrapWidth); actual != tt.expected { t.Errorf("expected %d wrapped lines, got %d", tt.expected, actual) } }) } } func TestMultiLineItem_LineBrokenItems(t *testing.T) { items := []SingleItem{NewItem("hello"), NewItem("world")} m := NewMultiLineItem(items...) broken := m.LineBrokenItems() if len(broken) != 2 { t.Fatalf("expected 2 line-broken items, got %d", len(broken)) } if broken[0].Content() != "hello" { t.Errorf("expected first item content 'hello', got %q", broken[0].Content()) } if broken[1].Content() != "world" { t.Errorf("expected second item content 'world', got %q", broken[1].Content()) } } func TestMultiLineItem_Take_Panics(t *testing.T) { m := NewMultiLineItem(NewItem("hello"), NewItem("world")) defer func() { if r := recover(); r == nil { t.Error("expected Take() to panic on MultiLineItem, but it didn't") } }() m.Take(0, 10, "", nil) } func TestMultiLineItem_ExtractExactMatches(t *testing.T) { tests := []struct { name string items []SingleItem exactMatch string expected []Match }{ { name: "no match", items: []SingleItem{NewItem("hello"), NewItem("world")}, exactMatch: "xyz", expected: nil, }, { name: "match in first item", items: []SingleItem{NewItem("hello"), NewItem("world")}, exactMatch: "hello", expected: []Match{ { ByteRange: ByteRange{Start: 0, End: 5}, WidthRange: WidthRange{Start: 0, End: 5}, }, }, }, { name: "match in second item", items: []SingleItem{NewItem("hello"), NewItem("world")}, exactMatch: "world", expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, // "hello\n" = 6 bytes offset WidthRange: WidthRange{Start: 5, End: 10}, // width offset = 5 (width of "hello") }, }, }, { name: "match spanning newline", items: []SingleItem{NewItem("hello"), NewItem("world")}, exactMatch: "o\nw", expected: []Match{ { ByteRange: ByteRange{Start: 4, End: 7}, WidthRange: WidthRange{Start: 4, End: 6}, // "o" width=1 at offset 4, "\n" not counted, "w" at offset 5+0=5, end at 5+1=6 }, }, }, { name: "empty match", items: []SingleItem{NewItem("hello")}, exactMatch: "", expected: nil, }, { name: "single item delegates", items: []SingleItem{NewItem("hello world")}, exactMatch: "world", expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 6, End: 11}, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { m := NewMultiLineItem(tt.items...) actual := m.ExtractExactMatches(tt.exactMatch) if !reflect.DeepEqual(actual, tt.expected) { t.Errorf("expected %v, got %v", tt.expected, actual) } }) } } func TestMultiLineItem_ExtractRegexMatches(t *testing.T) { tests := []struct { name string items []SingleItem pattern string expected []Match }{ { name: "simple match", items: []SingleItem{NewItem("hello"), NewItem("world")}, pattern: "world", expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 5, End: 10}, }, }, }, { name: "match in multiple items", items: []SingleItem{NewItem("abc"), NewItem("abcd")}, pattern: "abc", expected: []Match{ { ByteRange: ByteRange{Start: 0, End: 3}, WidthRange: WidthRange{Start: 0, End: 3}, }, { ByteRange: ByteRange{Start: 4, End: 7}, WidthRange: WidthRange{Start: 3, End: 6}, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { m := NewMultiLineItem(tt.items...) actual := m.ExtractRegexMatches(regexp.MustCompile(tt.pattern)) if !reflect.DeepEqual(actual, tt.expected) { t.Errorf("expected %v, got %v", tt.expected, actual) } }) } } func TestMultiLineItem_Repr(t *testing.T) { m := NewMultiLineItem(NewItem("a"), NewItem("b")) repr := m.repr() if repr != `MultiLine(Item("a"), Item("b"))` { t.Errorf("unexpected repr: %s", repr) } } func TestMultiLineItem_ByteRangesToMatches(t *testing.T) { tests := []struct { name string items []SingleItem byteRanges []ByteRange expected []Match }{ { name: "nil byte ranges", items: []SingleItem{NewItem("hello"), NewItem("world")}, byteRanges: nil, expected: nil, }, { name: "empty byte ranges", items: []SingleItem{NewItem("hello"), NewItem("world")}, byteRanges: []ByteRange{}, expected: nil, }, { name: "empty items", items: []SingleItem{}, byteRanges: []ByteRange{{Start: 0, End: 5}}, expected: nil, }, { name: "single item delegates to SingleItem", items: []SingleItem{NewItem("hello world")}, byteRanges: []ByteRange{ {Start: 6, End: 11}, }, expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 6, End: 11}, }, }, }, { name: "range in first item", items: []SingleItem{NewItem("hello"), NewItem("world")}, // ContentNoAnsi = "hello\nworld" byteRanges: []ByteRange{ {Start: 0, End: 5}, // "hello" }, expected: []Match{ { ByteRange: ByteRange{Start: 0, End: 5}, WidthRange: WidthRange{Start: 0, End: 5}, }, }, }, { name: "range in second item", items: []SingleItem{NewItem("hello"), NewItem("world")}, // ContentNoAnsi = "hello\nworld", "world" starts at byte 6 byteRanges: []ByteRange{ {Start: 6, End: 11}, // "world" }, expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 5, End: 10}, // width offset = 5 (width of "hello") }, }, }, { name: "range spanning newline", items: []SingleItem{NewItem("hello"), NewItem("world")}, // ContentNoAnsi = "hello\nworld", "o\nw" = bytes 4-7 byteRanges: []ByteRange{ {Start: 4, End: 7}, // "o\nw" }, expected: []Match{ { ByteRange: ByteRange{Start: 4, End: 7}, WidthRange: WidthRange{Start: 4, End: 6}, // "o" ends at width 5, "w" starts at width 5, ends at 6 }, }, }, { name: "multiple ranges across items", items: []SingleItem{NewItem("abc"), NewItem("def")}, // ContentNoAnsi = "abc\ndef" byteRanges: []ByteRange{ {Start: 0, End: 3}, // "abc" {Start: 4, End: 7}, // "def" }, expected: []Match{ { ByteRange: ByteRange{Start: 0, End: 3}, WidthRange: WidthRange{Start: 0, End: 3}, }, { ByteRange: ByteRange{Start: 4, End: 7}, WidthRange: WidthRange{Start: 3, End: 6}, }, }, }, { name: "three items with match in middle", items: []SingleItem{NewItem("aaa"), NewItem("bbb"), NewItem("ccc")}, // ContentNoAnsi = "aaa\nbbb\nccc", "bbb" starts at byte 4 byteRanges: []ByteRange{ {Start: 4, End: 7}, // "bbb" }, expected: []Match{ { ByteRange: ByteRange{Start: 4, End: 7}, WidthRange: WidthRange{Start: 3, End: 6}, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { m := NewMultiLineItem(tt.items...) actual := m.ByteRangesToMatches(tt.byteRanges) if !reflect.DeepEqual(actual, tt.expected) { t.Errorf("expected %v, got %v", tt.expected, actual) } }) } } // TestMultiLineItem_ByteRangesToMatches_ConsistentWithExtract verifies that // ByteRangesToMatches and ExtractExactMatches produce the same results. func TestMultiLineItem_ByteRangesToMatches_ConsistentWithExtract(t *testing.T) { tests := []struct { name string items []SingleItem query string }{ { name: "match in first line", items: []SingleItem{NewItem("hello world"), NewItem("foo bar")}, query: "hello", }, { name: "match in second line", items: []SingleItem{NewItem("hello"), NewItem("world")}, query: "world", }, { name: "match in multiple lines", items: []SingleItem{NewItem("abc"), NewItem("abcd")}, query: "abc", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { m := NewMultiLineItem(tt.items...) // Get matches via ExtractExactMatches exactMatches := m.ExtractExactMatches(tt.query) // Manually find byte ranges in ContentNoAnsi content := m.ContentNoAnsi() var byteRanges []ByteRange start := 0 for { idx := strings.Index(content[start:], tt.query) if idx == -1 { break } actualStart := start + idx end := actualStart + len(tt.query) byteRanges = append(byteRanges, ByteRange{Start: actualStart, End: end}) start = end } // Get matches via ByteRangesToMatches brMatches := m.ByteRangesToMatches(byteRanges) if !reflect.DeepEqual(exactMatches, brMatches) { t.Errorf("ExtractExactMatches=%+v, ByteRangesToMatches=%+v", exactMatches, brMatches) } }) } } ================================================ FILE: modules/viewport/item/safecast.go ================================================ package item import ( "math" ) // clampIntToUint8 safely converts an int to uint8, clamping to valid range func clampIntToUint8(val int) uint8 { if val < 0 { return 0 } if val > math.MaxUint8 { return math.MaxUint8 } return uint8(val) } // clampIntToUint32 safely converts an int to uint32, clamping to valid range func clampIntToUint32(val int) uint32 { if val < 0 { return 0 } if uint64(val) > math.MaxUint32 { return math.MaxUint32 } return uint32(val) } ================================================ FILE: modules/viewport/item/safecast_test.go ================================================ package item import ( "math" "testing" ) func TestClampIntToUint8(t *testing.T) { tests := []struct { name string val int want uint8 }{ {"zero", 0, 0}, {"positive in range", 100, 100}, {"max uint8", math.MaxUint8, math.MaxUint8}, {"above max uint8", math.MaxUint8 + 1, math.MaxUint8}, {"large positive", math.MaxInt, math.MaxUint8}, {"negative", -1, 0}, {"large negative", math.MinInt, 0}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if got := clampIntToUint8(tt.val); got != tt.want { t.Errorf("clampIntToUint8(%d) = %d, want %d", tt.val, got, tt.want) } }) } } func TestClampIntToUint32(t *testing.T) { tests := []struct { name string val int want uint32 }{ {"zero", 0, 0}, {"positive in range", 100, 100}, {"max uint32 - 1", math.MaxUint32 - 1, math.MaxUint32 - 1}, {"negative", -1, 0}, {"large negative", math.MinInt, 0}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if got := clampIntToUint32(tt.val); got != tt.want { t.Errorf("clampIntToUint32(%d) = %d, want %d", tt.val, got, tt.want) } }) } } // TestClampIntToUint32_aboveMaxOnBigPlatforms tests the upper clamp on 64-bit platforms // where int can exceed math.MaxUint32. On 32-bit platforms, int cannot exceed // math.MaxUint32, so this case is only exercised where math.MaxInt > math.MaxUint32. func TestClampIntToUint32_aboveMaxOnBigPlatforms(t *testing.T) { if math.MaxInt <= math.MaxUint32 { t.Skip("int is 32-bit on this platform; values cannot exceed math.MaxUint32") } tests := []struct { name string val int want uint32 }{ {"max uint32", math.MaxUint32, math.MaxUint32}, {"above max uint32", math.MaxUint32 + 1, math.MaxUint32}, {"large positive", math.MaxInt, math.MaxUint32}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if got := clampIntToUint32(tt.val); got != tt.want { t.Errorf("clampIntToUint32(%d) = %d, want %d", tt.val, got, tt.want) } }) } } ================================================ FILE: modules/viewport/item/single.go ================================================ package item import ( "fmt" "regexp" "strings" "unicode/utf8" "github.com/clipperhouse/displaywidth" ) // SingleItem provides functionality to get sequential strings of a specified terminal cell width, accounting // for the ansi escape codes styling the line. type SingleItem struct { line string // underlying string with ansi codes. utf-8 encoded bytes lineNoAnsi string // line without ansi codes. utf-8 encoded bytes lineNoAnsiRuneWidths []uint8 // packed terminal cell widths, 4 widths per byte (2 bits each) ansiCodeIndexes [][]uint32 // slice of startByte, endByte indexes of ansi codes numNoAnsiRunes int // number of runes in lineNoAnsi totalWidth int // total width in terminal cells fillStyle string // ANSI code to use when filling remaining width (emulates \x1b[K]) sparsity int // interval for which to store cumulative cell width sparseRuneIdxToNoAnsiByteOffset []uint32 // rune idx to byte offset of lineNoAnsi, stored every sparsity runes sparseLineNoAnsiCumRuneWidths []uint32 // cumulative terminal cell width, stored every sparsity runes } // type assertion that SingleItem implements Item var _ Item = SingleItem{} // type assertion that *SingleItem implements Item var _ Item = (*SingleItem)(nil) // extractEraseInLineFillStyle finds \x1b[K or \x1b[0K in the line and returns // the ANSI style code immediately before it (the style the terminal would use // to fill). returns "" if no erase sequence is found or the preceding code is // a reset (meaning fill uses default background). func extractEraseInLineFillStyle(line string) string { pos := strings.Index(line, "\x1b[0K") if pos == -1 { pos = strings.Index(line, "\x1b[K") } if pos == -1 { return "" } // find the last \x1b[...m before the erase sequence prefix := line[:pos] lastEsc := strings.LastIndex(prefix, "\x1b[") if lastEsc == -1 { return "" } mIdx := strings.IndexByte(prefix[lastEsc:], 'm') if mIdx == -1 { return "" } code := prefix[lastEsc : lastEsc+mIdx+1] if isResetCode(code) { return "" } return code } // NewItem creates a new SingleItem from the given string. func NewItem(line string) SingleItem { // \x1b[K and \x1b[0K tell the terminal to fill from cursor to end of line // with the current background color. we can't preserve them as-is because // the viewport's render() pads every line to a fixed width with lipgloss, // and those plain padding spaces overwrite the \x1b[K fill. instead, strip // them and record the ANSI style active at that position, then in Take() // append styled padding spaces to emulate the fill. fillStyle := extractEraseInLineFillStyle(line) if fillStyle != "" || strings.Contains(line, "\x1b[K") || strings.Contains(line, "\x1b[0K") { line = strings.ReplaceAll(line, "\x1b[0K", "") line = strings.ReplaceAll(line, "\x1b[K", "") } line = stripNonSGR(line) if len(line) <= 0 { return SingleItem{line: line, fillStyle: fillStyle} } // keep sparsity small for short lines sparsity := 4 if len(line) > 100 { sparsity = 10 // tradeoff between memory usage and CPU. 10 seems to be a good balance } item := SingleItem{ line: line, sparsity: sparsity, fillStyle: fillStyle, } item.ansiCodeIndexes = findAnsiByteRanges(line) if len(item.ansiCodeIndexes) > 0 { totalLen := len(line) for _, r := range item.ansiCodeIndexes { totalLen -= int(r[1] - r[0]) } noAnsiBytes := make([]byte, 0, totalLen) lastPos := 0 for _, r := range item.ansiCodeIndexes { noAnsiBytes = append(noAnsiBytes, line[lastPos:int(r[0])]...) lastPos = int(r[1]) } noAnsiBytes = append(noAnsiBytes, line[lastPos:]...) item.lineNoAnsi = string(noAnsiBytes) } else { item.lineNoAnsi = line } numRunes := utf8.RuneCountInString(item.lineNoAnsi) // calculate size needed for sparse cumulative widths sparseLen := (numRunes + item.sparsity - 1) / item.sparsity item.sparseRuneIdxToNoAnsiByteOffset = make([]uint32, sparseLen) item.sparseLineNoAnsiCumRuneWidths = make([]uint32, sparseLen) // calculate size needed for packed rune widths (4 widths per byte) packedLen := (numRunes + 3) / 4 item.lineNoAnsiRuneWidths = make([]uint8, packedLen) var currentOffset uint32 var cumWidth uint32 runeIdx := 0 for byteOffset := 0; byteOffset < len(item.lineNoAnsi); { r, runeNumBytes := utf8.DecodeRuneInString(item.lineNoAnsi[byteOffset:]) rw := displaywidth.Rune(r) width := clampIntToUint8(rw) // pack 4 widths per byte (2 bits each) packedIdx := runeIdx / 4 bitPos := (runeIdx % 4) * 2 // clear the 2 bits at the position and set the new width item.lineNoAnsiRuneWidths[packedIdx] &= ^(uint8(3) << bitPos) item.lineNoAnsiRuneWidths[packedIdx] |= width << bitPos cumWidth += uint32(width) if runeIdx%item.sparsity == 0 { item.sparseRuneIdxToNoAnsiByteOffset[runeIdx/item.sparsity] = currentOffset item.sparseLineNoAnsiCumRuneWidths[runeIdx/item.sparsity] = cumWidth } if runeIdx == numRunes-1 { item.totalWidth = int(cumWidth) } currentOffset += clampIntToUint32(runeNumBytes) runeIdx++ byteOffset += runeNumBytes } item.numNoAnsiRunes = runeIdx return item } // Width returns the total width in terminal cells. func (l SingleItem) Width() int { if len(l.line) == 0 { return 0 } return l.totalWidth } // Content returns the underlying string content func (l SingleItem) Content() string { return l.line } // ContentNoAnsi returns the underlying string content without ANSI escape codes func (l SingleItem) ContentNoAnsi() string { return l.lineNoAnsi } // Take returns a substring of the item that fits within the specified width func (l SingleItem) Take( widthToLeft, takeWidth int, continuation string, highlights []Highlight, ) (string, int) { if widthToLeft < 0 { widthToLeft = 0 } widthToLeft = min(widthToLeft, l.Width()) startRuneIdx := l.findRuneIndexWithWidthToLeft(widthToLeft) if startRuneIdx >= l.numNoAnsiRunes || takeWidth == 0 { if l.fillStyle != "" && takeWidth > 0 { // content is empty but fill is requested — produce styled padding return l.fillStyle + strings.Repeat(" ", takeWidth) + RST, takeWidth } return "", 0 } var result strings.Builder remainingWidth := takeWidth leftRuneIdx := startRuneIdx startByteOffset := l.getByteOffsetAtRuneIdx(startRuneIdx) runesWritten := 0 for ; remainingWidth > 0 && leftRuneIdx < l.numNoAnsiRunes; leftRuneIdx++ { r := l.runeAt(leftRuneIdx) runeWidth := l.getRuneWidth(leftRuneIdx) if int(runeWidth) > remainingWidth { break } result.WriteRune(r) runesWritten++ remainingWidth -= int(runeWidth) } // if only zero-width runes were written, return "" for i := 0; i < runesWritten; i++ { if displaywidth.Rune(l.runeAt(startRuneIdx+i)) > 0 { break } if i == runesWritten-1 { return "", 0 } } // write the subsequent zero-width runes, e.g. the accent on an 'e' if result.Len() > 0 { for ; leftRuneIdx < l.numNoAnsiRunes; leftRuneIdx++ { r := l.runeAt(leftRuneIdx) if displaywidth.Rune(r) == 0 { result.WriteRune(r) } else { break } } } res := result.String() // reapply original styling if len(l.ansiCodeIndexes) > 0 { res = reapplyAnsi(l.line, res, int(startByteOffset), l.ansiCodeIndexes) } // highlight the desired string var endByteOffset int if leftRuneIdx < l.numNoAnsiRunes { endByteOffset = int(l.getByteOffsetAtRuneIdx(leftRuneIdx)) } else { endByteOffset = len(l.lineNoAnsi) } res = highlightString( res, highlights, int(startByteOffset), endByteOffset, ) // apply left/right line continuation indicators if len(continuation) > 0 && (startRuneIdx > 0 || leftRuneIdx < l.numNoAnsiRunes) { continuationRunes := []rune(continuation) // if more runes to the left of the result, replace start runes with continuation indicator if startRuneIdx > 0 { res = replaceStartWithContinuation(res, continuationRunes) } // if more runes to the right, replace final runes in result with continuation indicator if leftRuneIdx < l.numNoAnsiRunes { res = replaceEndWithContinuation(res, continuationRunes) } } // emulate \x1b[K: append padding spaces styled with the ANSI code that // was active at the \x1b[K position in the original line. we use explicit // styled spaces rather than re-emitting \x1b[K because render() pads // lines via lipgloss.Width(), and those unstyled spaces would overwrite // the fill. if l.fillStyle != "" && remainingWidth > 0 { res += l.fillStyle + strings.Repeat(" ", remainingWidth) + RST remainingWidth = 0 } res = removeEmptyAnsiSequences(res) return res, takeWidth - remainingWidth } // NumWrappedLines returns the number of wrapped lines given a wrap width func (l SingleItem) NumWrappedLines(wrapWidth int) int { if wrapWidth <= 0 { return 0 } else if l.totalWidth == 0 { return 1 } return (l.totalWidth + wrapWidth - 1) / wrapWidth } // LineBrokenItems returns a slice containing just this item (single-line). func (l SingleItem) LineBrokenItems() []Item { return []Item{l} } // Repr returns a string representation for debugging. func (l SingleItem) repr() string { return fmt.Sprintf("Item(%q)", l.line) } // runeAt decodes the desired rune from the lineNoAnsi string // it serves as a memory-saving technique compared to storing all the runes in a slice func (l SingleItem) runeAt(runeIdx int) rune { if runeIdx < 0 || runeIdx >= l.numNoAnsiRunes { return -1 } start := l.getByteOffsetAtRuneIdx(runeIdx) var end uint32 if runeIdx+1 >= l.numNoAnsiRunes { end = clampIntToUint32(len(l.lineNoAnsi)) } else { end = l.getByteOffsetAtRuneIdx(runeIdx + 1) } r, _ := utf8.DecodeRuneInString(l.lineNoAnsi[start:end]) return r } func (l SingleItem) getByteOffsetAtRuneIdx(runeIdx int) uint32 { if runeIdx < 0 { panic("runeIdx must be greater or equal to 0") } if runeIdx == 0 || len(l.line) == 0 || l.sparsity == 0 { return 0 } if runeIdx >= l.numNoAnsiRunes { panic("rune index greater than num runes") } // get the last stored byte offset before this index sparseIdx := runeIdx / l.sparsity baseRuneIdx := sparseIdx * l.sparsity if baseRuneIdx == runeIdx { return l.sparseRuneIdxToNoAnsiByteOffset[sparseIdx] } currRuneIdx := baseRuneIdx byteOffset := l.sparseRuneIdxToNoAnsiByteOffset[sparseIdx] for ; currRuneIdx != runeIdx; currRuneIdx++ { _, nBytes := utf8.DecodeRuneInString(l.lineNoAnsi[byteOffset:]) byteOffset += clampIntToUint32(nBytes) } return byteOffset } // getRuneIndexAtByteOffset finds the rune index at the given byte offset func (l SingleItem) getRuneIndexAtByteOffset(byteOffset int) int { if byteOffset <= 0 || len(l.lineNoAnsi) == 0 { return 0 } if byteOffset >= len(l.lineNoAnsi) { return l.numNoAnsiRunes } // binary search to find the rune index left, right := 0, l.numNoAnsiRunes-1 for left <= right { mid := left + (right-left)/2 midByteOffset := int(l.getByteOffsetAtRuneIdx(mid)) if midByteOffset == byteOffset { return mid } else if midByteOffset < byteOffset { left = mid + 1 } else { right = mid - 1 } } // if exact match not found, return the rune index where byteOffset would fall return right } // getRuneWidth extracts the width of a rune from the packed array func (l SingleItem) getRuneWidth(runeIdx int) uint8 { if runeIdx < 0 || runeIdx >= l.numNoAnsiRunes { return 0 } packedIdx := runeIdx / 4 bitPos := (runeIdx % 4) * 2 return (l.lineNoAnsiRuneWidths[packedIdx] >> bitPos) & 3 } func (l SingleItem) getCumulativeWidthAtRuneIdx(runeIdx int) uint32 { if runeIdx < 0 { return 0 } if runeIdx >= l.numNoAnsiRunes { panic("runeIdx greater than num runes") } // get the last stored cumulative width before this index sparseIdx := runeIdx / l.sparsity baseRuneIdx := sparseIdx * l.sparsity if baseRuneIdx == runeIdx { return l.sparseLineNoAnsiCumRuneWidths[sparseIdx] } // sum the widths from the last stored point to our target index var additionalWidth uint32 for i := baseRuneIdx + 1; i <= runeIdx; i++ { additionalWidth += uint32(l.getRuneWidth(i)) } return l.sparseLineNoAnsiCumRuneWidths[sparseIdx] + additionalWidth } // findRuneIndexWithWidthToLeft returns the index of the rune that has the input width to the left of it func (l SingleItem) findRuneIndexWithWidthToLeft(widthToLeft int) int { if widthToLeft < 0 { panic("widthToLeft less than 0") } if widthToLeft == 0 || l.numNoAnsiRunes == 0 { return 0 } if widthToLeft > l.Width() { panic("widthToLeft greater than total width") } left, right := 0, l.numNoAnsiRunes-1 widthToLeftUint32 := clampIntToUint32(widthToLeft) if l.getCumulativeWidthAtRuneIdx(right) < widthToLeftUint32 { return l.numNoAnsiRunes } for left < right { mid := left + (right-left)/2 if l.getCumulativeWidthAtRuneIdx(mid) >= widthToLeftUint32 { right = mid } else { left = mid + 1 } } // skip over zero-width runes w := l.getCumulativeWidthAtRuneIdx(left) nextLeft := left + 1 for nextLeft < l.numNoAnsiRunes && l.getCumulativeWidthAtRuneIdx(nextLeft) == w { left = nextLeft nextLeft++ } return left + 1 } // ByteRangesToMatches converts byte ranges in the ANSI-stripped content to Matches. func (l SingleItem) ByteRangesToMatches(byteRanges []ByteRange) []Match { if len(byteRanges) == 0 { return nil } matches := make([]Match, 0, len(byteRanges)) for _, br := range byteRanges { startWidth, endWidth := l.byteRangeToWidthRange(br.Start, br.End) matches = append(matches, Match{ ByteRange: br, WidthRange: WidthRange{Start: startWidth, End: endWidth}, }) } return matches } // byteRangeToWidthRange converts a byte range to a width range for a SingleItem. func (l SingleItem) byteRangeToWidthRange(startByte, endByte int) (startWidth, endWidth int) { startRuneIdx := l.getRuneIndexAtByteOffset(startByte) endRuneIdx := l.getRuneIndexAtByteOffset(endByte) if startRuneIdx > 0 { startWidth = int(l.getCumulativeWidthAtRuneIdx(startRuneIdx - 1)) } if endRuneIdx > 0 { endWidth = int(l.getCumulativeWidthAtRuneIdx(endRuneIdx - 1)) } return } // ExtractExactMatches extracts exact matches from the item's content without ANSI codes func (l SingleItem) ExtractExactMatches(exactMatch string) []Match { if exactMatch == "" { return nil } unstyled := l.lineNoAnsi var byteRanges []ByteRange startIndex := 0 for { foundIndex := strings.Index(unstyled[startIndex:], exactMatch) if foundIndex == -1 { break } actualStartIndex := startIndex + foundIndex endIndex := actualStartIndex + len(exactMatch) byteRanges = append(byteRanges, ByteRange{Start: actualStartIndex, End: endIndex}) startIndex = endIndex // overlapping matches are not considered } return l.ByteRangesToMatches(byteRanges) } // ExtractRegexMatches extracts regex matches from the item's content without ANSI codes func (l SingleItem) ExtractRegexMatches(regex *regexp.Regexp) []Match { regexMatches := regex.FindAllStringIndex(l.lineNoAnsi, -1) if len(regexMatches) == 0 { return nil } byteRanges := make([]ByteRange, 0, len(regexMatches)) for _, rm := range regexMatches { byteRanges = append(byteRanges, ByteRange{Start: rm[0], End: rm[1]}) } return l.ByteRangesToMatches(byteRanges) } ================================================ FILE: modules/viewport/item/single_test.go ================================================ package item import ( "regexp" "strings" "testing" "github.com/antgroup/hugescm/modules/viewport/internal" "charm.land/lipgloss/v2" ) func TestSingle_Width(t *testing.T) { tests := []struct { name string s string expected int }{ { name: "empty", s: "", expected: 0, }, { name: "simple", s: "1234567890", expected: 10, }, { name: "unicode", s: "世界🌟世界a", expected: 11, }, { name: "ansi", s: "\x1b[38;2;255;0;0mhi" + RST, expected: 2, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { item := NewItem(tt.s) if actual := item.Width(); actual != tt.expected { t.Errorf("expected %d, got %d", tt.expected, actual) } }) } } func TestSingle_Content(t *testing.T) { tests := []struct { name string s string expected string }{ { name: "empty", s: "", expected: "", }, { name: "simple", s: "1234567890", expected: "1234567890", }, { name: "unicode", s: "世界🌟世界", expected: "世界🌟世界", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { item := NewItem(tt.s) if actual := item.Content(); actual != tt.expected { t.Errorf("expected %s, got %s", tt.expected, actual) } }) } } func TestSingle_Take(t *testing.T) { tests := []struct { name string s string width int continuation string toHighlight string highlightStyle lipgloss.Style startWidth int numTakes int expected []string }{ { name: "empty", s: "", width: 10, continuation: "", startWidth: 0, numTakes: 1, expected: []string{""}, }, { name: "simple", s: "1234567890", width: 10, continuation: "", startWidth: 0, numTakes: 1, expected: []string{"1234567890"}, }, { name: "negative widthToLeft", s: "1234567890", width: 10, continuation: "", startWidth: -1, numTakes: 1, expected: []string{"1234567890"}, }, { name: "seek", s: "1234567890", width: 10, continuation: "", startWidth: 3, numTakes: 1, expected: []string{"4567890"}, }, { name: "seek to end", s: "1234567890", width: 10, continuation: "", startWidth: 10, numTakes: 1, expected: []string{""}, }, { name: "seek past end", s: "1234567890", width: 10, continuation: "", startWidth: 11, numTakes: 1, expected: []string{""}, }, { name: "continuation", s: "1234567890", width: 7, continuation: "...", startWidth: 2, numTakes: 1, expected: []string{"...6..."}, }, { name: "continuation past end", s: "1234567890", width: 10, continuation: "...", startWidth: 11, numTakes: 1, expected: []string{""}, }, { name: "unicode", s: "世界🌟世界🌟", width: 10, continuation: "", startWidth: 0, numTakes: 1, expected: []string{"世界🌟世界"}, }, { name: "unicode seek past first rune", s: "世界🌟世界🌟", width: 10, continuation: "", startWidth: 2, numTakes: 1, expected: []string{"界🌟世界🌟"}, }, { name: "unicode seek past first 2 runes", s: "世界🌟世界🌟", width: 10, continuation: "", startWidth: 3, numTakes: 1, expected: []string{"🌟世界🌟"}, }, { name: "unicode seek past all but 1 rune", s: "世界🌟世界🌟", width: 10, continuation: "", startWidth: 10, numTakes: 1, expected: []string{"🌟"}, }, { name: "unicode seek almost to end", s: "世界🌟世界🌟", width: 10, continuation: "", startWidth: 11, numTakes: 1, expected: []string{""}, }, { name: "unicode seek to end", s: "世界🌟世界🌟", width: 10, continuation: "", startWidth: 12, numTakes: 1, expected: []string{""}, }, { name: "unicode insufficient width", s: "世界🌟世界🌟", width: 1, continuation: "", startWidth: 2, numTakes: 1, expected: []string{""}, }, { name: "no ansi, no continuation, no width", s: "12345678901234", width: 0, continuation: "", numTakes: 3, expected: []string{ "", "", "", }, }, { name: "no ansi, continuation, no width", s: "12345678901234", width: 0, continuation: "...", numTakes: 3, expected: []string{ "", "", "", }, }, { name: "no ansi, no continuation, width 1", s: "12345678901234", width: 1, continuation: "", numTakes: 3, expected: []string{ "1", "2", "3", }, }, { name: "no ansi, continuation, width 1", s: "12345678901234", width: 1, continuation: "...", numTakes: 3, expected: []string{ ".", ".", ".", }, }, { name: "no ansi, no continuation", s: "12345678901234", width: 5, continuation: "", numTakes: 4, expected: []string{ "12345", "67890", "1234", "", }, }, { name: "no ansi, continuation", s: "12345678901234", width: 5, continuation: "...", numTakes: 4, expected: []string{ "12...", ".....", "...4", "", }, }, { name: "no ansi, no continuation", s: "12345678901234", width: 5, continuation: "", numTakes: 4, expected: []string{ "12345", "67890", "1234", "", }, }, { name: "no ansi, continuation", s: "12345678901234", width: 5, continuation: "...", numTakes: 4, expected: []string{ "12...", ".....", "...4", "", }, }, { name: "double width unicode, no continuation, no width", s: "世界🌟", // each of these takes up 2 terminal cells width: 0, continuation: "", numTakes: 3, expected: []string{ "", "", "", }, }, { name: "double width unicode, continuation, no width", s: "世界🌟", // each of these takes up 2 terminal cells width: 0, continuation: "...", numTakes: 3, expected: []string{ "", "", "", }, }, { name: "double width unicode, no continuation, width 1", s: "世界🌟", // each of these takes up 2 terminal cells width: 1, continuation: "", numTakes: 3, expected: []string{ "", "", "", }, }, { name: "double width unicode, continuation, width 1", s: "世界🌟", // each of these takes up 2 terminal cells width: 1, continuation: "...", numTakes: 3, expected: []string{ "", "", "", }, }, { name: "double width unicode, no continuation, width 2", s: "世界🌟", // each of these takes up 2 terminal cells width: 2, continuation: "", numTakes: 4, expected: []string{ "世", "界", "🌟", "", }, }, { name: "double width unicode, continuation, width 2", s: "世界🌟", // each of these takes up 2 terminal cells width: 2, continuation: "...", numTakes: 4, expected: []string{ "..", "..", "..", "", }, }, { name: "double width unicode, no continuation, width 3", s: "世界🌟", // each of these takes up 2 terminal cells width: 3, continuation: "", numTakes: 4, expected: []string{ "世", "界", "🌟", "", }, }, { name: "double width unicode, continuation, width 3", s: "世界🌟", // each of these takes up 2 terminal cells width: 3, continuation: "...", numTakes: 4, expected: []string{ "..", "..", "..", "", }, }, { name: "double width unicode, no continuation, width 4", s: "世界🌟", // each of these takes up 2 terminal cells width: 4, continuation: "", numTakes: 3, expected: []string{ "世界", "🌟", "", }, }, { name: "double width unicode, continuation, width 3", s: "世界🌟", // each of these takes up 2 terminal cells width: 4, continuation: "...", numTakes: 3, expected: []string{ "世..", "..", "", }, }, { name: "width equal to continuation", s: "1234567890", width: 3, continuation: "...", numTakes: 4, expected: []string{ "...", "...", "...", ".", }, }, { name: "width slightly bigger than continuation", s: "1234567890", width: 4, continuation: "...", numTakes: 3, expected: []string{ "1...", "....", "..", }, }, { name: "width double continuation 1", s: "123456789012345678", width: 6, continuation: "...", numTakes: 3, expected: []string{ "123...", "......", "...678", }, }, { name: "width double continuation 2", s: "1234567890123456789", width: 6, continuation: "...", numTakes: 4, expected: []string{ "123...", "......", "......", ".", }, }, { name: "small string", s: "hi", width: 3, continuation: "...", numTakes: 1, expected: []string{"hi"}, }, { name: "continuation longer than width", s: "1234567890123456789012345", width: 1, continuation: "...", numTakes: 1, expected: []string{"."}, }, { name: "twice the continuation longer than width", s: "1234567", width: 5, continuation: "...", numTakes: 1, expected: []string{"12..."}, }, { name: "sufficient width", s: "1234567890123456789012345", width: 30, continuation: "...", numTakes: 1, expected: []string{"1234567890123456789012345"}, }, { name: "sufficient width, space at end preserved", s: "1234567890123456789012345 ", width: 30, continuation: "...", numTakes: 1, expected: []string{"1234567890123456789012345 "}, }, { name: "insufficient width", s: "1234567890123456789012345", width: 15, continuation: "...", numTakes: 1, expected: []string{"123456789012..."}, }, { name: "insufficient width", s: "123456789012345678901234567890123456789012345", width: 15, continuation: "...", numTakes: 3, expected: []string{ "123456789012...", "...901234567...", "...456789012345", }, }, { name: "ansi simple, no continuation", s: "\x1b[38;2;255;0;0ma really really long line" + RST, width: 15, continuation: "", numTakes: 2, expected: []string{ "\x1b[38;2;255;0;0ma really really" + RST, "\x1b[38;2;255;0;0m long line" + RST, }, }, { name: "ansi simple, continuation", s: "\x1b[38;2;255;0;0m12345678901234567890123456789012345" + RST, width: 15, continuation: "...", numTakes: 3, expected: []string{ "\x1b[38;2;255;0;0m123456789012..." + RST, "\x1b[38;2;255;0;0m...901234567..." + RST, "\x1b[38;2;255;0;0m...45" + RST, }, }, { name: "inline ansi, no continuation", s: "\x1b[38;2;255;0;0ma" + RST + " really really long line", width: 15, continuation: "", numTakes: 2, expected: []string{ "\x1b[38;2;255;0;0ma" + RST + " really really", " long line", }, }, { name: "inline ansi, continuation", s: "|\x1b[38;2;169;15;15mfl..-1" + RST + "| {\"timestamp\": \"now\"}", width: 15, continuation: "...", numTakes: 3, expected: []string{ "|\x1b[38;2;169;15;15mfl..-1" + RST + "| {\"t...", "...mp\": \"now\"}", "", }, }, { name: "ansi short", s: "\x1b[38;2;0;0;255mhi" + RST, width: 3, continuation: "...", numTakes: 1, expected: []string{ "\x1b[38;2;0;0;255mhi" + RST, }, }, { name: "multi-byte runes", s: "├─flask", width: 6, continuation: "...", numTakes: 1, expected: []string{ "├─f...", }, }, { name: "multi-byte runes with ansi and continuation", s: "\x1b[38;2;0;0;255m├─flask" + RST, width: 6, continuation: "...", numTakes: 1, expected: []string{ "\x1b[38;2;0;0;255m├─f..." + RST, }, }, { name: "width exceeds capacity", s: " │ └─[ ] local-path-provisioner (running for 11d)", width: 53, continuation: "", numTakes: 1, expected: []string{ " │ └─[ ] local-path-provisioner (running for 11d)", }, }, { name: "toHighlight, no continuation, no overflow, no ansi", s: "a very normal log", width: 15, continuation: "", toHighlight: "very", highlightStyle: internal.RedBg, numTakes: 1, expected: []string{ "a " + internal.RedBg.Render("very") + " normal l", }, }, { name: "toHighlight, no continuation, no overflow, no ansi", s: "a very normal log", width: 15, continuation: "", toHighlight: "very", highlightStyle: internal.RedBg, numTakes: 1, expected: []string{ "a " + internal.RedBg.Render("very") + " normal l", }, }, { name: "toHighlight, continuation, no overflow, no ansi", s: "a very normal log", width: 15, continuation: "...", toHighlight: "l l", highlightStyle: internal.RedBg, numTakes: 1, expected: []string{ "a very norma\x1b[48;2;255;0;0m..." + RST, }, }, { name: "toHighlight, another continuation, no overflow, no ansi", s: "a very normal log", width: 15, continuation: "...", toHighlight: "very", highlightStyle: internal.RedBg, startWidth: 1, numTakes: 1, expected: []string{ ".\x1b[48;2;255;0;0m..ry" + RST + " normal...", }, }, { name: "toHighlight, no continuation, no overflow, no ansi, many matches", s: strings.Repeat("r", 10), width: 6, continuation: "", toHighlight: "r", highlightStyle: internal.RedBg, numTakes: 2, expected: []string{ strings.Repeat("\x1b[48;2;255;0;0mr"+RST+"", 6), strings.Repeat("\x1b[48;2;255;0;0mr"+RST+"", 4), }, }, { name: "toHighlight, no continuation, no overflow, ansi", s: "\x1b[38;2;0;0;255mhi \x1b[48;2;0;255;0mthere" + RST + " er", width: 15, continuation: "", toHighlight: "er", highlightStyle: internal.RedBg, numTakes: 1, expected: []string{ "\x1b[38;2;0;0;255mhi \x1b[48;2;0;255;0mth" + RST + "\x1b[48;2;255;0;0mer" + RST + "\x1b[38;2;0;0;255m\x1b[48;2;0;255;0me" + RST + " \x1b[48;2;255;0;0mer" + RST, }, }, { name: "toHighlight, no continuation, overflows left and right, no ansi", s: "hi there re", width: 6, continuation: "", toHighlight: "hi there", highlightStyle: internal.RedBg, numTakes: 2, expected: []string{ internal.RedBg.Render("hi the"), internal.RedBg.Render("re") + " re", }, }, { name: "toHighlight, no continuation, overflows left and right, ansi", s: "\x1b[38;2;0;0;255mhi there re" + RST, width: 6, continuation: "", toHighlight: "hi there", highlightStyle: internal.RedBg, numTakes: 2, expected: []string{ "\x1b[48;2;255;0;0mhi the" + RST, "\x1b[48;2;255;0;0mre" + RST + "\x1b[38;2;0;0;255m re" + RST, }, }, { name: "toHighlight, no continuation, another ansi", s: internal.RedBg.Render("hello") + " " + internal.BlueBg.Render("world"), width: 11, continuation: "", toHighlight: "lo wo", highlightStyle: internal.GreenBg, numTakes: 1, expected: []string{ internal.RedBg.Render("hel") + internal.GreenBg.Render("lo wo") + internal.BlueBg.Render("rld"), }, }, { name: "toHighlight, no continuation, overflows left and right one char, no ansi", s: "hi there re", width: 7, continuation: "", toHighlight: "hi there", highlightStyle: internal.RedBg, numTakes: 2, expected: []string{ internal.RedBg.Render("hi ther"), internal.RedBg.Render("e") + " re", }, }, { name: "unicode toHighlight, no continuation, no overflow, no ansi", s: "世界🌟世界🌟", width: 7, continuation: "", toHighlight: "世界", highlightStyle: internal.RedBg, numTakes: 2, expected: []string{ internal.RedBg.Render("世界") + "🌟", internal.RedBg.Render("世界") + "🌟", }, }, { name: "unicode toHighlight, no continuation, overflow, no ansi", s: "世界🌟世界🌟", width: 7, continuation: "", toHighlight: "世界🌟世", highlightStyle: internal.RedBg, numTakes: 2, expected: []string{ internal.RedBg.Render("世界🌟"), internal.RedBg.Render("世") + "界🌟", }, }, { name: "unicode toHighlight, no continuation, overflow, ansi", s: "\x1b[38;2;0;0;255m世界🌟世界🌟" + RST, width: 7, continuation: "", toHighlight: "世界🌟世", highlightStyle: internal.RedBg, numTakes: 2, expected: []string{ internal.RedBg.Render("世界🌟"), internal.RedBg.Render("世") + "\x1b[38;2;0;0;255m界🌟" + RST, }, }, { name: "unicode toHighlight, continuation, overflow, ansi", s: "\x1b[38;2;0;0;255m世界🌟世界🌟" + RST, width: 7, continuation: "...", toHighlight: "世界🌟世", highlightStyle: internal.RedBg, numTakes: 2, expected: []string{ "\x1b[48;2;255;0;0m世界.." + RST, "\x1b[48;2;255;0;0m.." + RST + "\x1b[38;2;0;0;255m界🌟" + RST, }, }, { name: "unicode with heart exact width", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) = 6w, 11b s: "A💖中é", width: 6, continuation: "", startWidth: 0, numTakes: 1, expected: []string{"A💖中é"}, }, { name: "unicode with heart start continuation", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) = 6w, 11b s: "A💖中é", width: 5, continuation: "...", startWidth: 1, numTakes: 1, expected: []string{"..中é"}, }, { name: "unicode with heart start continuation and ansi", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) = 6w, 11b s: internal.RedBg.Render("A💖") + "中é", width: 5, continuation: "...", startWidth: 1, numTakes: 1, expected: []string{internal.RedBg.Render("..") + "中é"}, }, { name: "unicode combining", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) = 6w, 11b s: "A💖中éA💖中é", // 12w total width: 10, continuation: "", numTakes: 2, expected: []string{ "A💖中éA💖", "中é", }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if len(tt.expected) != tt.numTakes { t.Fatalf("num expected != num popLefts") } item := NewItem(tt.s) startWidth := tt.startWidth byteRanges := item.ExtractExactMatches(tt.toHighlight) highlights := toHighlights(byteRanges, tt.highlightStyle) for i := 0; i < tt.numTakes; i++ { actual, actualWidth := item.Take(startWidth, tt.width, tt.continuation, highlights) internal.CmpStr(t, tt.expected[i], actual) startWidth += actualWidth } }) } } func TestSingle_Take_EraseInLine(t *testing.T) { greenBg := "\x1b[42m" redBg := "\x1b[41m" baseBg := "\x1b[48;2;0;40;0m" highlightBg := "\x1b[48;2;0;96;0m" tests := []struct { name string s string width int startWidth int expected string // expectedWidth is the width returned by Take; defaults to width if zero expectedWidth int }{ { name: "\\x1b[K pads with preceding style", s: greenBg + "+added" + "\x1b[K" + RST, width: 20, expected: greenBg + "+added" + RST + greenBg + strings.Repeat(" ", 14) + RST, }, { name: "\\x1b[0K pads with preceding style", s: redBg + "-removed" + "\x1b[0K" + RST, width: 20, expected: redBg + "-removed" + RST + redBg + strings.Repeat(" ", 12) + RST, }, { name: "uses fill style not content style", s: baseBg + "text" + highlightBg + "hl" + RST + baseBg + "\x1b[0K" + RST, width: 20, expected: baseBg + "text" + highlightBg + "hl" + RST + baseBg + strings.Repeat(" ", 14) + RST, }, { name: "no padding when content fills width", s: greenBg + "1234567890" + "\x1b[K" + RST, width: 10, expected: greenBg + "1234567890" + RST, }, { name: "no padding without \\x1b[K", s: greenBg + "+added" + RST, width: 20, expected: greenBg + "+added" + RST, expectedWidth: 6, }, { name: "pads when scrolled right", s: greenBg + "+added line" + "\x1b[K" + RST, width: 20, startWidth: 5, expected: greenBg + "d line" + RST + greenBg + strings.Repeat(" ", 14) + RST, }, { name: "empty content with \\x1b[K", s: greenBg + "\x1b[K" + RST, width: 10, expected: greenBg + strings.Repeat(" ", 10) + RST, }, { name: "plain text with \\x1b[K but no preceding style", s: "hello\x1b[K", width: 10, expected: "hello", expectedWidth: 5, }, { name: "\\x1b[K preceded by reset means no fill", s: greenBg + "text" + RST + "\x1b[K" + RST, width: 20, expected: greenBg + "text" + RST, expectedWidth: 4, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { item := NewItem(tt.s) actual, actualWidth := item.Take(tt.startWidth, tt.width, "", []Highlight{}) internal.CmpStr(t, tt.expected, actual) expectedWidth := tt.expectedWidth if expectedWidth == 0 { expectedWidth = tt.width } if actualWidth != expectedWidth { t.Errorf("expected width %d, got %d", expectedWidth, actualWidth) } }) } } func TestSingle_NewItem_stripsNonSGR(t *testing.T) { tests := []struct { name string input string expectedContent string expectedNoAnsi string expectedWidth int }{ { name: "non-sgr csi stripped from content", input: "\x1b[31m\x1b[2Jhello\x1b[m", expectedContent: "\x1b[31mhello\x1b[m", expectedNoAnsi: "hello", expectedWidth: 5, }, { name: "cursor movement stripped", input: "\x1b[10;20Hworld", expectedContent: "world", expectedNoAnsi: "world", expectedWidth: 5, }, { name: "osc stripped", input: "\x1b]0;title\x07hello", expectedContent: "hello", expectedNoAnsi: "hello", expectedWidth: 5, }, { name: "escK with non-sgr still works", input: "\x1b[41m\x1b[2Jhello\x1b[K", expectedContent: "\x1b[41mhello", expectedNoAnsi: "hello", expectedWidth: 5, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { item := NewItem(tt.input) internal.CmpStr(t, tt.expectedContent, item.Content()) internal.CmpStr(t, tt.expectedNoAnsi, item.ContentNoAnsi()) if item.Width() != tt.expectedWidth { t.Errorf("expected width %d, got %d", tt.expectedWidth, item.Width()) } }) } } func TestSingle_Take_NoAnsiLeak(t *testing.T) { // simulates git diff syntax-highlighted output where " is one color and \b another. // when highlighting ", ANSI code internals like "38;2;190;132;255m" must not // leak as visible text. s := "\x1b[38;2;204;204;204m " + RST + "\x1b[38;2;152;195;121m\"" + RST + "\x1b[38;2;190;132;255m\\b" + RST + "\x1b[38;2;152;195;121m\"" + RST + "\x1b[38;2;204;204;204m " + RST item := NewItem(s) byteRanges := item.ExtractExactMatches("\"") highlights := toHighlights(byteRanges, internal.RedBg) actual, _ := item.Take(0, 80, "", highlights) stripped := StripAnsi(actual) plain := StripAnsi(s) if stripped != plain { t.Errorf("ANSI leak detected: StripAnsi(result) = %q, want %q", stripped, plain) } } func TestSingle_NumWrappedLines(t *testing.T) { tests := []struct { name string s string wrapWidth int expected int }{ { name: "none no width", s: "none", wrapWidth: 0, expected: 0, }, { name: "none with width", s: "none", wrapWidth: 5, expected: 1, }, { name: "hello world negative width", s: "hello world", // 11 width wrapWidth: -1, expected: 0, }, { name: "hello world zero width", s: "hello world", // 11 width wrapWidth: 0, expected: 0, }, { name: "hello world wrap 1", s: "hello world", // 11 width wrapWidth: 1, expected: 11, }, { name: "hello world wrap 5", s: "hello world", // 11 width wrapWidth: 5, expected: 3, }, { name: "hello world wrap 11", s: "hello world", // 11 width wrapWidth: 11, expected: 1, }, { name: "hello world wrap 12", s: "hello world", // 11 width wrapWidth: 12, expected: 1, }, { name: "ansi wrap 5", s: internal.RedBg.Render("hello world"), // 11 width wrapWidth: 5, expected: 3, }, { name: "unicode_ansi wrap 3", s: internal.RedBg.Render("A💖") + "中é", // 6 width wrapWidth: 3, expected: 2, }, { name: "unicode_ansi wrap 6", s: internal.RedBg.Render("A💖") + "中é", // 6 width wrapWidth: 6, expected: 1, }, { name: "unicode_ansi wrap 7", s: internal.RedBg.Render("A💖") + "中é", // 6 width wrapWidth: 7, expected: 1, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { item := NewItem(tt.s) actual := item.NumWrappedLines(tt.wrapWidth) if actual != tt.expected { t.Errorf("expected %d, got %d for item %s with wrap width %d", tt.expected, actual, item.repr(), tt.wrapWidth) } }) } } func TestSingleItem_ExtractExactMatches(t *testing.T) { tests := []struct { name string s string exactMatch string expected []Match }{ { name: "empty exact match", s: "hello world", exactMatch: "", expected: []Match{}, }, { name: "no matches", s: "hell", exactMatch: "lo", expected: []Match{}, }, { name: "single match", s: "hello world", exactMatch: "world", expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, }, }, { name: "multiple matches in single string", s: "hello world world", exactMatch: "world", expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, { ByteRange: ByteRange{ Start: 12, End: 17, }, WidthRange: WidthRange{ Start: 12, End: 17, }, }, }, }, { name: "overlapping matches", s: "aaa", exactMatch: "aa", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 2, }, WidthRange: WidthRange{ Start: 0, End: 2, }, }, }, }, { name: "sequential matches", s: "aaaa", exactMatch: "aa", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 2, }, WidthRange: WidthRange{ Start: 0, End: 2, }, }, { ByteRange: ByteRange{ Start: 2, End: 4, }, WidthRange: WidthRange{ Start: 2, End: 4, }, }, }, }, { name: "case sensitive", s: "Hello HELLO hello", exactMatch: "hello", expected: []Match{ { ByteRange: ByteRange{ Start: 12, End: 17, }, WidthRange: WidthRange{ Start: 12, End: 17, }, }, }, }, { name: "unicode characters", // 世 is 3 bytes 2 width, 界 is 3 bytes 2 width, 🌟 is 4 bytes 2 width s: "世界 hello 🌟", exactMatch: "界 hello 🌟", expected: []Match{ { ByteRange: ByteRange{ Start: 3, End: 17, }, WidthRange: WidthRange{ Start: 2, End: 13, }, }, }, }, { name: "single character match", s: "abcabc", exactMatch: "a", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 1, }, WidthRange: WidthRange{ Start: 0, End: 1, }, }, { ByteRange: ByteRange{ Start: 3, End: 4, }, WidthRange: WidthRange{ Start: 3, End: 4, }, }, }, }, { name: "match at beginning and end", s: "test middle test", exactMatch: "test", expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 4, }, WidthRange: WidthRange{ Start: 0, End: 4, }, }, { ByteRange: ByteRange{ Start: 12, End: 16, }, WidthRange: WidthRange{ Start: 12, End: 16, }, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { matches := NewItem(tt.s).ExtractExactMatches(tt.exactMatch) if len(matches) != len(tt.expected) { t.Errorf("expected %d matches, got %d", len(tt.expected), len(matches)) return } for i, expected := range tt.expected { match := matches[i] if match.ByteRange.Start != expected.ByteRange.Start || match.ByteRange.End != expected.ByteRange.End { t.Errorf("match %d: expected byte range Start=%d End=%d, got Start=%d End=%d", i, expected.ByteRange.Start, expected.ByteRange.End, match.ByteRange.Start, match.ByteRange.End) } if match.WidthRange.Start != expected.WidthRange.Start || match.WidthRange.End != expected.WidthRange.End { t.Errorf("match %d: expected width range Start=%d End=%d, got Start=%d End=%d", i, expected.WidthRange.Start, expected.WidthRange.End, match.WidthRange.Start, match.WidthRange.End) } } }) } } func TestSingleItem_ExtractRegexMatches(t *testing.T) { tests := []struct { name string s string regexPattern string expected []Match expectError bool }{ { name: "invalid regex", s: "hello world", regexPattern: "[", expected: nil, expectError: true, }, { name: "no matches", s: "hello world", regexPattern: "xyz", expected: []Match{}, }, { name: "simple word match", s: "hello world", regexPattern: "world", expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, }, }, { name: "word boundary match", s: "hello world worldly", regexPattern: `\bworld\b`, expected: []Match{ { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, }, }, { name: "digit pattern", s: "line 123 has numbers 456", regexPattern: `\d+`, expected: []Match{ { ByteRange: ByteRange{ Start: 5, End: 8, }, WidthRange: WidthRange{ Start: 5, End: 8, }, }, { ByteRange: ByteRange{ Start: 21, End: 24, }, WidthRange: WidthRange{ Start: 21, End: 24, }, }, }, }, { name: "case insensitive pattern", s: "Hello HELLO hello", regexPattern: `(?i)hello`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 5, }, }, { ByteRange: ByteRange{ Start: 6, End: 11, }, WidthRange: WidthRange{ Start: 6, End: 11, }, }, { ByteRange: ByteRange{ Start: 12, End: 17, }, WidthRange: WidthRange{ Start: 12, End: 17, }, }, }, }, { name: "capturing groups", s: "user: john and user: jane", regexPattern: `user: (\w+)`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 10, }, WidthRange: WidthRange{ Start: 0, End: 10, }, }, { ByteRange: ByteRange{ Start: 15, End: 25, }, WidthRange: WidthRange{ Start: 15, End: 25, }, }, }, }, { name: "multiple capturing groups", s: "user: john smith and user: jane doe", regexPattern: `user: (\w+) (\w+)`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 16, }, WidthRange: WidthRange{ Start: 0, End: 16, }, }, { ByteRange: ByteRange{ Start: 21, End: 35, }, WidthRange: WidthRange{ Start: 21, End: 35, }, }, }, }, { name: "dot metacharacter", s: "a1b a.b axb", regexPattern: `a.b`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 3, }, WidthRange: WidthRange{ Start: 0, End: 3, }, }, { ByteRange: ByteRange{ Start: 4, End: 7, }, WidthRange: WidthRange{ Start: 4, End: 7, }, }, { ByteRange: ByteRange{ Start: 8, End: 11, }, WidthRange: WidthRange{ Start: 8, End: 11, }, }, }, }, { name: "anchored pattern", s: "start middle end", regexPattern: `^start`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 5, }, WidthRange: WidthRange{ Start: 0, End: 5, }, }, }, }, { name: "unicode with regex", // 世 is 3 bytes 2 width, 界 is 3 bytes 2 width, 🌟 is 4 bytes 2 width s: "世界 test 🌟 and test 世界", regexPattern: `界 test 🌟`, expected: []Match{ { ByteRange: ByteRange{ Start: 3, End: 16, }, WidthRange: WidthRange{ Start: 2, End: 12, }, }, }, }, { name: "overlapping matches not possible with regex", s: "aaa", regexPattern: `aa`, expected: []Match{ { ByteRange: ByteRange{ Start: 0, End: 2, }, WidthRange: WidthRange{ Start: 0, End: 2, }, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { regex, err := regexp.Compile(tt.regexPattern) if tt.expectError { if err == nil { t.Errorf("expected error but got none") } return } if err != nil { t.Errorf("unexpected error compiling regex: %v", err) return } matches := NewItem(tt.s).ExtractRegexMatches(regex) if len(matches) != len(tt.expected) { t.Errorf("expected %d matches, got %d", len(tt.expected), len(matches)) return } for i, expected := range tt.expected { match := matches[i] if match.ByteRange.Start != expected.ByteRange.Start || match.ByteRange.End != expected.ByteRange.End { t.Errorf("match %d: expected byte range Start=%d End=%d, got Start=%d End=%d", i, expected.ByteRange.Start, expected.ByteRange.End, match.ByteRange.Start, match.ByteRange.End) } if match.WidthRange.Start != expected.WidthRange.Start || match.WidthRange.End != expected.WidthRange.End { t.Errorf("match %d: expected width range Start=%d End=%d, got Start=%d End=%d", i, expected.WidthRange.Start, expected.WidthRange.End, match.WidthRange.Start, match.WidthRange.End) } } }) } } func TestSingle_findRuneIndexWithWidthToLeft(t *testing.T) { tests := []struct { name string s string widthToLeft int expectedRuneIdx int shouldPanic bool }{ { name: "empty string", s: "", widthToLeft: 0, expectedRuneIdx: 0, }, { name: "negative widthToLeft", s: "hello", widthToLeft: -1, shouldPanic: true, }, { name: "single char", s: "a", widthToLeft: 1, expectedRuneIdx: 1, }, { name: "widthToLeft at end", s: "abc", widthToLeft: 3, expectedRuneIdx: 3, }, { name: "widthToLeft past total width", s: "a", widthToLeft: 2, shouldPanic: true, }, { name: "longer", s: "hello", widthToLeft: 3, expectedRuneIdx: 3, }, { name: "ansi", s: "hi " + internal.RedBg.Render("there") + " leo", widthToLeft: 8, expectedRuneIdx: 8, }, { name: "unicode", s: "A💖中é", // A (1w, 1b, 1r), 💖 (2w, 4b, 1r), 中 (2w, 3b, 1r), é (1w, 3b, 2r) = 6w, 11b, 5r widthToLeft: 5, expectedRuneIdx: 3, }, { name: "unicode zero-width", s: "A💖中é", // A (1w, 1b, 1r), 💖 (2w, 4b, 1r), 中 (2w, 3b, 1r), é (1w, 3b, 2r) = 6w, 11b, 5r widthToLeft: 6, expectedRuneIdx: 5, }, { name: "unicode zero-width single char", s: "é", widthToLeft: 1, expectedRuneIdx: 2, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { item := NewItem(tt.s) if tt.shouldPanic { assertPanic(t, func() { item.findRuneIndexWithWidthToLeft(tt.widthToLeft) }) return } actual := item.findRuneIndexWithWidthToLeft(tt.widthToLeft) if actual != tt.expectedRuneIdx { t.Errorf("findRuneIndexWithWidthToLeft() got %d, expected %d", actual, tt.expectedRuneIdx) } }) } } func TestSingle_getByteOffsetAtRuneIdx(t *testing.T) { tests := []struct { name string s string runeIdx int expectedByteOffset int shouldPanic bool }{ { name: "empty string", s: "", runeIdx: 0, expectedByteOffset: 0, }, { name: "negative runeIdx", s: "hello", runeIdx: -1, shouldPanic: true, }, { name: "single char", s: "a", runeIdx: 0, expectedByteOffset: 0, }, { name: "runeIdx out of bounds", s: "a", runeIdx: 1, shouldPanic: true, }, { name: "longer", s: "hello", runeIdx: 3, expectedByteOffset: 3, }, { name: "ansi", s: "hi " + internal.RedBg.Render("there") + " leo", runeIdx: 8, expectedByteOffset: 8, }, { name: "unicode", s: "A💖中é", // A (1w, 1b, 1r), 💖 (2w, 4b, 1r), 中 (2w, 3b, 1r), é (1w, 3b, 2r) = 6w, 11b, 5r runeIdx: 3, // first rune in é expectedByteOffset: 8, }, { name: "unicode zero-width", s: "A💖中é", // A (1w, 1b, 1r), 💖 (2w, 4b, 1r), 中 (2w, 3b, 1r), é (1w, 3b, 2r) = 6w, 11b, 5r runeIdx: 4, // second rune in é expectedByteOffset: 9, }, { name: "unicode zero-width single char", s: "é", runeIdx: 1, expectedByteOffset: 1, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { item := NewItem(tt.s) if tt.shouldPanic { assertPanic(t, func() { item.getByteOffsetAtRuneIdx(tt.runeIdx) }) return } actual := item.getByteOffsetAtRuneIdx(tt.runeIdx) if int(actual) != tt.expectedByteOffset { t.Errorf("getByteOffsetAtRuneIdx() got %d, expected %d", actual, tt.expectedByteOffset) } }) } } func TestSingleItem_ByteRangesToMatches(t *testing.T) { tests := []struct { name string s string byteRanges []ByteRange expected []Match }{ { name: "nil byte ranges", s: "hello world", byteRanges: nil, expected: nil, }, { name: "empty byte ranges", s: "hello world", byteRanges: []ByteRange{}, expected: nil, }, { name: "single ASCII range", s: "hello world", byteRanges: []ByteRange{ {Start: 6, End: 11}, }, expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 6, End: 11}, }, }, }, { name: "multiple ASCII ranges", s: "hello world hello", byteRanges: []ByteRange{ {Start: 0, End: 5}, {Start: 12, End: 17}, }, expected: []Match{ { ByteRange: ByteRange{Start: 0, End: 5}, WidthRange: WidthRange{Start: 0, End: 5}, }, { ByteRange: ByteRange{Start: 12, End: 17}, WidthRange: WidthRange{Start: 12, End: 17}, }, }, }, { name: "unicode double-width characters", // 世 is 3 bytes 2 width, 界 is 3 bytes 2 width, 🌟 is 4 bytes 2 width s: "世界 hello 🌟", byteRanges: []ByteRange{ {Start: 3, End: 17}, // "界 hello 🌟" }, expected: []Match{ { ByteRange: ByteRange{Start: 3, End: 17}, WidthRange: WidthRange{Start: 2, End: 13}, }, }, }, { name: "range at start", s: "hello world", byteRanges: []ByteRange{ {Start: 0, End: 5}, }, expected: []Match{ { ByteRange: ByteRange{Start: 0, End: 5}, WidthRange: WidthRange{Start: 0, End: 5}, }, }, }, { name: "single character range", s: "hello", byteRanges: []ByteRange{ {Start: 2, End: 3}, }, expected: []Match{ { ByteRange: ByteRange{Start: 2, End: 3}, WidthRange: WidthRange{Start: 2, End: 3}, }, }, }, { name: "ANSI-styled content uses no-ansi positions", s: "\x1b[38;2;255;0;0mhello world" + RST, byteRanges: []ByteRange{ {Start: 6, End: 11}, }, expected: []Match{ { ByteRange: ByteRange{Start: 6, End: 11}, WidthRange: WidthRange{Start: 6, End: 11}, }, }, }, { name: "mixed unicode widths", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) = 6w, 11b s: "A💖中é", byteRanges: []ByteRange{ {Start: 1, End: 8}, // 💖中 }, expected: []Match{ { ByteRange: ByteRange{Start: 1, End: 8}, WidthRange: WidthRange{Start: 1, End: 5}, }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { itm := NewItem(tt.s) actual := itm.ByteRangesToMatches(tt.byteRanges) if len(actual) != len(tt.expected) { t.Fatalf("expected %d matches, got %d", len(tt.expected), len(actual)) } for i, expected := range tt.expected { match := actual[i] if match.ByteRange != expected.ByteRange { t.Errorf("match %d: expected byte range %+v, got %+v", i, expected.ByteRange, match.ByteRange) } if match.WidthRange != expected.WidthRange { t.Errorf("match %d: expected width range %+v, got %+v", i, expected.WidthRange, match.WidthRange) } } }) } } // TestSingleItem_ByteRangesToMatches_ConsistentWithExtract verifies that // ByteRangesToMatches produces the same results as ExtractExactMatches // for the same byte ranges. func TestSingleItem_ByteRangesToMatches_ConsistentWithExtract(t *testing.T) { tests := []struct { name string s string query string }{ {name: "ASCII", s: "hello world hello", query: "hello"}, {name: "unicode", s: "世界 test 🌟", query: "test"}, {name: "single char", s: "abcabc", query: "a"}, {name: "ANSI styled", s: "\x1b[31mhello world\x1b[0m", query: "world"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { itm := NewItem(tt.s) // Get matches via ExtractExactMatches exactMatches := itm.ExtractExactMatches(tt.query) // Manually compute byte ranges the same way content := itm.ContentNoAnsi() var byteRanges []ByteRange start := 0 for { idx := strings.Index(content[start:], tt.query) if idx == -1 { break } actualStart := start + idx end := actualStart + len(tt.query) byteRanges = append(byteRanges, ByteRange{Start: actualStart, End: end}) start = end } // Get matches via ByteRangesToMatches brMatches := itm.ByteRangesToMatches(byteRanges) if len(exactMatches) != len(brMatches) { t.Fatalf("length mismatch: ExtractExactMatches=%d, ByteRangesToMatches=%d", len(exactMatches), len(brMatches)) } for i := range exactMatches { if exactMatches[i] != brMatches[i] { t.Errorf("match %d: ExtractExactMatches=%+v, ByteRangesToMatches=%+v", i, exactMatches[i], brMatches[i]) } } }) } } ================================================ FILE: modules/viewport/item/string.go ================================================ package item import ( "strings" "github.com/clipperhouse/displaywidth" ) // overflowsLeft checks if a substring overflows a string on the left if the string were to start at startByteIdx inclusive. // assumes s has no ansi codes. // It performs a case-sensitive comparison and returns two values: // - A boolean indicating whether there is overflow // - An integer indicating the ending string index (exclusive) of the overflow (0 if none) // // Examples: // // 01234567890 // overflowsLeft("my str here", 3, "my str") returns (true, 6) // overflowsLeft("my str here", 3, "your str") returns (false, 0) // overflowsLeft("my str here", 6, "my str") returns (false, 0) func overflowsLeft(s string, startByteIdx int, substr string) (bool, int) { if len(s) == 0 || len(substr) == 0 || len(substr) > len(s) { return false, 0 } end := len(substr) + startByteIdx for offset := 1; offset < len(substr); offset++ { if startByteIdx-offset < 0 || end-offset > len(s) { continue } if s[startByteIdx-offset:end-offset] == substr { return true, end - offset } } return false, 0 } // overflowsRight checks if a substring overflows a string on the right if the string were to end at endByteIdx exclusive. // assumes s has no ansi codes. // It performs a case-sensitive comparison and returns two values: // - A boolean indicating whether there is overflow // - An integer indicating the starting string startByteIdx of the overflow (0 if none) // // Examples: // // 01234567890 // overflowsRight("my str here", 3, "y str") returns (true, 1) // overflowsRight("my str here", 3, "y strong") returns (false, 0) // overflowsRight("my str here", 6, "tr here") returns (true, 4) func overflowsRight(s string, endByteIdx int, substr string) (bool, int) { if len(s) == 0 || len(substr) == 0 || len(substr) > len(s) { return false, 0 } leftmostIdx := endByteIdx - len(substr) + 1 for offset := 0; offset < len(substr); offset++ { startIdx := leftmostIdx + offset if startIdx < 0 || startIdx+len(substr) > len(s) { continue } sl := s[startIdx : startIdx+len(substr)] if sl == substr { return true, leftmostIdx + offset } } return false, 0 } func replaceStartWithContinuation(s string, continuationRunes []rune) string { if len(s) == 0 || len(continuationRunes) == 0 { return s } var sb strings.Builder ansiCodeIndexes := findAnsiRuneRanges(s) runes := []rune(s) for runeIdx := 0; runeIdx < len(runes); { if len(ansiCodeIndexes) > 0 { codeStart, codeEnd := int(ansiCodeIndexes[0][0]), int(ansiCodeIndexes[0][1]) if runeIdx == codeStart { for j := codeStart; j < codeEnd; j++ { sb.WriteRune(runes[j]) } // skip ansi runeIdx = codeEnd ansiCodeIndexes = ansiCodeIndexes[1:] continue } } if len(continuationRunes) > 0 { rWidth := displaywidth.Rune(runes[runeIdx]) // if rune is wider than remaining continuation width, cut off the continuation remainingContinuationWidth := 0 for _, cr := range continuationRunes { remainingContinuationWidth += displaywidth.Rune(cr) } if rWidth > remainingContinuationWidth { sb.WriteRune(runes[runeIdx]) continuationRunes = nil } // replace current rune with continuation runes for rWidth > 0 && len(continuationRunes) > 0 { currContinuationRune := continuationRunes[0] sb.WriteRune(currContinuationRune) continuationRunes = continuationRunes[1:] rWidth -= displaywidth.Rune(currContinuationRune) } // skip subsequent zero-width runes that are not ansi sequences nextIdx := runeIdx + 1 for nextIdx < len(runes) { nextRWidth := displaywidth.Rune(runes[nextIdx]) if nextRWidth == 0 && nextIdx < len(runes) && !runesHaveAnsiPrefix(runes[nextIdx:]) { runeIdx++ nextIdx = runeIdx + 1 } else { break } } } else { sb.WriteRune(runes[runeIdx]) } runeIdx++ } return sb.String() } func replaceEndWithContinuation(s string, continuationRunes []rune) string { if len(s) == 0 || len(continuationRunes) == 0 { return s } // collect runes to prepend (we're iterating backwards) var runesToPrepend []rune ansiCodeIndexes := findAnsiRuneRanges(s) runes := []rune(s) for runeIdx := len(runes) - 1; runeIdx >= 0; { if len(ansiCodeIndexes) > 0 { lastAnsiCodeIndexes := ansiCodeIndexes[len(ansiCodeIndexes)-1] codeStart, codeEnd := int(lastAnsiCodeIndexes[0]), int(lastAnsiCodeIndexes[1]) if runeIdx == codeEnd-1 { for j := codeEnd - 1; j >= codeStart; j-- { runesToPrepend = append(runesToPrepend, runes[j]) } // skip ansi runeIdx = codeStart - 1 ansiCodeIndexes = ansiCodeIndexes[:len(ansiCodeIndexes)-1] continue } } if len(continuationRunes) > 0 { rWidth := displaywidth.Rune(runes[runeIdx]) // if rune is wider than remaining continuation width, cut off the continuation remainingContinuationWidth := 0 for _, cr := range continuationRunes { remainingContinuationWidth += displaywidth.Rune(cr) } if rWidth > remainingContinuationWidth { runesToPrepend = append(runesToPrepend, runes[runeIdx]) continuationRunes = nil } // replace current rune with continuation runes for rWidth > 0 && len(continuationRunes) > 0 { currContinuationRune := continuationRunes[len(continuationRunes)-1] runesToPrepend = append(runesToPrepend, currContinuationRune) continuationRunes = continuationRunes[:len(continuationRunes)-1] rWidth -= displaywidth.Rune(currContinuationRune) } } else { runesToPrepend = append(runesToPrepend, runes[runeIdx]) } runeIdx-- } // build result string efficiently var result strings.Builder result.Grow(len(runesToPrepend) * 4) // estimate 4 bytes per rune on average for i := len(runesToPrepend) - 1; i >= 0; i-- { result.WriteRune(runesToPrepend[i]) } return result.String() } // getBytesLeftOfWidth returns nBytes of content to the left of startItemIdx while excluding ANSI codes func getBytesLeftOfWidth(nBytes int, items []SingleItem, startItemIdx int, widthToLeft int) string { if nBytes < 0 { panic("nBytes must be greater than 0") } if nBytes == 0 || len(items) == 0 || startItemIdx >= len(items) { return "" } // first try to get bytes from the current item var result string currentItem := items[startItemIdx] runeIdx := currentItem.findRuneIndexWithWidthToLeft(widthToLeft) if runeIdx > 0 { var startByteOffset uint32 if runeIdx >= currentItem.numNoAnsiRunes { startByteOffset = clampIntToUint32(len(currentItem.lineNoAnsi)) } else { startByteOffset = currentItem.getByteOffsetAtRuneIdx(runeIdx) } noAnsiContent := currentItem.lineNoAnsi[:startByteOffset] if len(noAnsiContent) >= nBytes { return noAnsiContent[len(noAnsiContent)-nBytes:] } result = noAnsiContent nBytes -= len(noAnsiContent) } // if we need more bytes, look in previous items for i := startItemIdx - 1; i >= 0 && nBytes > 0; i-- { prevItem := items[i] noAnsiContent := prevItem.lineNoAnsi if len(noAnsiContent) >= nBytes { result = noAnsiContent[len(noAnsiContent)-nBytes:] + result break } result = noAnsiContent + result nBytes -= len(noAnsiContent) } return result } // getBytesRightOfWidth returns nBytes of content to the right of endItemIdx while excluding ANSI codes func getBytesRightOfWidth(nBytes int, items []SingleItem, endItemIdx int, widthToRight int) string { if nBytes < 0 { panic("nBytes must be greater than 0") } if nBytes == 0 || len(items) == 0 || endItemIdx >= len(items) { return "" } // first try to get bytes from the current item var result string currentItem := items[endItemIdx] if widthToRight > 0 { currentItemWidth := currentItem.Width() widthToLeft := currentItemWidth - widthToRight startRuneIdx := currentItem.findRuneIndexWithWidthToLeft(widthToLeft) if startRuneIdx < currentItem.numNoAnsiRunes { startByteOffset := currentItem.getByteOffsetAtRuneIdx(startRuneIdx) noAnsiContent := currentItem.lineNoAnsi[startByteOffset:] if len(noAnsiContent) >= nBytes { return noAnsiContent[:nBytes] } result = noAnsiContent nBytes -= len(noAnsiContent) } } // if we need more bytes, look in subsequent items for i := endItemIdx + 1; i < len(items) && nBytes > 0; i++ { nextItem := items[i] noAnsiContent := nextItem.lineNoAnsi if len(noAnsiContent) >= nBytes { result += noAnsiContent[:nBytes] break } result += noAnsiContent nBytes -= len(noAnsiContent) } return result } ================================================ FILE: modules/viewport/item/string_test.go ================================================ package item import ( "testing" "github.com/antgroup/hugescm/modules/viewport/internal" ) func TestString_overflowsLeft(t *testing.T) { tests := []struct { name string str string startByteIdx int substr string wantBool bool wantInt int }{ { name: "basic overflow case", str: "my str here", startByteIdx: 3, substr: "my str", wantBool: true, wantInt: 6, }, { name: "no overflow case", str: "my str here", startByteIdx: 6, substr: "my str", wantBool: false, wantInt: 0, }, { name: "empty string", str: "", startByteIdx: 0, substr: "test", wantBool: false, wantInt: 0, }, { name: "empty substring", str: "test string", startByteIdx: 0, substr: "", wantBool: false, wantInt: 0, }, { name: "startByteIdx out of bounds", str: "test", startByteIdx: 10, substr: "test", wantBool: false, wantInt: 0, }, { name: "exact full match", str: "hello world", startByteIdx: 0, substr: "hello world", wantBool: false, wantInt: 0, }, { name: "partial overflow at end", str: "hello world", startByteIdx: 9, substr: "dd", wantBool: false, wantInt: 0, }, { name: "case sensitivity test - no match", str: "Hello World", startByteIdx: 0, substr: "hello", wantBool: false, wantInt: 0, }, { name: "multiple character same overflow", str: "aaaa", startByteIdx: 1, substr: "aaa", wantBool: true, wantInt: 3, }, { name: "multiple character same overflow but difference", str: "aaaa", startByteIdx: 1, substr: "baaa", wantBool: false, wantInt: 0, }, { name: "special characters", str: "test!@#$", startByteIdx: 4, substr: "st!@#", wantBool: true, wantInt: 7, }, { name: "false if does not overflow", str: "some string", startByteIdx: 1, substr: "ome", wantBool: false, wantInt: 0, }, { name: "one char overflow", str: "some string", startByteIdx: 1, substr: "some", wantBool: true, wantInt: 4, }, // 世 is 3 bytes // 界 is 3 bytes // 🌟 is 4 bytes // "世界🌟世界🌟"[3:13] = "界🌟世" { name: "unicode with ansi left not overflowing", str: "世界🌟世界🌟", startByteIdx: 0, substr: "世界🌟世", wantBool: false, wantInt: 0, }, { name: "unicode with ansi left overflow 1 byte", str: "世界🌟世界🌟", startByteIdx: 1, substr: "世界🌟世", wantBool: true, wantInt: 13, }, { name: "unicode with ansi left overflow 2 bytes", str: "世界🌟世界🌟", startByteIdx: 2, substr: "世界🌟世", wantBool: true, wantInt: 13, }, { name: "unicode with ansi left overflow full rune", str: "世界🌟世界🌟", startByteIdx: 3, substr: "世界🌟世", wantBool: true, wantInt: 13, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { gotBool, gotInt := overflowsLeft(tt.str, tt.startByteIdx, tt.substr) if gotBool != tt.wantBool || gotInt != tt.wantInt { t.Errorf("overflowsLeft(%q, %d, %q) = (%v, %d), want (%v, %d)", tt.str, tt.startByteIdx, tt.substr, gotBool, gotInt, tt.wantBool, tt.wantInt) } }) } } func TestString_overflowsRight(t *testing.T) { tests := []struct { name string s string endByteIdx int substr string wantBool bool wantInt int }{ { name: "example 1", s: "my str here", endByteIdx: 3, substr: "y str", wantBool: true, wantInt: 1, }, { name: "example 2", s: "my str here", endByteIdx: 3, substr: "y strong", wantBool: false, wantInt: 0, }, { name: "example 3", s: "my str here", endByteIdx: 6, substr: "tr here", wantBool: true, wantInt: 4, }, { name: "empty string", s: "", endByteIdx: 0, substr: "test", wantBool: false, wantInt: 0, }, { name: "empty substring", s: "test string", endByteIdx: 0, substr: "", wantBool: false, wantInt: 0, }, { name: "end index out of bounds", s: "test", endByteIdx: 10, substr: "test", wantBool: false, wantInt: 0, }, { name: "exact full match", s: "hello world", endByteIdx: 11, substr: "hello world", wantBool: false, wantInt: 0, }, { name: "case sensitivity test - no match", s: "Hello World", endByteIdx: 4, substr: "hello", wantBool: false, wantInt: 0, }, { name: "multiple character same overflow", s: "aaaa", endByteIdx: 2, substr: "aaa", wantBool: true, wantInt: 0, }, { name: "multiple character same overflow but difference", s: "aaaa", endByteIdx: 2, substr: "aaab", wantBool: false, wantInt: 0, }, { name: "false if does not overflow", s: "some string", endByteIdx: 5, substr: "ome ", wantBool: false, wantInt: 0, }, { name: "one char overflow", s: "some string", endByteIdx: 5, substr: "ome s", wantBool: true, wantInt: 1, }, // 世 is 3 bytes // 界 is 3 bytes // 🌟 is 4 bytes // "世界🌟世界🌟"[3:10] = "界🌟" { name: "unicode with ansi no overflow", s: "世界🌟世界🌟", endByteIdx: 13, substr: "界🌟世", wantBool: false, wantInt: 0, }, { name: "unicode with ansi overflow right one byte", s: "世界🌟世界🌟", endByteIdx: 12, substr: "界🌟世", wantBool: true, wantInt: 3, }, { name: "unicode with ansi overflow right two bytes", s: "世界🌟世界🌟", endByteIdx: 11, substr: "界🌟世", wantBool: true, wantInt: 3, }, { name: "unicode with ansi overflow right full rune", s: "世界🌟世界🌟", endByteIdx: 10, substr: "界🌟世", wantBool: true, wantInt: 3, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { gotBool, gotInt := overflowsRight(tt.s, tt.endByteIdx, tt.substr) if gotBool != tt.wantBool || gotInt != tt.wantInt { t.Errorf("overflowsRight(%q, %d, %q) = (%v, %d), want (%v, %d)", tt.s, tt.endByteIdx, tt.substr, gotBool, gotInt, tt.wantBool, tt.wantInt) } }) } } func TestString_replaceStartWithContinuation(t *testing.T) { tests := []struct { name string s string continuation string expected string }{ { name: "empty", s: "", continuation: "", expected: "", }, { name: "empty continuation", s: "my string", continuation: "", expected: "my string", }, { name: "simple", s: "my string", continuation: "...", expected: "...string", }, { name: "ansi from start", s: "\x1b[31mmy string" + RST, continuation: "...", expected: "\x1b[31m...string" + RST, }, { name: "ansi overlaps continuation", s: "m\x1b[31my string" + RST, continuation: "...", expected: ".\x1b[31m..string" + RST, }, { name: "unicode", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖中é", continuation: "...", expected: "...中é", }, { name: "unicode leading combined", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "é💖中", continuation: "...", expected: "...中", }, { name: "unicode combined", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "💖é💖中", continuation: "...", expected: "...💖中", }, { name: "unicode width overlap", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "中💖中é", continuation: "...", expected: "..💖中é", // continuation shrinks by 1 }, { name: "unicode start", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖中é", continuation: "...", expected: "...中é", }, { name: "unicode start ansi", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: internal.RedBg.Render("A💖") + "中é", continuation: "...", expected: internal.RedBg.Render("...") + "中é", }, { name: "unicode almost start ansi", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A" + internal.RedBg.Render("💖") + "中é", continuation: "...", expected: "." + internal.RedBg.Render("..") + "中é", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if r := replaceStartWithContinuation(tt.s, []rune(tt.continuation)); r != tt.expected { t.Errorf("expected %q, got %q", tt.expected, r) } }) } } func TestString_replaceEndWithContinuation(t *testing.T) { tests := []struct { name string s string continuation string expected string }{ { name: "empty", s: "", continuation: "", expected: "", }, { name: "empty continuation", s: "my string", continuation: "", expected: "my string", }, { name: "simple", s: "my string", continuation: "...", expected: "my str...", }, { name: "ansi from end", s: "\x1b[31mmy string" + RST, continuation: "...", expected: "\x1b[31mmy str..." + RST, }, { name: "ansi overlaps continuation", s: "\x1b[31mmy strin" + RST + "g", continuation: "...", expected: "\x1b[31mmy str.." + RST + ".", }, { name: "unicode", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖中é", continuation: "...", expected: "A💖...", }, { name: "unicode trailing combined", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖中é", continuation: "...", expected: "A💖...", }, { name: "unicode combined", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖é中", continuation: "...", expected: "A💖...", }, { name: "unicode width overlap", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "💖中", continuation: "...", expected: "💖..", // continuation shrinks by 1 }, { name: "unicode end", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖中é", continuation: "...", expected: "A💖...", }, { name: "unicode end ansi", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A💖" + internal.RedBg.Render("中é"), continuation: "...", expected: "A💖" + internal.RedBg.Render("..."), }, { name: "unicode almost end ansi", // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) s: "A" + internal.RedBg.Render("💖中") + "é", continuation: "...", expected: "A" + internal.RedBg.Render("💖..") + ".", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if r := replaceEndWithContinuation(tt.s, []rune(tt.continuation)); r != tt.expected { t.Errorf("expected %q, got %q", tt.expected, r) } }) } } func TestString_getBytesLeftOfWidth(t *testing.T) { tests := []struct { name string items []SingleItem nBytes int startItemIdx int widthToLeft int expected string shouldPanic bool }{ { name: "empty items", items: nil, nBytes: 1, startItemIdx: 0, widthToLeft: 0, expected: "", }, { name: "negative bytes", items: []SingleItem{NewItem("abc")}, nBytes: -1, startItemIdx: 0, widthToLeft: 1, shouldPanic: true, }, { name: "zero bytes", items: []SingleItem{NewItem("abc")}, nBytes: 0, startItemIdx: 0, widthToLeft: 1, expected: "", }, { name: "item index out of bounds", items: []SingleItem{NewItem("abc")}, nBytes: 1, startItemIdx: 1, widthToLeft: 0, expected: "", }, { name: "single item full content", items: []SingleItem{NewItem("abc")}, nBytes: 3, startItemIdx: 0, widthToLeft: 3, expected: "abc", }, { name: "single item partial content", items: []SingleItem{NewItem("abc")}, nBytes: 2, startItemIdx: 0, widthToLeft: 2, expected: "ab", }, { name: "multiple items full content", items: []SingleItem{ NewItem("abc"), NewItem("def"), }, nBytes: 6, startItemIdx: 1, widthToLeft: 3, expected: "abcdef", }, { name: "multiple items partial content", items: []SingleItem{ NewItem("abc"), NewItem("def"), }, nBytes: 4, startItemIdx: 1, widthToLeft: 2, expected: "bcde", }, { name: "ignore ansi codes", items: []SingleItem{ NewItem("a" + internal.RedBg.Render("b") + "c"), NewItem(internal.RedBg.Render("def")), }, nBytes: 5, startItemIdx: 1, widthToLeft: 3, expected: "bcdef", }, // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) { name: "unicode characters", items: []SingleItem{ NewItem("A💖中"), NewItem("é"), }, nBytes: 10, startItemIdx: 1, widthToLeft: 1, expected: "💖中é", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if tt.shouldPanic { assertPanic(t, func() { getBytesLeftOfWidth(tt.nBytes, tt.items, tt.startItemIdx, tt.widthToLeft) }) return } if got := getBytesLeftOfWidth(tt.nBytes, tt.items, tt.startItemIdx, tt.widthToLeft); got != tt.expected { t.Errorf("getBytesLeftOfWidth() = %v, want %v", []byte(got), []byte(tt.expected)) } }) } } func TestString_getBytesRightOfWidth(t *testing.T) { tests := []struct { name string items []SingleItem nBytes int endItemIdx int widthToRight int expected string shouldPanic bool }{ { name: "empty items", items: nil, nBytes: 1, endItemIdx: 0, widthToRight: 0, expected: "", }, { name: "negative bytes", items: []SingleItem{NewItem("abc")}, nBytes: -1, endItemIdx: 0, widthToRight: 1, shouldPanic: true, }, { name: "zero bytes", items: []SingleItem{NewItem("abc")}, nBytes: 0, endItemIdx: 0, widthToRight: 1, expected: "", }, { name: "item index out of bounds", items: []SingleItem{NewItem("abc")}, nBytes: 1, endItemIdx: 1, widthToRight: 0, expected: "", }, { name: "single item full content", items: []SingleItem{NewItem("abc")}, nBytes: 3, endItemIdx: 0, widthToRight: 3, expected: "abc", }, { name: "single item partial content", items: []SingleItem{NewItem("abc")}, nBytes: 2, endItemIdx: 0, widthToRight: 2, expected: "bc", }, { name: "multiple items full content", items: []SingleItem{ NewItem("abc"), NewItem("def"), }, nBytes: 6, endItemIdx: 0, widthToRight: 3, expected: "abcdef", }, { name: "multiple items partial content", items: []SingleItem{ NewItem("abc"), NewItem("def"), }, nBytes: 4, endItemIdx: 0, widthToRight: 2, expected: "bcde", }, { name: "ignore ansi codes", items: []SingleItem{ NewItem("a" + internal.RedBg.Render("b") + "c"), NewItem(internal.RedBg.Render("def")), }, nBytes: 5, endItemIdx: 0, widthToRight: 2, expected: "bcdef", }, // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) { name: "unicode characters", items: []SingleItem{ NewItem("A💖中"), NewItem("é"), }, nBytes: 10, endItemIdx: 0, widthToRight: 4, expected: "💖中é", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if tt.shouldPanic { assertPanic(t, func() { getBytesRightOfWidth(tt.nBytes, tt.items, tt.endItemIdx, tt.widthToRight) }) return } if got := getBytesRightOfWidth(tt.nBytes, tt.items, tt.endItemIdx, tt.widthToRight); got != tt.expected { t.Errorf("getBytesRightOfWidth() = %v, want %v", []byte(got), []byte(tt.expected)) } }) } } ================================================ FILE: modules/viewport/keymap.go ================================================ package viewport import ( "charm.land/bubbles/v2/key" ) // KeyMap contains viewport key bindings type KeyMap struct { PageDown key.Binding PageUp key.Binding HalfPageUp key.Binding HalfPageDown key.Binding Up key.Binding Down key.Binding Left key.Binding Right key.Binding Top key.Binding Bottom key.Binding } // DefaultKeyMap returns a set of default key bindings for the viewport func DefaultKeyMap() KeyMap { return KeyMap{ PageDown: key.NewBinding( key.WithKeys("pgdown", "f", "ctrl+f", "space"), key.WithHelp("space/f", "page down"), ), PageUp: key.NewBinding( key.WithKeys("pgup", "b", "ctrl+b"), key.WithHelp("b", "pgup"), ), HalfPageUp: key.NewBinding( key.WithKeys("u", "ctrl+u"), key.WithHelp("u", "½ page up"), ), HalfPageDown: key.NewBinding( key.WithKeys("d", "ctrl+d"), key.WithHelp("d", "½ page down"), ), Up: key.NewBinding( key.WithKeys("up", "k"), key.WithHelp("↑/k", "scroll up"), ), Down: key.NewBinding( key.WithKeys("down", "j", "enter"), key.WithHelp("↓/j", "scroll down"), ), Left: key.NewBinding( key.WithKeys("left"), key.WithHelp("←", "left"), ), Right: key.NewBinding( key.WithKeys("right"), key.WithHelp("→", "right"), ), Top: key.NewBinding( key.WithKeys("g", "ctrl+g", "home"), key.WithHelp("g", "top"), ), Bottom: key.NewBinding( key.WithKeys("G", "end"), key.WithHelp("G", "bottom"), ), } } ================================================ FILE: modules/viewport/navigation_manager.go ================================================ package viewport import ( "charm.land/bubbles/v2/key" tea "charm.land/bubbletea/v2" ) // navigationManager manages keyboard input and navigation logic type navigationManager struct { // keyMap is the keymap for the viewport keyMap KeyMap // selectionEnabled is true if the viewport allows individual line selection selectionEnabled bool // topSticky is true when selection should remain at the top until user manually scrolls down topSticky bool // bottomSticky is true when selection should remain at the bottom until user manually scrolls up bottomSticky bool } // newNavigationManager creates a new navigationManager with the specified key mappings. func newNavigationManager(keyMap KeyMap) *navigationManager { return &navigationManager{ keyMap: keyMap, selectionEnabled: false, topSticky: false, bottomSticky: false, } } // navigationAction represents a navigation command type navigationAction int const ( // actionNone represents no navigation action. actionNone navigationAction = iota // actionUp represents moving up one item. actionUp // actionDown represents moving down one item. actionDown // actionLeft represents moving left horizontally. actionLeft // actionRight represents moving right horizontally. actionRight // actionHalfPageUp represents moving up half a page. actionHalfPageUp // actionHalfPageDown represents moving down half a page. actionHalfPageDown // actionPageUp represents moving up one page. actionPageUp // actionPageDown represents moving down one page. actionPageDown // actionTop represents moving to the top. actionTop // actionBottom represents moving to the bottom. actionBottom ) // navigationContext contains the context needed for navigation calculations type navigationContext struct { wrapText bool dimensions rectangle numContentLines int numVisibleItems int } // navigationResult contains the result of processing a navigation action type navigationResult struct { action navigationAction scrollAmount int // lines to scroll selectionAmount int // items to move selection } // processKeyMsg processes a keyboard message and returns the corresponding navigation action func (nm navigationManager) processKeyMsg(msg tea.KeyMsg, ctx navigationContext) navigationResult { switch { case key.Matches(msg, nm.keyMap.Up): return navigationResult{action: actionUp, scrollAmount: 1, selectionAmount: 1} case key.Matches(msg, nm.keyMap.Down): return navigationResult{action: actionDown, scrollAmount: 1, selectionAmount: 1} case key.Matches(msg, nm.keyMap.Left): if !ctx.wrapText { return navigationResult{action: actionLeft, scrollAmount: ctx.dimensions.width / 4} } case key.Matches(msg, nm.keyMap.Right): if !ctx.wrapText { return navigationResult{action: actionRight, scrollAmount: ctx.dimensions.width / 4} } case key.Matches(msg, nm.keyMap.HalfPageUp): scrollAmount := ctx.numContentLines / 2 selectionAmount := max(1, ctx.numVisibleItems/2) return navigationResult{action: actionHalfPageUp, scrollAmount: scrollAmount, selectionAmount: selectionAmount} case key.Matches(msg, nm.keyMap.HalfPageDown): scrollAmount := ctx.numContentLines / 2 selectionAmount := max(1, ctx.numVisibleItems/2) return navigationResult{action: actionHalfPageDown, scrollAmount: scrollAmount, selectionAmount: selectionAmount} case key.Matches(msg, nm.keyMap.PageUp): scrollAmount := ctx.numContentLines selectionAmount := ctx.numVisibleItems return navigationResult{action: actionPageUp, scrollAmount: scrollAmount, selectionAmount: selectionAmount} case key.Matches(msg, nm.keyMap.PageDown): scrollAmount := ctx.numContentLines selectionAmount := ctx.numVisibleItems return navigationResult{action: actionPageDown, scrollAmount: scrollAmount, selectionAmount: selectionAmount} case key.Matches(msg, nm.keyMap.Top): return navigationResult{action: actionTop} case key.Matches(msg, nm.keyMap.Bottom): return navigationResult{action: actionBottom} } return navigationResult{action: actionNone} } ================================================ FILE: modules/viewport/object.go ================================================ package viewport import "github.com/antgroup/hugescm/modules/viewport/item" // Object is implemented by types that can return an Item // It exists to allow the viewport to return the selected object without (de)serializing it type Object interface { GetItem() item.Item } ================================================ FILE: modules/viewport/styles.go ================================================ package viewport import ( "charm.land/lipgloss/v2" ) // Styles contains styling configuration for the viewport type Styles struct { // SelectionPrefix is prepended to each visible line of the selected item. // Non-selected lines get equivalent-width blank padding to maintain alignment. // Only applied when selection is enabled and this string is non-empty. // This is the primary mechanism for selection visibility under NO_COLOR. SelectionPrefix string FooterStyle lipgloss.Style SelectedItemStyle lipgloss.Style } // DefaultStyles returns a set of default styles for the viewport. // Uses only reverse video — no 256-color or true-color values. func DefaultStyles() Styles { return Styles{ SelectionPrefix: "", FooterStyle: lipgloss.NewStyle(), SelectedItemStyle: lipgloss.NewStyle().Reverse(true), } } ================================================ FILE: modules/viewport/viewport.go ================================================ package viewport import ( "fmt" "os" "path/filepath" "regexp" "strings" "time" "charm.land/bubbles/v2/key" "charm.land/bubbles/v2/textinput" tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport/item" ) // Terminology: // - object: an object of type T that implements the Object interface, i.e. has an Item() method // - item: the item.Item returned by an object's Item() method. A single item may span multiple viewport lines. // if selection is enabled, the item is the selectable unit // - line: a line of text on one row of terminal cells // - visible: in the vertical sense, a line is visible if it is within the viewport // - truncated: in the horizontal sense, a line is truncated if it is too long to fit in the viewport // // wrap disabled, wide enough viewport: // item index line index // this is the first line 0 0 // this is the second line 1 1 // // wrap disabled, overflows viewport width: // item index line index // this is the first... 0 0 // this is the secon... 1 1 // // wrap enabled: // item index line index // this is the 0 0 // first line 0 1 // this is the 1 2 // second line 1 3 var surroundingAnsiRegex = regexp.MustCompile(`(\x1b\[[0-9;]*m.*?\x1b\[0?m)`) // CompareFn is a function type for comparing two items of type T. type CompareFn[T any] func(a, b T) bool // Option is a functional option for configuring the viewport type Option[T Object] func(*Model[T]) // WithKeyMap sets the key mapping for the viewport func WithKeyMap[T Object](keyMap KeyMap) Option[T] { return func(m *Model[T]) { m.navigation.keyMap = keyMap } } // WithStyles sets the styling for the viewport func WithStyles[T Object](styles Styles) Option[T] { return func(m *Model[T]) { m.display.styles = styles } } // WithWrapText sets whether the viewport wraps text func WithWrapText[T Object](wrap bool) Option[T] { return func(m *Model[T]) { m.SetWrapText(wrap) } } // WithSelectionEnabled sets whether the viewport allows selection func WithSelectionEnabled[T Object](enabled bool) Option[T] { return func(m *Model[T]) { m.SetSelectionEnabled(enabled) } } // WithFooterEnabled sets whether the viewport shows the footer func WithFooterEnabled[T Object](enabled bool) Option[T] { return func(m *Model[T]) { m.SetFooterEnabled(enabled) } } // WithProgressBarEnabled sets whether the footer displays a Unicode progress bar func WithProgressBarEnabled[T Object](enabled bool) Option[T] { return func(m *Model[T]) { m.SetProgressBarEnabled(enabled) } } // WithStickyTop sets whether to automatically scroll to the top when content changes func WithStickyTop[T Object](stickyTop bool) Option[T] { return func(m *Model[T]) { m.SetTopSticky(stickyTop) } } // WithStickyBottom sets whether to automatically scroll to the bottom when content changes func WithStickyBottom[T Object](stickyBottom bool) Option[T] { return func(m *Model[T]) { m.SetBottomSticky(stickyBottom) } } // WithSelectionStyleOverridesItemStyle controls whether the selection style replaces the item's // existing ANSI styling. When true (default), the selected item is stripped of its original // styling and the selection style is applied to all non-highlighted regions. When false, // the item keeps its original styling and the selection style is applied only to unstyled regions. func WithSelectionStyleOverridesItemStyle[T Object](overrides bool) Option[T] { return func(m *Model[T]) { m.config.selectionStyleOverridesItemStyle = overrides } } // WithFileSaving configures automatic file saving when a hotkey is pressed. // Files are saved to the specified directory with timestamp-based names. func WithFileSaving[T Object](saveDir string, saveKey key.Binding) Option[T] { return func(m *Model[T]) { m.config.saveDir = saveDir m.config.saveKey = saveKey } } // Model represents a viewport component type Model[T Object] struct { // content manages the content and selection state content *contentManager[T] // display handles rendering display *displayManager // navigation manages keyboard input and navigation logic navigation *navigationManager // config manages configuration options config *configuration } // New creates a new viewport model with reasonable defaults func New[T Object](width, height int, opts ...Option[T]) (m *Model[T]) { if width < 0 { width = 0 } if height < 0 { height = 0 } m = &Model[T]{} m.content = newContentManager[T]() m.display = newDisplayManager(width, height, DefaultStyles()) m.navigation = newNavigationManager(DefaultKeyMap()) m.config = newConfiguration() for _, opt := range opts { if opt != nil { opt(m) } } return m } // Update processes messages and updates the model func (m *Model[T]) Update(msg tea.Msg) (*Model[T], tea.Cmd) { var ( cmd tea.Cmd cmds []tea.Cmd ) // route all messages to filename textinput when actively entering filename if m.config.saveState.enteringFilename { if keyMsg, ok := msg.(tea.KeyPressMsg); ok { switch keyMsg.Code { case tea.KeyEnter: filename := m.config.saveState.filenameInput.Value() if filename == "" { filename = time.Now().Format("20060102-150405") + ".txt" } else if !strings.HasSuffix(filename, ".txt") { filename += ".txt" } m.config.saveState.enteringFilename = false m.config.saveState.saving = true return m, m.saveToFile(filename) case tea.KeyEscape: m.config.saveState.enteringFilename = false return m, nil } } // forward all non-KeyMsg messages to textinput (e.g. cursor blink) m.config.saveState.filenameInput, cmd = m.config.saveState.filenameInput.Update(msg) return m, cmd } switch msg := msg.(type) { case tea.KeyMsg: if key.Matches(msg, m.config.saveKey) { saveDirDefined := m.config.saveDir != "" saving := m.config.saveState.saving showingResult := m.config.saveState.showingResult enteringFilename := m.config.saveState.enteringFilename if !saveDirDefined || saving || showingResult || enteringFilename { return m, nil } ti := textinput.New() ti.Placeholder = time.Now().Format("20060102-150405") + ".txt" ti.Focus() ti.CharLimit = 256 ti.SetWidth(m.display.bounds.width - 20) m.config.saveState.filenameInput = ti m.config.saveState.enteringFilename = true return m, textinput.Blink } case fileSavedMsg: // update save state with result m.config.saveState.saving = false m.config.saveState.showingResult = true if msg.err != nil { m.config.saveState.isError = true m.config.saveState.resultMsg = fmt.Sprintf("Save failed: %v", msg.err) } else { m.config.saveState.isError = false m.config.saveState.resultMsg = fmt.Sprintf("Saved to %s", msg.filename) } // start 4 second timer to clear result cmd = func() tea.Msg { time.Sleep(4 * time.Second) return clearSaveResultMsg{} } cmds = append(cmds, cmd) return m, tea.Batch(cmds...) case clearSaveResultMsg: // clear the save result display m.config.saveState.showingResult = false m.config.saveState.resultMsg = "" m.config.saveState.isError = false return m, nil } // handle navigation for KeyMsg if keyMsg, ok := msg.(tea.KeyMsg); ok { navCtx := navigationContext{ wrapText: m.config.wrapText, dimensions: m.display.bounds, numContentLines: m.getNumContentLines(), numVisibleItems: m.getNumVisibleItems(), } navResult := m.navigation.processKeyMsg(keyMsg, navCtx) switch navResult.action { case actionUp: if m.navigation.selectionEnabled { m.SetSelectedItemIdx(m.content.getSelectedIdx() - navResult.selectionAmount) } else { m.scrollDownLines(-navResult.scrollAmount) } case actionDown: if m.navigation.selectionEnabled { m.SetSelectedItemIdx(m.content.getSelectedIdx() + navResult.selectionAmount) } else { m.scrollDownLines(navResult.scrollAmount) } case actionLeft: if !m.config.wrapText { m.SetXOffset(m.display.xOffset - navResult.scrollAmount) } case actionRight: if !m.config.wrapText { m.SetXOffset(m.display.xOffset + navResult.scrollAmount) } case actionHalfPageUp, actionPageUp: m.scrollDownLines(-navResult.scrollAmount) if m.navigation.selectionEnabled { m.SetSelectedItemIdx(m.content.getSelectedIdx() - navResult.selectionAmount) } case actionHalfPageDown, actionPageDown: m.scrollDownLines(navResult.scrollAmount) if m.navigation.selectionEnabled { m.SetSelectedItemIdx(m.content.getSelectedIdx() + navResult.selectionAmount) } case actionTop: if m.navigation.selectionEnabled { m.SetSelectedItemIdx(0) } else { m.display.topItemIdx = 0 m.display.topItemLineOffset = 0 } case actionBottom: if m.navigation.selectionEnabled { m.SetSelectedItemIdx(m.content.getSelectedIdx() + m.content.numItems()) } else { maxItemIdx, maxTopLineOffset := m.maxItemIdxAndMaxTopLineOffset() m.display.setTopItemIdxAndOffset(maxItemIdx, maxTopLineOffset) } default: // no-op on keypress that doesn't produce a selection action } } cmds = append(cmds, cmd) return m, tea.Batch(cmds...) } // View renders the viewport func (m *Model[T]) View() string { var builder strings.Builder wrap := m.config.wrapText visibleHeaderLines := m.getVisibleHeaderLines() itemIndexes := m.getVisibleContentItemIndexes() // pre-allocate capacity based on estimated size estimatedSize := (len(visibleHeaderLines) + len(itemIndexes) + 10) * (m.display.bounds.width + 1) builder.Grow(estimatedSize) // header lines for i := range visibleHeaderLines { headerItem := item.NewItem(visibleHeaderLines[i]) line, _ := headerItem.Take(0, m.display.bounds.width, m.config.continuationIndicator, []item.Highlight{}) builder.WriteString(line) builder.WriteByte('\n') } // render post-header line if set if m.config.postHeaderLine != "" { postHeaderItem := item.NewItem(m.config.postHeaderLine) truncated, _ := postHeaderItem.Take(0, m.display.bounds.width, m.config.continuationIndicator, []item.Highlight{}) builder.WriteString(truncated) builder.WriteByte('\n') } // content lines — render each visible line using segment-aware logic. // An item may have multiple line-broken segments (via LineBrokenItems()), each rendered // on a separate terminal line and wrapping independently. truncatedVisibleContentLines := make([]string, len(itemIndexes)) // selection prefix: when selection is enabled and a prefix is configured, // prepend the prefix to selected lines and equivalent padding to others cw := m.contentWidth() hasPrefix := m.navigation.selectionEnabled && m.display.styles.SelectionPrefix != "" prefixPad := m.selectionPrefixPadding() // segment tracking state for multi-line items var currentSegments []item.Item currentSegIdx := 0 currentCellsToLeft := 0 prevItemIdx := -1 // initialize segment state for the first visible item if wrap && len(itemIndexes) > 0 { topItem := m.content.objects[itemIndexes[0]].GetItem() currentSegments = topItem.LineBrokenItems() var wrapOffset int currentSegIdx, wrapOffset = decomposeLineOffset(currentSegments, m.display.topItemLineOffset, cw) currentCellsToLeft = wrapOffset * cw prevItemIdx = itemIndexes[0] } for idx, itemIdx := range itemIndexes { // when we encounter a new item, refresh segment tracking if itemIdx != prevItemIdx { fullItem := m.content.objects[itemIdx].GetItem() currentSegments = fullItem.LineBrokenItems() currentSegIdx = 0 currentCellsToLeft = 0 prevItemIdx = itemIdx } var truncated string isSelection := m.navigation.selectionEnabled && itemIdx == m.content.getSelectedIdx() // get highlights for this item and remap to current segment highlights := m.getHighlightsForItem(itemIdx) if isSelection && m.config.selectionStyleOverridesItemStyle { highlights = m.selectionHighlights(itemIdx, highlights) } highlights = remapHighlightsForSegment(highlights, currentSegments, currentSegIdx) // get the current segment to render segment := currentSegments[currentSegIdx] // when selection style overrides item style, use a stripped segment (no ANSI) so only // highlight styling applies, preventing original content styling from leaking through if isSelection && m.config.selectionStyleOverridesItemStyle { segment = item.NewItem(segment.ContentNoAnsi()) } if wrap { var widthTaken int truncated, widthTaken = segment.Take( currentCellsToLeft, cw, "", highlights, ) // advance segment tracking for next iteration if idx+1 < len(itemIndexes) && itemIndexes[idx+1] == itemIdx { currentCellsToLeft += widthTaken if currentCellsToLeft >= segment.Width() { currentSegIdx++ currentCellsToLeft = 0 } } } else { // non-wrapped: render segment with horizontal panning truncated, _ = segment.Take( m.display.xOffset, cw, m.config.continuationIndicator, highlights, ) } if isSelection && !m.config.selectionStyleOverridesItemStyle { truncated = m.styleSelection(truncated) } pannedRight := m.display.xOffset > 0 segmentHasWidth := segment.Width() > 0 pannedPastAllWidth := lipgloss.Width(truncated) == 0 if !wrap && pannedRight && segmentHasWidth && pannedPastAllWidth { // if panned right past where line ends, show continuation indicator continuation := item.NewItem(m.config.continuationIndicator) truncated, _ = continuation.Take(0, cw, "", []item.Highlight{}) if isSelection { truncated = m.display.styles.SelectedItemStyle.Render(item.StripAnsi(truncated)) } } if isSelection && lipgloss.Width(truncated) == 0 { // ensure selection is visible even if line empty truncated = m.display.styles.SelectedItemStyle.Render(" ") } // prepend selection prefix or padding if hasPrefix { if isSelection { truncated = m.display.styles.SelectionPrefix + truncated } else { truncated = prefixPad + truncated } } truncatedVisibleContentLines[idx] = truncated } for i := range truncatedVisibleContentLines { builder.WriteString(truncatedVisibleContentLines[i]) builder.WriteByte('\n') } nVisibleLines := len(itemIndexes) padCount := max(0, m.getNumContentLines()-nVisibleLines) for range padCount { builder.WriteByte('\n') } // render pre-footer line if set if m.config.preFooterLine != "" { preFooterItem := item.NewItem(m.config.preFooterLine) truncated, _ := preFooterItem.Take(0, m.display.bounds.width, m.config.continuationIndicator, []item.Highlight{}) builder.WriteString(truncated) builder.WriteByte('\n') } if m.config.saveState.enteringFilename { // show filename input in footer prompt := "Save as: " inputView := m.config.saveState.filenameInput.View() footerContent := prompt + inputView footerItem := item.NewItem(footerContent) truncated, _ := footerItem.Take(0, m.display.bounds.width, m.config.continuationIndicator, []item.Highlight{}) builder.WriteString(m.display.styles.FooterStyle.Render(truncated)) } else if m.config.saveState.saving || m.config.saveState.showingResult { // show save status footer var statusMsg string if m.config.saveState.saving { statusMsg = "Saving..." } else if m.config.saveState.showingResult { statusMsg = m.config.saveState.resultMsg } statusItem := item.NewItem(statusMsg) truncated, _ := statusItem.Take(0, m.display.bounds.width, m.config.continuationIndicator, []item.Highlight{}) styledMsg := m.display.styles.FooterStyle.Render(truncated) builder.WriteString(styledMsg) } else if m.config.footerEnabled { // pad so footer shows up at bottom builder.WriteString(m.getTruncatedFooterLine(itemIndexes)) } return m.display.render(strings.TrimSuffix(builder.String(), "\n")) } // SetObjects sets the objects func (m *Model[T]) SetObjects(objects []T) { var initialNumLinesAboveSelection int var stayAtTop, stayAtBottom bool var prevSelection T if m.navigation.selectionEnabled { if inView := m.selectionInViewInfo(); inView.numLinesSelectionInView > 0 { initialNumLinesAboveSelection = inView.numLinesAboveSelection } currentItems := m.content.objects selectedIdx := m.content.getSelectedIdx() if m.navigation.topSticky && len(currentItems) > 0 && selectedIdx == 0 { stayAtTop = true } else if m.navigation.bottomSticky && (len(currentItems) == 0 || (selectedIdx == len(currentItems)-1)) { stayAtBottom = true } else if m.content.compareFn != nil && 0 <= selectedIdx && selectedIdx < len(currentItems) { prevSelection = currentItems[selectedIdx] } } else { if m.navigation.topSticky && m.isScrolledToTop() { stayAtTop = true } else if m.navigation.bottomSticky && m.isScrolledToBottom() { stayAtBottom = true } } m.content.objects = objects // ensure scroll position is valid given new Item m.safelySetTopItemIdxAndOffset(m.display.topItemIdx, m.display.topItemLineOffset) // ensure xOffset is valid given new Item m.SetXOffset(m.display.xOffset) if m.navigation.selectionEnabled { if stayAtTop { m.content.setSelectedIdx(0) } else if stayAtBottom { m.content.setSelectedIdx(max(0, m.content.numItems()-1)) m.scrollSoSelectionInView() } else if m.content.compareFn != nil { // TODO: could flag when items are sorted & comparable and use binary search instead found := false items := m.content.objects for i := range items { if m.content.compareFn(items[i], prevSelection) { m.content.setSelectedIdx(i) found = true break } } if !found { m.content.setSelectedIdx(0) } } // when staying at bottom, just want to scroll so selection in view, which is done above if !stayAtBottom { m.content.selectedIdx = clampValZeroToMax(m.content.selectedIdx, len(m.content.objects)-1) m.scrollSoSelectionInView() if inView := m.selectionInViewInfo(); inView.numLinesSelectionInView > 0 { deltaLinesAbove := initialNumLinesAboveSelection - inView.numLinesAboveSelection m.scrollDownLines(-deltaLinesAbove) } } } else { if stayAtTop { m.display.setTopItemIdxAndOffset(0, 0) } else if stayAtBottom { maxItemIdx, maxTopLineOffset := m.maxItemIdxAndMaxTopLineOffset() m.display.setTopItemIdxAndOffset(maxItemIdx, maxTopLineOffset) } } } // SetTopSticky sets whether selection should stay at top when new Item added and selection is at the top func (m *Model[T]) SetTopSticky(topSticky bool) { m.navigation.topSticky = topSticky } // SetBottomSticky sets whether selection should stay at bottom when new Item added and selection is at the bottom func (m *Model[T]) SetBottomSticky(bottomSticky bool) { m.navigation.bottomSticky = bottomSticky } // SetSelectionEnabled sets whether the viewport allows line selection func (m *Model[T]) SetSelectionEnabled(selectionEnabled bool) { wasEnabled := m.navigation.selectionEnabled m.navigation.selectionEnabled = selectionEnabled // when enabling selection, set the selected item to the top visible item and ensure the top line is in view if selectionEnabled && !wasEnabled && !m.content.isEmpty() { topVisibleItemIdx := clampValZeroToMax(m.display.topItemIdx, m.content.numItems()-1) m.content.setSelectedIdx(topVisibleItemIdx) m.scrollSoSelectionInView() } } // SetFooterEnabled sets whether the viewport shows the footer when it overflows func (m *Model[T]) SetFooterEnabled(footerEnabled bool) { m.config.footerEnabled = footerEnabled } // SetProgressBarEnabled sets whether the footer displays a Unicode progress bar in the footer func (m *Model[T]) SetProgressBarEnabled(enabled bool) { m.config.progressBarEnabled = enabled } // SetPostHeaderLine sets a line to render just below the header. // Pass empty string to disable. The line will be truncated to viewport width. func (m *Model[T]) SetPostHeaderLine(line string) { m.config.postHeaderLine = line } // SetPreFooterLine sets a line to render just above the footer. // Pass empty string to disable. The line will be truncated to viewport width. func (m *Model[T]) SetPreFooterLine(line string) { m.config.preFooterLine = line } // GetPreFooterLine returns the current pre-footer line. func (m *Model[T]) GetPreFooterLine() string { return m.config.preFooterLine } // SetSelectionComparator sets the comparator function for maintaining the current selection when Item changes. // If compareFn is non-nil, the viewport will try to maintain the current selection when Item changes. func (m *Model[T]) SetSelectionComparator(compareFn CompareFn[T]) { m.content.compareFn = compareFn } // GetSelectionEnabled returns whether the viewport allows line selection func (m *Model[T]) GetSelectionEnabled() bool { return m.navigation.selectionEnabled } // IsCapturingInput returns true when the viewport is in a mode that should capture all input // (e.g., filename entry for saving). Callers should forward all messages to the viewport // without processing them when this returns true. func (m *Model[T]) IsCapturingInput() bool { return m.config.saveState.enteringFilename } // SetWrapText sets whether the viewport wraps text func (m *Model[T]) SetWrapText(wrapText bool) { var initialNumLinesAboveSelection int if m.navigation.selectionEnabled { if inView := m.selectionInViewInfo(); inView.numLinesSelectionInView > 0 { initialNumLinesAboveSelection = inView.numLinesAboveSelection } } m.config.wrapText = wrapText m.display.topItemLineOffset = 0 m.display.xOffset = 0 if m.navigation.selectionEnabled { m.scrollSoSelectionInView() if inView := m.selectionInViewInfo(); inView.numLinesSelectionInView > 0 { deltaLinesAbove := initialNumLinesAboveSelection - inView.numLinesAboveSelection m.scrollDownLines(-deltaLinesAbove) m.scrollSoSelectionInView() } } m.safelySetTopItemIdxAndOffset(m.display.topItemIdx, m.display.topItemLineOffset) } // GetWrapText returns whether the viewport wraps text func (m *Model[T]) GetWrapText() bool { return m.config.wrapText } // SetWidth sets the viewport's width func (m *Model[T]) SetWidth(width int) { m.setWidthHeight(width, m.display.bounds.height) } // GetWidth returns the viewport width func (m *Model[T]) GetWidth() int { return m.display.bounds.width } // SetHeight sets the viewport's height, including header and footer func (m *Model[T]) SetHeight(height int) { m.setWidthHeight(m.display.bounds.width, height) } // GetHeight returns the viewport height func (m *Model[T]) GetHeight() int { return m.display.bounds.height } // SetStyles sets the styling for the viewport func (m *Model[T]) SetStyles(styles Styles) { m.display.styles = styles } // GetTopItemIdxAndLineOffset returns the current top item index and line offset within that item func (m *Model[T]) GetTopItemIdxAndLineOffset() (int, int) { return m.display.topItemIdx, m.display.topItemLineOffset } // SetSelectedItemIdx sets the selected context index. Automatically puts selection in view as necessary func (m *Model[T]) SetSelectedItemIdx(selectedItemIdx int) { if !m.navigation.selectionEnabled { return } m.content.setSelectedIdx(selectedItemIdx) m.scrollSoSelectionInView() } // GetSelectedItemIdx returns the currently selected item index func (m *Model[T]) GetSelectedItemIdx() int { if !m.navigation.selectionEnabled { return 0 } return m.content.getSelectedIdx() } // GetSelectedItem returns a pointer to the currently selected item func (m *Model[T]) GetSelectedItem() *T { if !m.navigation.selectionEnabled { return nil } return m.content.getSelectedItem() } // SetHeader sets the header, an unselectable set of lines at the top of the viewport func (m *Model[T]) SetHeader(header []string) { m.content.header = header } // EnsureItemInView scrolls or pans the viewport so that the specified portion of an item is visible. // If the desired item portion is above or below the current view, it scrolls vertically to bring it into view, leaving // verticalPad number of lines of context if possible. // If the desired item portion is to the left or right of the current view, it pans horizontally to bring it into view, // leaving horizontalPad number of columns of context if possible. // Afterwards, it's possible that the selection is out of view of the viewport. func (m *Model[T]) EnsureItemInView(itemIdx, startWidth, endWidth, verticalPad, horizontalPad int) { if m.display.bounds.width == 0 { return } if m.content.isEmpty() { m.safelySetTopItemIdxAndOffset(0, 0) return } itemIdx, startWidth, endWidth = m.clampItemAndWidthParams(itemIdx, startWidth, endWidth) if m.config.wrapText { m.ensureWrappedPortionInView(itemIdx, startWidth, endWidth, verticalPad) } else { m.ensureUnwrappedItemVerticallyInView(itemIdx, verticalPad) m.ensureUnwrappedPortionHorizontallyInView(startWidth, endWidth, horizontalPad) } } // clampItemAndWidthParams clamps itemIdx, startWidth, and endWidth to valid ranges func (m *Model[T]) clampItemAndWidthParams(itemIdx, startWidth, endWidth int) (int, int, int) { itemIdx = max(0, min(itemIdx, m.content.numItems()-1)) itemWidth := m.content.objects[itemIdx].GetItem().Width() startWidth = max(0, min(startWidth, itemWidth)) endWidth = max(startWidth, min(endWidth, itemWidth)) return itemIdx, startWidth, endWidth } // ensureWrappedPortionInView ensures the specified portion is visible in wrapped mode func (m *Model[T]) ensureWrappedPortionInView(itemIdx, startWidth, endWidth, verticalPad int) { if !m.config.wrapText { panic("ensureWrappedPortionInView called when wrapText is false") } viewportWidth := m.contentWidth() segments := m.content.objects[itemIdx].GetItem().LineBrokenItems() startLineOffset := lineOffsetForCellPosition(segments, startWidth, viewportWidth) endLineOffset := lineOffsetForCellPosition(segments, max(0, endWidth-1), viewportWidth) if endWidth == 0 { endLineOffset = 0 } numLinesInPortion := endLineOffset - startLineOffset + 1 numContentLines := m.getNumContentLines() // portion larger than viewport: align top with padding if possible if numLinesInPortion >= numContentLines { desiredLinesAbove := min(verticalPad, numContentLines-1) if startLineOffset >= desiredLinesAbove { m.safelySetTopItemIdxAndOffset(itemIdx, startLineOffset-desiredLinesAbove) } else { // need to scroll up to previous items to get padding m.safelySetTopItemIdxAndOffset(itemIdx, startLineOffset) m.scrollDownLines(-desiredLinesAbove) } return } // check if already in view before any scroll-direction-based positioning // this prevents oscillation when scrollingDown changes between calls portionStartInView, portionEndInView, linesAbovePortion, linesBelowPortion := m.getWrappedPortionViewInfo(itemIdx, startLineOffset, endLineOffset) // if fully visible, check if position is already acceptable if portionStartInView && portionEndInView { // when padding can't be satisfied on both sides, check if already centered if verticalPad*2+numLinesInPortion > numContentLines { // only skip repositioning if already approximately centered (within 1 line) // this prevents oscillation while still allowing initial centering desiredPadding := numContentLines / 2 paddingDiff := linesAbovePortion - linesBelowPortion if paddingDiff < 0 { paddingDiff = -paddingDiff } if paddingDiff <= 1 || (linesAbovePortion >= desiredPadding-1 && linesBelowPortion >= desiredPadding-1) { return } // not centered, fall through to scroll-direction-based repositioning below } else { // padding can be satisfied on both sides desiredPad := min(verticalPad, numContentLines-numLinesInPortion) // already fully visible, check if padding is respected if linesAbovePortion >= desiredPad && linesBelowPortion >= desiredPad { return } // adjust position to ensure padding on the side that needs it if linesBelowPortion < desiredPad { // insufficient padding below, position to add more padding below linesToGoBack := numContentLines - 1 - desiredPad if endLineOffset >= linesToGoBack { m.safelySetTopItemIdxAndOffset(itemIdx, endLineOffset-linesToGoBack) } else { targetItemIdx, targetOffset := m.getItemIdxAbove(itemIdx, endLineOffset, linesToGoBack-endLineOffset) m.safelySetTopItemIdxAndOffset(targetItemIdx, targetOffset) } } else { // insufficient padding above, position to add more padding above if startLineOffset >= desiredPad { m.safelySetTopItemIdxAndOffset(itemIdx, startLineOffset-desiredPad) } else { targetItemIdx, targetOffset := m.getItemIdxAbove(itemIdx, startLineOffset, desiredPad-startLineOffset) m.safelySetTopItemIdxAndOffset(targetItemIdx, targetOffset) } } return } } // not visible, position based on scrolling direction scrollingDown := m.targetBelowTop(itemIdx, startLineOffset) // when padding can't be satisfied on both sides, center based on scroll direction if verticalPad*2+numLinesInPortion > numContentLines { desiredPadding := numContentLines / 2 if scrollingDown { // scrolling down: leave desiredPadding lines below m.safelySetTopItemIdxAndOffset(itemIdx, endLineOffset) linesFromTarget := m.linesBetweenCurrentTopAndTarget(itemIdx, endLineOffset) linesToScrollUp := max(0, numContentLines-1-desiredPadding-linesFromTarget) m.scrollDownLines(-linesToScrollUp) } else { // scrolling up: leave desiredPadding lines above if startLineOffset >= desiredPadding { m.safelySetTopItemIdxAndOffset(itemIdx, startLineOffset-desiredPadding) } else { m.safelySetTopItemIdxAndOffset(itemIdx, startLineOffset) m.scrollDownLines(-desiredPadding) } } return } desiredPad := min(verticalPad, numContentLines-numLinesInPortion) if scrollingDown { // scrolling down: leave desiredPad lines below m.safelySetTopItemIdxAndOffset(itemIdx, endLineOffset) linesFromTarget := m.linesBetweenCurrentTopAndTarget(itemIdx, endLineOffset) linesToScrollUp := max(0, numContentLines-1-desiredPad-linesFromTarget) m.scrollDownLines(-linesToScrollUp) } else { // scrolling up: leave desiredPad lines above if startLineOffset >= desiredPad { m.safelySetTopItemIdxAndOffset(itemIdx, startLineOffset-desiredPad) } else { m.safelySetTopItemIdxAndOffset(itemIdx, startLineOffset) m.scrollDownLines(-desiredPad) } } } // getWrappedPortionViewInfo returns whether the portion is in view and padding information func (m *Model[T]) getWrappedPortionViewInfo(itemIdx, startLineOffset, endLineOffset int) (portionStartInView, portionEndInView bool, linesAbove, linesBelow int) { if !m.config.wrapText { panic("getWrappedPortionViewInfo called when wrapText is false") } itemIndexes := m.getVisibleContentItemIndexes() itemFirstSeenAt := -1 portionStartPos := -1 portionEndPos := -1 for i, visibleItemIdx := range itemIndexes { if visibleItemIdx == itemIdx { if itemFirstSeenAt == -1 { itemFirstSeenAt = i } lineOffsetInItem := i - itemFirstSeenAt if m.display.topItemIdx == itemIdx && itemFirstSeenAt == 0 { lineOffsetInItem += m.display.topItemLineOffset } if lineOffsetInItem == startLineOffset { portionStartInView = true portionStartPos = i } if lineOffsetInItem == endLineOffset { portionEndInView = true portionEndPos = i } } } if portionStartInView { linesAbove = portionStartPos } if portionEndInView { linesBelow = len(itemIndexes) - portionEndPos - 1 } return portionStartInView, portionEndInView, linesAbove, linesBelow } // targetBelowTop checks if a target item & line is below the current top of viewport func (m *Model[T]) targetBelowTop(targetItemIdx, targetStartLineOffset int) bool { if m.display.topItemIdx < targetItemIdx { return true } if m.display.topItemIdx == targetItemIdx && m.display.topItemLineOffset < targetStartLineOffset { return true } return false } // linesBetweenCurrentTopAndTarget calculates how many lines separate current top line from target position func (m *Model[T]) linesBetweenCurrentTopAndTarget(targetItemIdx, targetLineOffset int) int { if m.display.topItemIdx > targetItemIdx { panic("current top item index is after target item index") } if m.display.topItemIdx == targetItemIdx { return targetLineOffset - m.display.topItemLineOffset } // count lines from top item to target linesFromTarget := m.numLinesForItem(m.display.topItemIdx) - m.display.topItemLineOffset for idx := m.display.topItemIdx + 1; idx < targetItemIdx; idx++ { linesFromTarget += m.numLinesForItem(idx) } linesFromTarget += targetLineOffset return linesFromTarget } // ensureUnwrappedItemVerticallyInView scrolls vertically to bring item into view func (m *Model[T]) ensureUnwrappedItemVerticallyInView(itemIdx, verticalPad int) { if m.config.wrapText { panic("ensureUnwrappedItemVerticallyInView called when wrapText is true") } itemIndexes := m.getVisibleContentItemIndexes() numContentLines := m.getNumContentLines() // check if already visible visiblePosition := -1 for i, visibleItemIdx := range itemIndexes { if visibleItemIdx == itemIdx { visiblePosition = i break } } itemInBottomHalfOfViewport := m.display.topItemIdx+numContentLines/2 <= itemIdx // when padding can't be satisfied on both sides, center the item if verticalPad*2+1 > numContentLines { desiredPadding := numContentLines / 2 if itemInBottomHalfOfViewport { // leave desiredPadding lines below targetTopItemIdx := max(0, itemIdx-numContentLines+1+desiredPadding) m.safelySetTopItemIdxAndOffset(targetTopItemIdx, 0) } else { // leave desiredPadding lines above targetTopItemIdx := max(0, itemIdx-desiredPadding) m.safelySetTopItemIdxAndOffset(targetTopItemIdx, 0) } return } desiredPad := min(verticalPad, numContentLines-1) if visiblePosition >= 0 { // item is visible, check if padding is respected linesAbove := visiblePosition linesBelow := len(itemIndexes) - visiblePosition - 1 if linesAbove >= desiredPad && linesBelow >= desiredPad { return } if itemInBottomHalfOfViewport { targetTopItemIdx := max(0, itemIdx-numContentLines+1+desiredPad) m.safelySetTopItemIdxAndOffset(targetTopItemIdx, 0) } else { targetTopItemIdx := max(0, itemIdx-desiredPad) m.safelySetTopItemIdxAndOffset(targetTopItemIdx, 0) } return } // not visible, position based on item position if itemInBottomHalfOfViewport { // leave desiredPad lines below targetTopItemIdx := max(0, itemIdx-numContentLines+1+desiredPad) m.safelySetTopItemIdxAndOffset(targetTopItemIdx, 0) } else { // leave desiredPad lines above targetTopItemIdx := max(0, itemIdx-desiredPad) m.safelySetTopItemIdxAndOffset(targetTopItemIdx, 0) } } // ensureUnwrappedPortionHorizontallyInView pans horizontally to bring portion into view func (m *Model[T]) ensureUnwrappedPortionHorizontallyInView(startWidth, endWidth, horizontalPad int) { if m.config.wrapText { panic("ensureUnwrappedPortionHorizontallyInView called when wrapText is true") } viewportWidth := m.contentWidth() currentXOffset := m.display.xOffset visibleStartWidth := currentXOffset + 1 visibleEndWidth := currentXOffset + viewportWidth portionStartInView := startWidth >= visibleStartWidth && startWidth <= visibleEndWidth portionEndInView := endWidth >= visibleStartWidth && endWidth <= visibleEndWidth portionWidth := endWidth - startWidth panningRight := startWidth > visibleStartWidth // portion wider than viewport: align left edge with padding if portionWidth > viewportWidth { desiredColumnsLeft := min(horizontalPad, viewportWidth-1) targetXOffset := max(0, startWidth-desiredColumnsLeft) m.SetXOffset(targetXOffset) return } // when padding can't be satisfied on both sides, center the portion if horizontalPad*2+portionWidth > viewportWidth { desiredColumnsLeft := (viewportWidth - portionWidth) / 2 targetXOffset := max(0, startWidth-desiredColumnsLeft) m.SetXOffset(targetXOffset) return } desiredPad := min(horizontalPad, viewportWidth-portionWidth) if portionStartInView && portionEndInView { // already fully visible, check if padding is respected columnsLeft := startWidth - currentXOffset columnsRight := currentXOffset + viewportWidth - endWidth if columnsLeft >= desiredPad && columnsRight >= desiredPad { return } // adjust position based on panning direction if panningRight { targetXOffset := max(0, endWidth+desiredPad-viewportWidth) m.SetXOffset(targetXOffset) } else { targetXOffset := max(0, startWidth-desiredPad) m.SetXOffset(targetXOffset) } return } // not visible, position based on panning direction if panningRight { // panning right: leave desiredPad columns to the right targetXOffset := max(0, endWidth+desiredPad-viewportWidth) m.SetXOffset(targetXOffset) } else { // panning left: leave desiredPad columns to the left targetXOffset := max(0, startWidth-desiredPad) m.SetXOffset(targetXOffset) } } // SetXOffset sets the horizontal offset, in terminal cell width, for panning when text wrapping is disabled func (m *Model[T]) SetXOffset(widthOffset int) { if m.config.wrapText { return } maxXOffset := m.maxItemWidth() - m.contentWidth() m.display.xOffset = max(0, min(maxXOffset, widthOffset)) } // GetXOffsetWidth returns the horizontal offset, in terminal cell width, for panning when text wrapping is disabled func (m *Model[T]) GetXOffsetWidth() int { if m.config.wrapText { return 0 } return m.display.xOffset } // SetHighlights sets specific positions to highlight with custom styles in the viewport. func (m *Model[T]) SetHighlights(highlights []Highlight) { m.content.setHighlights(highlights) } // GetHighlights returns all highlights. func (m *Model[T]) GetHighlights() []Highlight { return m.content.getHighlights() } func (m *Model[T]) maxItemWidth() int { if m.config.wrapText { panic("maxItemWidth should not be called when wrapping is enabled") } maxLineWidth := 0 headerLines := m.getVisibleHeaderLines() for i := range headerLines { if w := lipgloss.Width(headerLines[i]); w > maxLineWidth { maxLineWidth = w } } // check content line widths without fully rendering all of them if !m.content.isEmpty() { items := m.content.objects startIdx := clampValZeroToMax(m.display.topItemIdx, m.content.numItems()-1) numItemsToCheck := min(m.content.numItems()-startIdx, m.display.bounds.height) for i := range numItemsToCheck { itemIdx := startIdx + i if itemIdx >= m.content.numItems() { break } currItem := items[itemIdx].GetItem() if w := currItem.Width(); w > maxLineWidth { maxLineWidth = w } } } return maxLineWidth } func (m *Model[T]) numLinesForItem(itemIdx int) int { if !m.config.wrapText { return 1 } cw := m.contentWidth() if cw == 0 { return 0 } if m.content.isEmpty() || itemIdx < 0 || itemIdx >= m.content.numItems() { return 0 } items := m.content.objects return items[itemIdx].GetItem().NumWrappedLines(cw) } // contentWidth returns the width available for rendering content items. // When selection is enabled and a SelectionPrefix is configured, the prefix // reduces the available content width. Headers, footers, and other chrome // use the full bounds.width instead. func (m *Model[T]) contentWidth() int { if m.navigation.selectionEnabled && m.display.styles.SelectionPrefix != "" { pw := lipgloss.Width(m.display.styles.SelectionPrefix) return max(0, m.display.bounds.width-pw) } return m.display.bounds.width } // selectionPrefixPadding returns whitespace the same width as SelectionPrefix. func (m *Model[T]) selectionPrefixPadding() string { if m.display.styles.SelectionPrefix == "" { return "" } return strings.Repeat(" ", lipgloss.Width(m.display.styles.SelectionPrefix)) } func (m *Model[T]) setWidthHeight(width, height int) { if m.display.bounds.width == width && m.display.bounds.height == height { return } m.display.setBounds(rectangle{width: width, height: height}) m.safelySetTopItemIdxAndOffset(m.display.topItemIdx, m.display.topItemLineOffset) if m.navigation.selectionEnabled { m.scrollSoSelectionInView() } } func (m *Model[T]) safelySetTopItemIdxAndOffset(topItemIdx, topItemLineOffset int) { maxTopItemIdx, maxTopItemLineOffset := m.maxItemIdxAndMaxTopLineOffset() if topItemIdx < 0 { topItemIdx = 0 topItemLineOffset = 0 } if topItemIdx > maxTopItemIdx { topItemIdx = maxTopItemIdx topItemLineOffset = maxTopItemLineOffset } if topItemIdx == maxTopItemIdx { topItemLineOffset = clampValZeroToMax(topItemLineOffset, maxTopItemLineOffset) } m.display.setTopItemIdxAndOffset(topItemIdx, topItemLineOffset) } // getNumContentLines returns the number of lines of between the header and footer/pre-footer func (m *Model[T]) getNumContentLines() int { return m.display.getNumContentLines(len(m.getVisibleHeaderLines()), m.config.postHeaderLine != "", m.config.preFooterLine != "", true) } func (m *Model[T]) scrollSoSelectionInView() { if !m.navigation.selectionEnabled { panic("scrollSoSelectionInView called when selection is not enabled") } selectedItem := m.content.getSelectedItem() if selectedItem == nil { return } selectedItemWidth := (*selectedItem).GetItem().Width() startWidth := 0 endWidth := selectedItemWidth if !m.config.wrapText && m.display.xOffset > 0 { if selectedItemWidth < m.display.xOffset { // ensure the selection is visible by scrolling, but maintain xOffset if possible prevXOffset := m.display.xOffset m.EnsureItemInView(m.content.selectedIdx, 0, 0, 0, 0) m.SetXOffset(prevXOffset) return } startWidth = m.display.xOffset endWidth = m.display.xOffset + m.contentWidth() - 1 } m.EnsureItemInView(m.content.selectedIdx, startWidth, endWidth, 0, 0) } // getItemIdxAbove consumes n lines by moving up through items, returning the final item index and line offset func (m *Model[T]) getItemIdxAbove(startItemIdx, startLineOffset, linesToConsume int) (finalItemIdx, finalLineOffset int) { itemIdx := startItemIdx lineOffset := startLineOffset remaining := linesToConsume for remaining > 0 { itemIdx-- if itemIdx < 0 { return 0, 0 } numLinesInItem := m.numLinesForItem(itemIdx) if remaining <= numLinesInItem { return itemIdx, numLinesInItem - remaining } remaining -= numLinesInItem } return itemIdx, lineOffset } // getItemIdxBelow consumes n lines by moving down through items, returning the final item index and line offset func (m *Model[T]) getItemIdxBelow(startItemIdx, linesToConsume int) (finalItemIdx, finalLineOffset int) { itemIdx := startItemIdx remaining := linesToConsume for remaining > 0 { itemIdx++ if itemIdx >= m.content.numItems() { return m.content.numItems() - 1, 0 } numLinesInItem := m.numLinesForItem(itemIdx) if remaining <= numLinesInItem { return itemIdx, remaining - 1 } remaining -= numLinesInItem } return itemIdx, 0 } // scrollDownLines edits topItemIdx and topItemLineOffset to scroll the viewport by n lines (negative for up, positive for down) func (m *Model[T]) scrollDownLines(numLinesDown int) { if numLinesDown == 0 { return } // scrolling down past bottom if numLinesDown > 0 && m.isScrolledToBottom() { return } // scrolling up past top if numLinesDown < 0 && m.isScrolledToTop() { return } newTopItemIdx, newTopItemLineOffset := m.display.topItemIdx, m.display.topItemLineOffset if !m.config.wrapText { newTopItemIdx = m.display.topItemIdx + numLinesDown } else { // wrapped if numLinesDown < 0 { // scrolling up if newTopItemLineOffset >= -numLinesDown { // same item, just change offset newTopItemLineOffset += numLinesDown } else { // need to scroll up through multiple items linesToConsume := -numLinesDown - newTopItemLineOffset newTopItemIdx, newTopItemLineOffset = m.getItemIdxAbove(newTopItemIdx, newTopItemLineOffset, linesToConsume) } } else { // scrolling down numLinesInTopItem := m.numLinesForItem(newTopItemIdx) if newTopItemLineOffset+numLinesDown < numLinesInTopItem { // same item, just change offset newTopItemLineOffset += numLinesDown } else { // need to scroll down through multiple items linesToConsume := numLinesDown - (numLinesInTopItem - (newTopItemLineOffset + 1)) newTopItemIdx, newTopItemLineOffset = m.getItemIdxBelow(newTopItemIdx, linesToConsume) } } } m.safelySetTopItemIdxAndOffset(newTopItemIdx, newTopItemLineOffset) m.SetXOffset(m.display.xOffset) } // getVisibleHeaderLines returns the lines of header that are visible in the viewport as strings. // header lines will take precedence over content and footer if there is not enough vertical height func (m *Model[T]) getVisibleHeaderLines() []string { if m.display.bounds.height == 0 { return nil } headerItems := make([]item.Item, len(m.content.header)) for i := range m.content.header { headerItems[i] = item.NewItem(m.content.header[i]) } itemIndexes := m.getItemIndexesSpanningLines( 0, 0, m.display.bounds.height, len(headerItems), func(idx int) item.Item { return headerItems[idx] }, m.display.bounds.width, // headers use full viewport width ) headerLines := make([]string, len(itemIndexes)) currentItemIdxWidthToLeft := 0 for idx, itemIdx := range itemIndexes { var truncated string if m.config.wrapText { currentItemIdx := itemIndexes[idx] var widthTaken int truncated, widthTaken = headerItems[itemIdx].Take( currentItemIdxWidthToLeft, m.display.bounds.width, "", []item.Highlight{}, // no highlights for header ) if idx+1 < len(itemIndexes) { nextItemIdx := itemIndexes[idx+1] if nextItemIdx != currentItemIdx { currentItemIdxWidthToLeft = 0 } else { currentItemIdxWidthToLeft += widthTaken } } } else { // if not wrapped, items are not yet truncated or highlighted truncated, _ = headerItems[itemIdx].Take( 0, // header doesn't pan horizontally m.display.bounds.width, m.config.continuationIndicator, []item.Highlight{}, // no highlights for header ) } headerLines[idx] = truncated } return headerLines } // getVisibleContentItemIndexes returns the item indexes of content that are visible in the viewport func (m *Model[T]) getVisibleContentItemIndexes() []int { if m.display.bounds.width == 0 || m.content.isEmpty() { return nil } linesUsedByHeader := len(m.getVisibleHeaderLines()) if m.config.postHeaderLine != "" { linesUsedByHeader++ // post-header } numLinesAfterHeader := max(0, m.display.bounds.height-linesUsedByHeader) itemIndexes := m.getItemIndexesSpanningLines( m.display.topItemIdx, m.display.topItemLineOffset, numLinesAfterHeader, m.content.numItems(), func(idx int) item.Item { return m.content.objects[idx].GetItem() }, m.contentWidth(), // content uses narrower width when selection prefix is configured ) if len(itemIndexes) == 0 { return nil } reservedLines := 0 if m.config.footerEnabled { reservedLines++ // footer } if m.config.preFooterLine != "" { reservedLines++ // pre-footer } if reservedLines > 0 { itemIndexes = safeSliceUpToIdx(itemIndexes, numLinesAfterHeader-reservedLines) } return itemIndexes } // getItemIndexesSpanningLines returns the item indexes for each line given a top item index, offset and num lines. // wrapWidth is the width used for wrapping calculations (content width for content, bounds width for headers). func (m *Model[T]) getItemIndexesSpanningLines( topItemIdx int, topItemLineOffset int, totalNumLines int, numItems int, getItem func(int) item.Item, wrapWidth int, ) []int { if numItems == 0 || totalNumLines == 0 { return nil } var itemIndexes []int addLine := func(itemIndex int) bool { itemIndexes = append(itemIndexes, itemIndex) return len(itemIndexes) == totalNumLines } currItemIdx := clampValZeroToMax(topItemIdx, numItems-1) currItem := getItem(currItemIdx) done := totalNumLines == 0 if done { return itemIndexes } if m.config.wrapText { // first item has potentially fewer lines depending on the line offset numLines := max(0, currItem.NumWrappedLines(wrapWidth)-topItemLineOffset) for range numLines { // adding untruncated, unstyled items done = addLine(currItemIdx) if done { break } } for !done { currItemIdx++ if currItemIdx >= numItems { done = true } else { currItem = getItem(currItemIdx) numLines = currItem.NumWrappedLines(wrapWidth) for range numLines { // adding untruncated, unstyled items done = addLine(currItemIdx) if done { break } } } } } else { done = addLine(currItemIdx) for !done { currItemIdx++ if currItemIdx >= numItems { done = true } else { done = addLine(currItemIdx) } } } return itemIndexes } func (m *Model[T]) getTruncatedFooterLine(visibleContentItemIndexes []int) string { numerator := m.content.getSelectedIdx() + 1 // 0 indexed denominator := m.content.numItems() if denominator == 0 { return "" } if !m.config.footerEnabled { panic("getTruncatedFooterLine called when footer should not be shown") } if len(visibleContentItemIndexes) == 0 { return "" } var footerString string var percentScrolled int // if selection is disabled, numerator should be item index of bottom visible line if !m.navigation.selectionEnabled { numerator = visibleContentItemIndexes[len(visibleContentItemIndexes)-1] + 1 if m.config.wrapText && numerator == denominator && !m.isScrolledToBottom() { // if wrapped && bottom visible line is max item index, but actually not fully scrolled to bottom, show 99% percentScrolled = 99 footerString = fmt.Sprintf("99%% (%d/%d)", numerator, denominator) } } if footerString == "" { percentScrolled = percent(numerator, denominator) footerString = fmt.Sprintf("%d%% (%d/%d)", percentScrolled, numerator, denominator) } if m.config.progressBarEnabled { barSpace := m.display.bounds.width - len(footerString) - 1 if barSpace >= 3 { barWidth := min(10, barSpace) footerString = buildProgressBar(percentScrolled, barWidth) + " " + footerString } } footerItem := item.NewItem(footerString) f, _ := footerItem.Take(0, m.display.bounds.width, m.config.continuationIndicator, []item.Highlight{}) return m.display.styles.FooterStyle.Render(f) } func (m *Model[T]) isScrolledToBottom() bool { maxItemIdx, maxTopItemLineOffset := m.maxItemIdxAndMaxTopLineOffset() if m.display.topItemIdx > maxItemIdx { return true } if m.display.topItemIdx == maxItemIdx { return m.display.topItemLineOffset >= maxTopItemLineOffset } return false } // isScrolledToTop returns true if the viewport is scrolled to the very top func (m *Model[T]) isScrolledToTop() bool { return m.display.topItemIdx == 0 && m.display.topItemLineOffset == 0 } type selectionInViewInfoResult struct { numLinesSelectionInView int numLinesAboveSelection int } func (m *Model[T]) selectionInViewInfo() selectionInViewInfoResult { if !m.navigation.selectionEnabled { panic("selectionInViewInfo called when selection is disabled") } itemIndexes := m.getVisibleContentItemIndexes() numLinesSelectionInView := 0 numLinesAboveSelection := 0 assignedNumLinesAboveSelection := false for i := range itemIndexes { if itemIndexes[i] == m.content.getSelectedIdx() { if !assignedNumLinesAboveSelection { numLinesAboveSelection = i assignedNumLinesAboveSelection = true } numLinesSelectionInView++ } } return selectionInViewInfoResult{ numLinesSelectionInView: numLinesSelectionInView, numLinesAboveSelection: numLinesAboveSelection, } } func (m *Model[T]) maxItemIdxAndMaxTopLineOffset() (int, int) { numItems := m.content.numItems() if numItems == 0 { return 0, 0 } headerLines := len(m.getVisibleHeaderLines()) if m.config.postHeaderLine != "" { headerLines++ // post-header } reservedLines := 1 // footer if m.config.preFooterLine != "" { reservedLines++ // pre-footer } numContentLines := max(0, m.display.bounds.height-headerLines-reservedLines) if !m.config.wrapText { return max(0, numItems-numContentLines), 0 } // wrapped maxTopItemIdx, maxTopItemLineOffset := numItems-1, 0 numLinesLastItem := m.numLinesForItem(numItems - 1) if numContentLines <= numLinesLastItem { // last item takes up whole screen or more, adjust offset accordingly maxTopItemLineOffset = numLinesLastItem - numContentLines } else { // need to scroll up through multiple items to fill the screen linesToConsume := numContentLines - numLinesLastItem maxTopItemIdx, maxTopItemLineOffset = m.getItemIdxAbove(maxTopItemIdx, maxTopItemLineOffset, linesToConsume) } return max(0, maxTopItemIdx), max(0, maxTopItemLineOffset) } // getHighlightsForItem returns highlights for the specific item index func (m *Model[T]) getHighlightsForItem(itemIndex int) []item.Highlight { return m.content.getItemHighlightsForItem(itemIndex) } func (m *Model[T]) getNumVisibleItems() int { if !m.config.wrapText { return m.getNumContentLines() } itemIndexes := m.getVisibleContentItemIndexes() // return distinct number of items itemIndexSet := make(map[int]struct{}) for _, i := range itemIndexes { itemIndexSet[i] = struct{}{} } return len(itemIndexSet) } // selectionHighlights returns highlights that fill gaps between existing match // highlights with the selection style, so that the selection background covers // the entire item while match highlights remain visible on top. func (m *Model[T]) selectionHighlights(itemIdx int, matchHighlights []item.Highlight) []item.Highlight { itemLen := len(m.content.objects[itemIdx].GetItem().ContentNoAnsi()) if itemLen == 0 { return matchHighlights } // sort match highlights by start position sorted := make([]item.Highlight, len(matchHighlights)) copy(sorted, matchHighlights) for i := range sorted { for j := i + 1; j < len(sorted); j++ { if sorted[j].ByteRangeUnstyledContent.Start < sorted[i].ByteRangeUnstyledContent.Start { sorted[i], sorted[j] = sorted[j], sorted[i] } } } // fill gaps between match highlights with selection style var result []item.Highlight pos := 0 for _, h := range sorted { if h.ByteRangeUnstyledContent.Start > pos { result = append(result, item.Highlight{ Style: m.display.styles.SelectedItemStyle, ByteRangeUnstyledContent: item.ByteRange{Start: pos, End: h.ByteRangeUnstyledContent.Start}, }) } result = append(result, h) pos = h.ByteRangeUnstyledContent.End } if pos < itemLen { result = append(result, item.Highlight{ Style: m.display.styles.SelectedItemStyle, ByteRangeUnstyledContent: item.ByteRange{Start: pos, End: itemLen}, }) } return result } // styleSelection applies the selection style to unstyled portions of the string, // preserving any existing ANSI styling. Used when selectionStyleOverridesItemStyle is false. func (m *Model[T]) styleSelection(selection string) string { split := surroundingAnsiRegex.Split(selection, -1) matches := surroundingAnsiRegex.FindAllString(selection, -1) var builder strings.Builder builder.Grow(len(selection)) for i, section := range split { if section != "" { builder.WriteString(m.display.styles.SelectedItemStyle.Render(section)) } if i < len(split)-1 && i < len(matches) { builder.WriteString(matches[i]) } } return builder.String() } // fileSavedMsg is returned when file saving completes. type fileSavedMsg struct { filename string // full path to saved file err error // error if save failed, nil on success } // clearSaveResultMsg is sent after some seconds to clear the save result display type clearSaveResultMsg struct{} // saveToFile saves all viewport objects to a file with the given filename. func (m *Model[T]) saveToFile(filename string) tea.Cmd { return func() tea.Msg { // create directory if needed if err := os.MkdirAll(m.config.saveDir, 0750); err != nil { return fileSavedMsg{err: fmt.Errorf("failed to create directory %s: %w", m.config.saveDir, err)} } fullPath := filepath.Join(m.config.saveDir, filename) // collect content without ANSI codes var content strings.Builder for _, obj := range m.content.objects { content.WriteString(obj.GetItem().ContentNoAnsi()) content.WriteString("\n") } if err := os.WriteFile(fullPath, []byte(content.String()), 0600); err != nil { return fileSavedMsg{err: fmt.Errorf("failed to write file: %w", err)} } return fileSavedMsg{filename: fullPath, err: nil} } } // decomposeLineOffset converts a line offset within an item into // (segmentIdx, wrapOffset) given the item's line-broken items. // segmentIdx is which line-broken item, wrapOffset is how many wrapped lines // into that segment. For single-line items: returns (0, lineOffset). func decomposeLineOffset(segments []item.Item, lineOffset, wrapWidth int) (segmentIdx, wrapOffset int) { remaining := lineOffset for i, seg := range segments { n := seg.NumWrappedLines(wrapWidth) if remaining < n { return i, remaining } remaining -= n } if len(segments) == 0 { return 0, 0 } return len(segments) - 1, 0 } // remapHighlightsForSegment clips and adjusts highlight byte ranges from the full // item's content space to a specific line-broken item's content space. // Highlights that don't overlap the segment are dropped. func remapHighlightsForSegment(highlights []item.Highlight, segments []item.Item, segIdx int) []item.Highlight { if len(segments) <= 1 { // single-segment item: highlights are already in the right space return highlights } // compute byte offset of this segment in the full concatenated content startByte := 0 for i := range segIdx { startByte += len(segments[i].ContentNoAnsi()) startByte++ // \n separator } endByte := startByte + len(segments[segIdx].ContentNoAnsi()) var result []item.Highlight for _, h := range highlights { br := h.ByteRangeUnstyledContent if br.End <= startByte || br.Start >= endByte { continue } adjusted := h adjusted.ByteRangeUnstyledContent.Start = max(0, br.Start-startByte) adjusted.ByteRangeUnstyledContent.End = min(endByte-startByte, br.End-startByte) result = append(result, adjusted) } return result } // lineOffsetForCellPosition converts a cumulative cell position across // line-broken items into a line offset. For single-line items: cellPos / wrapWidth. func lineOffsetForCellPosition(segments []item.Item, cellPos, wrapWidth int) int { if len(segments) <= 1 || wrapWidth <= 0 { if wrapWidth <= 0 { return 0 } return cellPos / wrapWidth } cumCells := 0 lineOffset := 0 for _, seg := range segments { segWidth := seg.Width() if cumCells+segWidth > cellPos { if wrapWidth > 0 { lineOffset += (cellPos - cumCells) / wrapWidth } return lineOffset } cumCells += segWidth lineOffset += seg.NumWrappedLines(wrapWidth) } return max(0, lineOffset-1) } func percent(a, b int) int { if b == 0 { return 100 } return int(float32(a) / float32(b) * 100) } // buildProgressBar returns a string of exactly barWidth cells using U+2588 (█) // for the filled portion and U+2591 (░) for the empty portion. func buildProgressBar(percentScrolled, barWidth int) string { if barWidth <= 0 { return "" } filled := min(int(float64(barWidth)*float64(percentScrolled)/100.0), barWidth) return strings.Repeat("█", filled) + strings.Repeat("░", barWidth-filled) } func safeSliceUpToIdx[T any](s []T, i int) []T { if i > len(s) { return s } if i < 0 { return []T{} } return s[:i] } func clampValZeroToMax(v, maximum int) int { return max(0, min(maximum, v)) } ================================================ FILE: modules/viewport/viewport_multiline_test.go ================================================ package viewport import ( "testing" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) // multiLineObject wraps a MultiLineItem for use in viewport tests type multiLineObject struct { item item.Item } func (o multiLineObject) GetItem() item.Item { return o.item } var _ Object = multiLineObject{} func setMixedContent(vp *Model[object], items []item.Item) { objects := make([]object, len(items)) for i := range items { objects[i] = object{item: items[i]} } vp.SetObjects(objects) } func TestViewport_MultiLine_WrapOn_Basic(t *testing.T) { w, h := 15, 7 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // Object 0: multi-line item with 3 segments // Object 1: regular single-line item setMixedContent(vp, []item.Item{ item.NewMultiLineItem( item.NewItem("{"), item.NewItem(" \"k\": \"val\""), // 12 cells, fits in 15-wide viewport item.NewItem("}"), ), item.NewItem("single line"), }) expectedView := internal.Pad(w, h, []string{ "header", internal.BlueFg.Render("{"), // segment 0 (selected) internal.BlueFg.Render(" \"k\": \"val\""), // segment 1 (selected, 12 cells) internal.BlueFg.Render("}"), // segment 2 (selected) "single line", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_MultiLine_WrapOn_SelectionMovement(t *testing.T) { w, h := 20, 7 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setMixedContent(vp, []item.Item{ item.NewMultiLineItem( item.NewItem("line one"), item.NewItem("line two"), ), item.NewItem("after"), }) // Initially selected: first item (multi-line) expectedView := internal.Pad(w, h, []string{ "header", internal.BlueFg.Render("line one"), internal.BlueFg.Render("line two"), "after", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // Move selection down to "after" vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(w, h, []string{ "header", "line one", "line two", internal.BlueFg.Render("after"), "", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_MultiLine_WrapOn_EmptySegment(t *testing.T) { w, h := 20, 7 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // Multi-line item with an empty segment in the middle setMixedContent(vp, []item.Item{ item.NewMultiLineItem( item.NewItem("above"), item.NewItem(""), item.NewItem("below"), ), }) expectedView := internal.Pad(w, h, []string{ "header", internal.BlueFg.Render("above"), internal.BlueFg.Render(" "), // empty segment shows selection marker internal.BlueFg.Render("below"), "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_MultiLine_WrapOn_SegmentWrapping(t *testing.T) { w, h := 10, 8 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // Each segment wraps independently setMixedContent(vp, []item.Item{ item.NewMultiLineItem( item.NewItem("abcdefghij12"), // 12 cells, wraps to 2 lines at width 10 item.NewItem("xyz"), // 3 cells, 1 line ), }) expectedView := internal.Pad(w, h, []string{ "header", internal.BlueFg.Render("abcdefghij"), // segment 0, line 1 internal.BlueFg.Render("12"), // segment 0, line 2 internal.BlueFg.Render("xyz"), // segment 1 "", "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_MultiLine_WrapOn_ScrollDown(t *testing.T) { w, h := 20, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setMixedContent(vp, []item.Item{ item.NewMultiLineItem( item.NewItem("seg1"), item.NewItem("seg2"), item.NewItem("seg3"), ), item.NewItem("next item"), }) // Initial view: header + 3 segment lines, fills viewport expectedView := internal.Pad(w, h, []string{ "header", internal.BlueFg.Render("seg1"), internal.BlueFg.Render("seg2"), internal.BlueFg.Render("seg3"), "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // Scroll down to next item vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(w, h, []string{ "header", "seg2", "seg3", internal.BlueFg.Render("next item"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_MultiLine_WrapOn_NoSelection(t *testing.T) { w, h := 20, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(false) setMixedContent(vp, []item.Item{ item.NewMultiLineItem( item.NewItem("first"), item.NewItem("second"), ), item.NewItem("third"), }) expectedView := internal.Pad(w, h, []string{ "header", "first", "second", "third", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_MultiLine_WrapOn_SingleLineItemsUnchanged(t *testing.T) { // Verify that single-line items behave identically with the new code w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_MultiLine_WrapOn_MultipleMultiLineItems(t *testing.T) { w, h := 20, 8 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setMixedContent(vp, []item.Item{ item.NewMultiLineItem( item.NewItem("a1"), item.NewItem("a2"), ), item.NewMultiLineItem( item.NewItem("b1"), item.NewItem("b2"), ), }) // First multi-line item selected expectedView := internal.Pad(w, h, []string{ "header", internal.BlueFg.Render("a1"), internal.BlueFg.Render("a2"), "b1", "b2", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // Move down to second multi-line item vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(w, h, []string{ "header", "a1", "a2", internal.BlueFg.Render("b1"), internal.BlueFg.Render("b2"), "", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } ================================================ FILE: modules/viewport/viewport_no_selection_no_wrap_test.go ================================================ package viewport import ( "strconv" "testing" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) func TestViewport_SelectionOff_WrapOff_Empty(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{}) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeader([]string{"header"}) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"header"}) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_SmolDimensions(t *testing.T) { w, h := 0, 0 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{"hi"}) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{""}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(1) vp.SetHeight(1) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"."}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(2) vp.SetHeight(2) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"..", ""}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(3) vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"...", "hi", "..."}) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_Basic(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really rea..."), internal.RedFg.Render("a") + " really rea...", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_GetConfigs(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "first", "second", }) if selectionEnabled := vp.GetSelectionEnabled(); selectionEnabled { t.Errorf("expected selection to be disabled, got %v", selectionEnabled) } if wrapText := vp.GetWrapText(); wrapText { t.Errorf("expected text wrapping to be disabled, got %v", wrapText) } if selectedItemIdx := vp.GetSelectedItemIdx(); selectedItemIdx != 0 { t.Errorf("expected selected item index to be 0, got %v", selectedItemIdx) } if selectedItem := vp.GetSelectedItem(); selectedItem != nil { t.Errorf("expected selected item to be nil, got %v", selectedItem) } } func TestViewport_SelectionOff_WrapOff_ShowFooter(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really rea..."), "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(6) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really rea..."), internal.RedFg.Render("a") + " really rea...", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(7) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really rea..."), internal.RedFg.Render("a") + " really rea...", "", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_FooterStyle(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h, WithStyles[object](Styles{ FooterStyle: internal.RedFg, SelectedItemStyle: selectionStyle, })) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "1", "2", "3", "4", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "1", "2", "3", internal.RedFg.Render("75% (3/4)"), }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_FooterDisabled(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "first line", "second line", "third line", "fourth line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", "second line", "third line", "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetFooterEnabled(false) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", "second line", "third line", "fourth line", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_SpaceAround(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ " first line ", " first line ", " first line ", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", " first li...", " fi...", " ...", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_MultiHeader(t *testing.T) { w, h := 15, 2 vp := newViewport(w, h) vp.SetHeader([]string{"header1", "header2"}) setContent(vp, []string{ "line1", "line2", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(4) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line2", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(5) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", "line2", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(6) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", "line2", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_OverflowLine(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"long header overflows"}) setContent(vp, []string{ "123456789012345", "1234567890123456", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "long header ...", "123456789012345", "123456789012...", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_OverflowHeight(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "123456789012345", "1234567890123456", "1234567890123456", "1234567890123456", "1234567890123456", "1234567890123456", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "123456789012345", "123456789012...", "123456789012...", "123456789012...", "66% (4/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_Scrolling(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) doSetContent := func() { setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) } validate := func(expectedView string) { // set Item multiple times to confirm no side effects of doing it internal.CmpStr(t, expectedView, vp.View()) doSetContent() internal.CmpStr(t, expectedView, vp.View()) } doSetContent() expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "third", "fourth", "66% (4/6)", }) validate(expectedView) // scrolling up past top is no-op vp, _ = vp.Update(upKeyMsg) validate(expectedView) // scrolling down by one vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "third", "fourth", "fifth", "83% (5/6)", }) validate(expectedView) // scrolling down by one again vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "fourth", "fifth", "sixth", "100% (6/6)", }) validate(expectedView) // scrolling down past bottom when at bottom is no-op vp, _ = vp.Update(downKeyMsg) validate(expectedView) } func TestViewport_SelectionOff_WrapOff_EnsureItemInView(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth line that is really long", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(5, 0, 0, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fifth", "sixth l...", "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(5, len("sixth line"), len("sixth line "), 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "...h", "...h li...", // 's|ixth line ' "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(5, len("sixth line that is really lon"), len("sixth line that is really long"), 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "...", "...ly long", "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(1, 0, 0, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "third", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(4, 0, 0, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fourth", "fifth", "83% (5/6)", }) internal.CmpStr(t, expectedView, vp.View()) // ensure idempotence vp.EnsureItemInView(4, 0, 0, 0, 0) internal.CmpStr(t, expectedView, vp.View()) // invalid values truncated vp.EnsureItemInView(4, -1, 1e9, 0, 0) internal.CmpStr(t, expectedView, vp.View()) // full width ok vp.EnsureItemInView(4, 0, len("fifth"), 0, 0) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_EnsureItemInViewVerticalPad(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) numItems := 100 nums := make([]string, 0, numItems) for i := range numItems { nums = append(nums, strconv.Itoa(i+1)) } setContent(vp, nums) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "1", "2", "3", "4", "4% (4/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "5" with verticalPad=1 // should leave 1 line of context below vp.EnsureItemInView(4, 0, 0, 1, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "3", "4", "5", "6", "6% (6/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll up to "3" with verticalPad=1 // should leave 1 line of context above vp.EnsureItemInView(2, 0, 0, 1, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "2", "3", "4", "5", "5% (5/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll up to visible "8" with verticalPad=2 // should leave 2 lines of context above vp.EnsureItemInView(9, 0, 0, 0, 0) // reset to bottom vp.EnsureItemInView(7, 0, 0, 2, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "6", "7", "8", "9", "9% (9/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "99", not enough content below for verticalPad=3 // pad below as much as possible vp.EnsureItemInView(0, 0, 0, 0, 0) // reset to top vp.EnsureItemInView(98, 0, 0, 3, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "97", "98", "99", "100", "100% (1...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "50", request more padding than is available given viewport height -> center item vp.EnsureItemInView(0, 0, 0, 0, 0) // reset to top vp.EnsureItemInView(49, 0, 0, 3, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "49", "50", "51", "52", "52% (52...", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_EnsureItemInViewHorizontalPad(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "some line that is really long", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "some li...", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: pan right to space after "line" with horizontalPad=2 // should leave 2 columns of padding to the right vp.EnsureItemInView(0, 0, 0, 0, 0) // reset to top vp.EnsureItemInView(0, len("some line"), len("some line "), 0, 2) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "...line...", // 'so|me line_th|at is really long' "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: pan to the visible "me" of "some" with horizontalPad=1 // should leave 1 column of context to the left vp.EnsureItemInView(0, len("so"), len("some"), 0, 1) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "... lin...", // 's|o__ line t|hat is really long' "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: pan right to the " r" of "is really" with huge horizontalPad // should center the target portion horizontally vp.EnsureItemInView(0, len("some line that is"), len("some line that is r"), 0, 100) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "...s re...", // 'some line tha|t is__eall|y long' "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_SetXOffset(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "the first line", "the second line", }) initialExpectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the fir...", "the sec...", "", "100% (2/2)", }) internal.CmpStr(t, initialExpectedView, vp.View()) vp.SetXOffset(-1) internal.CmpStr(t, initialExpectedView, vp.View()) vp.SetXOffset(0) internal.CmpStr(t, initialExpectedView, vp.View()) vp.SetXOffset(4) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "...st line", "...ond ...", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(1000) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "...t line ", "...nd line", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_BulkScrolling(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) // full page down vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "fourth", "66% (4/6)", }) internal.CmpStr(t, expectedView, vp.View()) // half page down vp, _ = vp.Update(halfPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fourth", "fifth", "83% (5/6)", }) internal.CmpStr(t, expectedView, vp.View()) // full page down vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fifth", "sixth", "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // full page up vp, _ = vp.Update(fullPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "fourth", "66% (4/6)", }) internal.CmpStr(t, expectedView, vp.View()) // half page up vp, _ = vp.Update(halfPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "third", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // full page up vp, _ = vp.Update(fullPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) // go to bottom vp, _ = vp.Update(goToBottomKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fifth", "sixth", "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // go to top vp, _ = vp.Update(goToTopKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_Panning(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header long"}) doSetContent := func() { setContent(vp, []string{ "first line that is fairly long", "second line that is even much longer than the first", "third line that is fairly long", "fourth", "fifth line that is fairly long", "sixth", }) } validate := func(expectedView string) { // set Item multiple times to confirm no side effects of doing it internal.CmpStr(t, expectedView, vp.View()) doSetContent() internal.CmpStr(t, expectedView, vp.View()) } doSetContent() expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "first l...", "second ...", "third l...", "fourth", "66% (4/6)", }) validate(expectedView) // pan right vp.SetXOffset(5) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...ne t...", "...ine ...", "...ne t...", ".", "66% (4/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...ine ...", "...ne t...", ".", "...ne t...", "83% (5/6)", }) validate(expectedView) // pan all the way right vp.SetXOffset(41) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...e first", "...", "...", "...", "83% (5/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...ly long", "...", "...ly long", "...", "100% (6/6)", }) validate(expectedView) // set shorter Item setContent(vp, []string{ "the first one", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...rst one", "", "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_ChangeHeight(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height vp.SetHeight(6) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "third", "fourth", "66% (4/6)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll to bottom vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "fourth", "fifth", "sixth", "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // reduce height vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height vp.SetHeight(8) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "third", "fourth", "fifth", "sixth", "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_ChangeContent(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "third", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fourth", "fifth", "sixth", "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // remove Item setContent(vp, []string{}) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", }) internal.CmpStr(t, expectedView, vp.View()) // re-add Item setContent(vp, []string{ "first", "second", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_SetSelectionEnabled_SetsTopVisibleItem(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp.SetSelectionEnabled(true) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ internal.BlueFg.Render("third"), "fourth", "fifth", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_SetHighlights(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", }) highlights := []Highlight{ { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 10, }, Style: internal.RedFg, }, }, { ItemIndex: 2, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 9, }, Style: internal.GreenFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first line", "the " + internal.RedFg.Render("second") + " line", "the " + internal.GreenFg.Render("third") + " line", "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_SetHighlightsStyledContent(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) setContent(vp, []string{ internal.RedFg.Render("the first line"), internal.GreenFg.Render("the second line"), internal.BlueFg.Render("the third line"), internal.RedFg.Render("the fourth line"), }) highlights := []Highlight{ { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 10, }, Style: internal.BlueFg, }, }, { ItemIndex: 2, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 9, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.RedFg.Render("the first line"), internal.GreenFg.Render("the ") + internal.BlueFg.Render("second") + internal.GreenFg.Render(" line"), internal.BlueFg.Render("the ") + internal.RedFg.Render("third") + internal.BlueFg.Render(" line"), "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_SetHighlightsAnsiUnicode(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) // A (1w, 1b), 💖 (2w, 4b), 中 (2w, 3b), é (1w, 3b) = 6w, 11b vp.SetHeader([]string{"A💖中é"}) setContent(vp, []string{ "A💖中é line", "another line", }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 1, End: 8, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "A💖中é", "A" + internal.RedFg.Render("💖中") + "é line", "another line", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } ================================================ FILE: modules/viewport/viewport_no_selection_wrap_test.go ================================================ package viewport import ( "strconv" "strings" "testing" "time" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) func TestViewport_SelectionOff_WrapOn_Empty(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetWrapText(true) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{}) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeader([]string{"header"}) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"header"}) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_SmolDimensions(t *testing.T) { w, h := 0, 0 vp := newViewport(w, h) vp.SetWrapText(true) vp.SetHeader([]string{"header"}) setContent(vp, []string{"hi"}) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{""}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(1) vp.SetHeight(1) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"h"}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(2) vp.SetHeight(2) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"he", "ad"}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(3) vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"hea", "der", ""}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(4) vp.SetHeight(4) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"head", "er", "hi", "1..."}) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_Basic(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_GetConfigs(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "first", "second", }) if selectionEnabled := vp.GetSelectionEnabled(); selectionEnabled { t.Errorf("expected selection to be disabled, got %v", selectionEnabled) } if wrapText := vp.GetWrapText(); !wrapText { t.Errorf("expected text wrapping to be enabled, got %v", wrapText) } if selectedItemIdx := vp.GetSelectedItemIdx(); selectedItemIdx != 0 { t.Errorf("expected selected item index to be 0, got %v", selectedItemIdx) } if selectedItem := vp.GetSelectedItem(); selectedItem != nil { t.Errorf("expected selected item to be nil, got %v", selectedItem) } } func TestViewport_SelectionOff_WrapOn_ShowFooter(t *testing.T) { w, h := 15, 7 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), internal.RedFg.Render("a") + " really really", "99% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(8) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), internal.RedFg.Render("a") + " really really", " long line", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(9) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), internal.RedFg.Render("a") + " really really", " long line", "", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_FooterStyle(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h, WithStyles[object](Styles{ FooterStyle: internal.RedFg, SelectedItemStyle: selectionStyle, })) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "1", "2", "3", "4", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "1", "2", "3", internal.RedFg.Render("75% (3/4)"), }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_FooterDisabled(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "first line", "second line", "third line", "fourth line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", "second line", "third line", "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetFooterEnabled(false) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", "second line", "third line", "fourth line", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_SpaceAround(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ " first line ", " first line ", " first line ", }) // trailing space is not trimmed expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", " first line ", "", " first", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_MultiHeader(t *testing.T) { w, h := 15, 2 vp := newViewport(w, h) vp.SetHeader([]string{"header1", "header2"}) vp.SetWrapText(true) setContent(vp, []string{ "line1", "line2", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(4) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line2", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(5) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", "line2", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(6) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", "line2", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_OverflowLine(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"long header overflows"}) vp.SetWrapText(true) setContent(vp, []string{ "123456789012345", "1234567890123456", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "long header ove", "rflows", "123456789012345", "123456789012345", "6", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_OverflowHeight(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "123456789012345", "1234567890123456", "1234567890123456", "1234567890123456", "1234567890123456", "1234567890123456", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "123456789012345", "123456789012345", "6", "123456789012345", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_Scrolling(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) doSetContent := func() { setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) } validate := func(expectedView string) { // set Item multiple times to confirm no side effects of doing it internal.CmpStr(t, expectedView, vp.View()) doSetContent() internal.CmpStr(t, expectedView, vp.View()) } doSetContent() expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "third", "fourth", "66% (4/6)", }) validate(expectedView) // scrolling up past top is no-op vp, _ = vp.Update(upKeyMsg) validate(expectedView) // scrolling down by one vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "third", "fourth", "fifth", "83% (5/6)", }) validate(expectedView) // scrolling down by one again vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "fourth", "fifth", "sixth", "100% (6/6)", }) validate(expectedView) // scrolling down past bottom when at bottom is no-op vp, _ = vp.Update(downKeyMsg) validate(expectedView) } func TestViewport_SelectionOff_WrapOn_EnsureItemInView(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line that is super long", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first ", "line", "the second", " line", "50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(2, 0, 9, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", "the second", " line", "the third", "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(goToBottomKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the fourth", " line that", " is super ", "long", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(1, len("the second"), len("the second line"), 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", " line", "the third ", "line", "the fourth", "99% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(0, 0, 0, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first ", "line", "the second", " line", "50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(3, 0, len("the fourth line that is super "), 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", "the fourth", " line that", " is super ", "99% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_EnsureItemInViewVerticalPad(t *testing.T) { w, h := 10, 10 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) numItems := 100 nums := make([]string, 0, numItems) for i := range numItems { nums = append(nums, strconv.Itoa(i+1)) } setContent(vp, nums) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "1", "2", "3", "4", "5", "6", "7", "8", "8% (8/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "10" with verticalPad=1 // should leave 1 line of context below vp.EnsureItemInView(9, 0, 0, 1, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "4", "5", "6", "7", "8", "9", "10", "11", "11% (11...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll up to "5" with verticalPad=1 // should leave 1 line of context above vp.EnsureItemInView(4, 0, 0, 1, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "4", "5", "6", "7", "8", "9", "10", "11", "11% (11...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "15" with verticalPad=2 // should leave 2 lines of context above vp.EnsureItemInView(99, 0, 0, 0, 0) // reset to bottom vp.EnsureItemInView(14, 0, 0, 2, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "13", "14", "15", "16", "17", "18", "19", "20", "20% (20...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "99", not enough content below for verticalPad=3 // pad below as much as possible vp.EnsureItemInView(0, 0, 0, 0, 0) // reset to top vp.EnsureItemInView(98, 0, 0, 3, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "93", "94", "95", "96", "97", "98", "99", "100", "100% (1...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "50", request more padding than is available given viewport height -> center item vp.EnsureItemInView(0, 0, 0, 0, 0) // reset to top vp.EnsureItemInView(49, 0, 0, 5, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "47", "48", "49", "50", "51", "52", "53", "54", "54% (54...", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_EnsureItemInViewHorizontalPad(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "some line that is really long", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "some line ", "that is re", "ally long", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: ensure "line " is visible with horizontalPad=2 // in wrap mode, horizontal padding ensures character ranges are visible vp.EnsureItemInView(0, 0, 0, 0, 0) // reset vp.EnsureItemInView(0, len("some line"), len("some line "), 0, 2) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "some line ", "that is re", "ally long", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: ensure "really" is visible with horizontalPad=1 vp.EnsureItemInView(0, len("some line that is "), len("some line that is really"), 0, 1) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "some line ", "that is re", "ally long", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: ensure end of string is visible with large horizontalPad vp.EnsureItemInView(0, len("some line that is really lon"), len("some line that is really long"), 0, 100) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "some line ", "that is re", "ally long", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_SetXOffset(t *testing.T) { w, h := 10, 8 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "the first line", "the second line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first ", "line", "the second", " line", "", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(-1) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(0) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(4) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(1000) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_BulkScrolling(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "the first line", "the second line", "the third line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first", "line", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // full page down vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the second", " line", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // half page down vp, _ = vp.Update(halfPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", " line", "the third ", "99% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // full page down vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the third ", "line", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // full page up vp, _ = vp.Update(fullPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the second", " line", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // half page up vp, _ = vp.Update(halfPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", "the second", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // full page up vp, _ = vp.Update(fullPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first", "line", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // go to bottom vp, _ = vp.Update(goToBottomKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the third ", "line", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // go to top vp, _ = vp.Update(goToTopKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first", "line", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_Panning(t *testing.T) { w, h := 10, 7 vp := newViewport(w, h) vp.SetHeader([]string{"header long"}) vp.SetWrapText(true) doSetContent := func() { setContent(vp, []string{ "first line that is fairly long", "second line that is even much longer than the first", "third line that is fairly long", "fourth", "fifth line that is fairly long", "sixth", }) } validate := func(expectedView string) { // set Item multiple times to confirm no side effects of doing it internal.CmpStr(t, expectedView, vp.View()) doSetContent() internal.CmpStr(t, expectedView, vp.View()) } doSetContent() expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", "first line", " that is f", "airly long", "second lin", "33% (2/6)", }) validate(expectedView) // pan right vp.SetXOffset(5) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", " that is f", "airly long", "second lin", "e that is ", "33% (2/6)", }) validate(expectedView) // pan all the way right vp.SetXOffset(41) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", "airly long", "second lin", "e that is", "even much", "33% (2/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", "second lin", "e that is ", "even much ", "longer tha", "33% (2/6)", }) validate(expectedView) } func TestViewport_SelectionOff_WrapOn_ChangeHeight(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "the first line", "the second line", "the third line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first", "line", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to bottom vp, _ = vp.Update(fullPgDownKeyMsg) vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the third", "line", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // reduce height vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the third", "99% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height vp.SetHeight(8) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first", "line", "the second", " line", "the third", "line", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_ChangeContent(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "the first line", "the second line", "the third line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first", "line", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to bottom vp, _ = vp.Update(fullPgDownKeyMsg) vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the third", "line", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // remove Item setContent(vp, []string{ "the first line", "the second line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the second", " line", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the second", " line", "50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) // remove all Item setContent(vp, []string{}) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_SuperLongWrappedLine(t *testing.T) { runTest := func(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "smol", strings.Repeat("12345678", 1000000), "smol", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "smol", "1234567812", "3456781234", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "1234567812", "3456781234", "5678123456", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "3456781234", "5678123456", "7812345678", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(goToBottomKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "5678123456", "7812345678", "smol", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } internal.RunWithTimeout(t, runTest, 500*time.Millisecond) } func TestViewport_SelectionOff_WrapOn_EnableSelectionShowsTopLineInItem(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetWrapText(true) setContent(vp, []string{ "short", "this is a very long line", "another short line", "final line", }) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "very long ", "line", "another sh", "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetSelectionEnabled(true) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ internal.BlueFg.Render("this is a "), internal.BlueFg.Render("very long "), internal.BlueFg.Render("line"), "50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_SetHighlights(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ "first", "second line that wraps", "third", }) highlights := []Highlight{ { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 0, End: 6, }, Style: internal.RedFg, }, }, { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 12, End: 16, }, Style: internal.GreenFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", internal.RedFg.Render("second") + " lin", "e " + internal.GreenFg.Render("that") + " wra", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_SetHighlightsStyledContent(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) setContent(vp, []string{ internal.GreenFg.Render("first"), internal.BlueFg.Render("second line that wraps"), internal.RedFg.Render("third"), }) highlights := []Highlight{ { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 0, End: 6, }, Style: internal.RedFg, }, }, { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 12, End: 16, }, Style: internal.GreenFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.GreenFg.Render("first"), internal.RedFg.Render("second") + internal.BlueFg.Render(" lin"), internal.BlueFg.Render("e ") + internal.GreenFg.Render("that") + internal.BlueFg.Render(" wra"), "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_SetHighlightsAnsiUnicode(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"A💖中é"}) vp.SetWrapText(true) setContent(vp, []string{ "A💖中é text that wraps", "another line", }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 1, End: 8, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "A💖中é", "A" + internal.RedFg.Render("💖中") + "é tex", "t that wra", "ps", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } ================================================ FILE: modules/viewport/viewport_postheader_test.go ================================================ package viewport import ( "testing" "github.com/antgroup/hugescm/modules/viewport/internal" ) func TestPostHeaderLineWithFooterEnabled(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", }) // Without post-header: 3 content lines + footer expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // With post-header: post-header + 2 content lines + footer (height 5 - 1 post-header - 1 footer = 3 content) vp.SetPostHeaderLine("Post-header text") expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-header text", "line 1", "line 2", "line 3", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineWithFooterDisabled(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", }) vp.SetFooterEnabled(false) // Without post-header expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "", "", }) internal.CmpStr(t, expectedView, vp.View()) // With post-header (still renders even though footer disabled) vp.SetPostHeaderLine("Post-header text") expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-header text", "line 1", "line 2", "line 3", "", }) internal.CmpStr(t, expectedView, vp.View()) } func TestEmptyPostHeaderLine(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", }) // Empty post-header means no extra line rendered vp.SetPostHeaderLine("") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "line 4", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineTruncation(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Set post-header longer than viewport width vp.SetPostHeaderLine("This is a very long post-header line that exceeds the width") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "This is a ve...", "line 1", "line 2", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineSmallHeight(t *testing.T) { w, h := 20, 3 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", }) // Height 3 with footer and post-header: 1 post-header + 1 content + 1 footer vp.SetPostHeaderLine("Post-header") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-header", "line 1", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineWithHeader(t *testing.T) { w, h := 30, 7 vp := newViewport(w, h) vp.SetHeader([]string{"Header"}) setContent(vp, []string{ "line 1", "line 2", "line 3", }) // Height 7 with header, post-header, footer: 1 header + 1 post-header + 3 content + 1 padding + 1 footer vp.SetPostHeaderLine("Post-header") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Header", "Post-header", "line 1", "line 2", "line 3", "", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineDynamicToggle(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", }) // Initially no post-header expectedNoPostHeader := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "line 4", "100% (4/4)", }) internal.CmpStr(t, expectedNoPostHeader, vp.View()) // Set post-header vp.SetPostHeaderLine("Post-header") expectedWithPostHeader := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-header", "line 1", "line 2", "line 3", "75% (3/4)", }) internal.CmpStr(t, expectedWithPostHeader, vp.View()) // Remove post-header vp.SetPostHeaderLine("") internal.CmpStr(t, expectedNoPostHeader, vp.View()) // Set post-header again with different text vp.SetPostHeaderLine("Different post-header") expectedDifferent := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Different post-header", "line 1", "line 2", "line 3", "75% (3/4)", }) internal.CmpStr(t, expectedDifferent, vp.View()) } func TestPostHeaderLineReducesContentLines(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", "line 5", }) // Without post-header: 4 content lines visible (height 5 - 1 footer = 4) expectedNoPostHeader := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "line 4", "80% (4/5)", }) internal.CmpStr(t, expectedNoPostHeader, vp.View()) // With post-header: 3 content lines visible (height 5 - 1 post-header - 1 footer = 3) vp.SetPostHeaderLine("Post-header") expectedWithPostHeader := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-header", "line 1", "line 2", "line 3", "60% (3/5)", }) internal.CmpStr(t, expectedWithPostHeader, vp.View()) } func TestPostHeaderLineWithWrap(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h, WithWrapText[object](true)) setContent(vp, []string{ "short", "longer text that wraps", }) // Post-header should appear before content vp.SetPostHeaderLine("Post-head") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-head", "short", "longer tex", "t that wra", "ps", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineScrolling(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", "line 5", "line 6", }) vp.SetPostHeaderLine("Post-header") // Initially shows first 3 content lines (height 5 - 1 post-header - 1 footer = 3) expectedInitial := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-header", "line 1", "line 2", "line 3", "50% (3/6)", }) internal.CmpStr(t, expectedInitial, vp.View()) // Scroll down vp, _ = vp.Update(downKeyMsg) expectedAfterScroll := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-header", "line 2", "line 3", "line 4", "66% (4/6)", }) internal.CmpStr(t, expectedAfterScroll, vp.View()) } func TestPostHeaderLineStyled(t *testing.T) { w, h := 30, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Set a styled post-header line styledPostHeader := internal.RedFg.Render("Red") + " and " + internal.BlueFg.Render("Blue") vp.SetPostHeaderLine(styledPostHeader) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ internal.RedFg.Render("Red") + " and " + internal.BlueFg.Render("Blue"), "line 1", "line 2", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineDoesNotWrap(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h, WithWrapText[object](true)) setContent(vp, []string{ "short", "another", }) // Set a long post-header line - it should NOT wrap, only truncate vp.SetPostHeaderLine("This is a very long post-header that should not wrap") // Post-header should be truncated to single line, not wrapped expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "This is...", "short", "another", "", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineWithPreFooterLine(t *testing.T) { w, h := 30, 6 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", "line 5", }) // Both post-header and pre-footer: height 6 - 1 post-header - 1 pre-footer - 1 footer = 3 content vp.SetPostHeaderLine("Post-header") vp.SetPreFooterLine("Pre-footer") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Post-header", "line 1", "line 2", "line 3", "Pre-footer", "60% (3/5)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineWithHeaderAndPreFooter(t *testing.T) { w, h := 30, 7 vp := newViewport(w, h) vp.SetHeader([]string{"Header"}) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", "line 5", }) // All extras: 1 header + 1 post-header + 2 content + 1 pre-footer + 1 footer = 7 // (height 7 - 1 header - 1 post-header - 1 pre-footer - 1 footer = 3 content) vp.SetPostHeaderLine("Post-header") vp.SetPreFooterLine("Pre-footer") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Header", "Post-header", "line 1", "line 2", "line 3", "Pre-footer", "60% (3/5)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineExactWidth(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Post-header exactly matches width - should not truncate vp.SetPostHeaderLine("1234567890") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "1234567890", "line 1", "line 2", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPostHeaderLineOneCharOverWidth(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Post-header one char over width - should truncate vp.SetPostHeaderLine("12345678901") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "1234567...", "line 1", "line 2", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } ================================================ FILE: modules/viewport/viewport_prefooter_test.go ================================================ package viewport import ( "testing" "github.com/antgroup/hugescm/modules/viewport/internal" ) func TestPreFooterLineWithFooterEnabled(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", }) // Without pre-footer: 3 content lines + footer expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // With pre-footer: 2 content lines + pre-footer + footer vp.SetPreFooterLine("Pre-footer text") expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "Pre-footer text", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineWithFooterDisabled(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", }) vp.SetFooterEnabled(false) // Without pre-footer expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "", "", }) internal.CmpStr(t, expectedView, vp.View()) // With pre-footer (still renders even though footer disabled) vp.SetPreFooterLine("Pre-footer text") expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "Pre-footer text", "", }) internal.CmpStr(t, expectedView, vp.View()) } func TestEmptyPreFooterLine(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", }) // Empty pre-footer means no extra line rendered vp.SetPreFooterLine("") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "line 4", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) // All 4 content lines visible with footer (height 5 = 4 content + 1 footer) if vp.GetPreFooterLine() != "" { t.Errorf("expected empty pre-footer line, got %q", vp.GetPreFooterLine()) } } func TestPreFooterLineTruncation(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Set pre-footer longer than viewport width vp.SetPreFooterLine("This is a very long pre-footer line that exceeds the width") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "This is a ve...", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineSmallHeight(t *testing.T) { w, h := 20, 3 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", }) // Height 3 with footer and pre-footer: 1 content + 1 pre-footer + 1 footer vp.SetPreFooterLine("Pre-footer") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "Pre-footer", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineWithHeader(t *testing.T) { w, h := 30, 6 vp := newViewport(w, h) vp.SetHeader([]string{"Header"}) setContent(vp, []string{ "line 1", "line 2", "line 3", }) // Height 6 with header, pre-footer, footer: 1 header + 3 content + 1 pre-footer + 1 footer vp.SetPreFooterLine("Pre-footer") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "Header", "line 1", "line 2", "line 3", "Pre-footer", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineDynamicToggle(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", }) // Initially no pre-footer expectedNoPreFooter := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "line 4", "100% (4/4)", }) internal.CmpStr(t, expectedNoPreFooter, vp.View()) // Set pre-footer vp.SetPreFooterLine("Pre-footer") expectedWithPreFooter := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "Pre-footer", "75% (3/4)", }) internal.CmpStr(t, expectedWithPreFooter, vp.View()) // Remove pre-footer vp.SetPreFooterLine("") internal.CmpStr(t, expectedNoPreFooter, vp.View()) // Set pre-footer again vp.SetPreFooterLine("Different pre-footer") expectedDifferent := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "Different pre-footer", "75% (3/4)", }) internal.CmpStr(t, expectedDifferent, vp.View()) } func TestPreFooterLineGetterSetter(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) // Initially empty if got := vp.GetPreFooterLine(); got != "" { t.Errorf("expected empty pre-footer initially, got %q", got) } // Set and get vp.SetPreFooterLine("Test pre-footer") if got := vp.GetPreFooterLine(); got != "Test pre-footer" { t.Errorf("expected 'Test pre-footer', got %q", got) } // Clear and get vp.SetPreFooterLine("") if got := vp.GetPreFooterLine(); got != "" { t.Errorf("expected empty pre-footer after clearing, got %q", got) } } func TestPreFooterLineReducesContentLines(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", "line 5", }) // Without pre-footer: 4 content lines visible (height 5 - 1 footer = 4) expectedNoPreFooter := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "line 4", "80% (4/5)", }) internal.CmpStr(t, expectedNoPreFooter, vp.View()) // With pre-footer: 3 content lines visible (height 5 - 1 pre-footer - 1 footer = 3) vp.SetPreFooterLine("Pre-footer") expectedWithPreFooter := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "Pre-footer", "60% (3/5)", }) internal.CmpStr(t, expectedWithPreFooter, vp.View()) } func TestPreFooterLineWithWrap(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h, WithWrapText[object](true)) setContent(vp, []string{ "short", "longer text that wraps", }) // Pre-footer should appear just above footer, after wrapped content vp.SetPreFooterLine("Pre-foot") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "short", "longer tex", "t that wra", "ps", "Pre-foot", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineScrolling(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", "line 3", "line 4", "line 5", "line 6", }) vp.SetPreFooterLine("Pre-footer") // Initially shows first 3 content lines (height 5 - 1 pre-footer - 1 footer = 3) expectedInitial := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "line 3", "Pre-footer", "50% (3/6)", }) internal.CmpStr(t, expectedInitial, vp.View()) // Scroll down vp, _ = vp.Update(downKeyMsg) expectedAfterScroll := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 2", "line 3", "line 4", "Pre-footer", "66% (4/6)", }) internal.CmpStr(t, expectedAfterScroll, vp.View()) } func TestPreFooterLineStyled(t *testing.T) { w, h := 30, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Set a styled pre-footer line styledPreFooter := internal.RedFg.Render("Red") + " and " + internal.BlueFg.Render("Blue") vp.SetPreFooterLine(styledPreFooter) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", internal.RedFg.Render("Red") + " and " + internal.BlueFg.Render("Blue"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineStyledTruncation(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Set a styled pre-footer line that exceeds width styledPreFooter := internal.RedFg.Render("This is a very long styled text") vp.SetPreFooterLine(styledPreFooter) // Should truncate with continuation indicator, preserving style expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", internal.RedFg.Render("This is a ve..."), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineDoesNotWrap(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h, WithWrapText[object](true)) setContent(vp, []string{ "short", "another", }) // Set a long pre-footer line - it should NOT wrap, only truncate vp.SetPreFooterLine("This is a very long pre-footer that should not wrap") // Pre-footer should be truncated to single line, not wrapped expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "short", "another", "", "", "This is...", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineDoesNotWrapWithWrappedContent(t *testing.T) { w, h := 10, 7 vp := newViewport(w, h, WithWrapText[object](true)) setContent(vp, []string{ "short", "this line wraps to multiple lines", }) // Set a long pre-footer line - it should NOT wrap even when content wraps vp.SetPreFooterLine("Long pre-footer text here") // Content wraps, but pre-footer should be truncated to single line expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "short", "this line ", "wraps to m", "ultiple li", "nes", "Long pr...", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineStyledWithWrap(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h, WithWrapText[object](true)) setContent(vp, []string{ "short", "longer content here", }) // Styled pre-footer should be truncated, not wrapped styledPreFooter := internal.RedFg.Render("Styled") + " " + internal.BlueFg.Render("pre-footer line") vp.SetPreFooterLine(styledPreFooter) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "short", "longer content ", "here", "", internal.RedFg.Render("Styled") + " " + internal.BlueFg.Render("pre-f..."), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineExactWidth(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Pre-footer exactly matches width - should not truncate vp.SetPreFooterLine("1234567890") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "1234567890", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineOneCharOverWidth(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Pre-footer one char over width - should truncate vp.SetPreFooterLine("12345678901") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "1234567...", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineUnicode(t *testing.T) { w, h := 20, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Pre-footer with unicode (emojis are 2 cells wide) vp.SetPreFooterLine("Status: ✓ Done") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "Status: ✓ Done", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestPreFooterLineUnicodeTruncation(t *testing.T) { w, h := 12, 4 vp := newViewport(w, h) setContent(vp, []string{ "line 1", "line 2", }) // Pre-footer with unicode that needs truncation // Each 💖 is 2 cells wide, so with width 12 we can fit 4 emojis (8 cells) + ".." (2 cells) = 10 // or 5 emojis (10 cells) + ".." (2 cells) = 12 exactly vp.SetPreFooterLine("💖💖💖💖💖💖💖💖") expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "line 1", "line 2", "💖💖💖💖💖..", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } ================================================ FILE: modules/viewport/viewport_progressbar_test.go ================================================ package viewport import ( "testing" "github.com/antgroup/hugescm/modules/viewport/internal" ) func TestProgressBarDefaultDisabled(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{"line 1", "line 2", "line 3"}) expectedView := internal.Pad(w, h, []string{ "line 1", "line 2", "line 3", "", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestProgressBarEnabled100Percent(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h, WithProgressBarEnabled[object](true)) setContent(vp, []string{"line 1", "line 2", "line 3"}) // "100% (3/3)" = 10 chars, barSpace=19, barWidth=min(10,19)=10, filled=10 expectedView := internal.Pad(w, h, []string{ "line 1", "line 2", "line 3", "", "██████████ 100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestProgressBarEnabledPartialScrolling(t *testing.T) { w, h := 30, 8 vp := newViewport(w, h) vp.SetProgressBarEnabled(true) vp.SetSelectionEnabled(true) setContent(vp, []string{"line 1", "line 2", "line 3", "line 4"}) // "25% (1/4)" = 9 chars, barSpace=20, barWidth=10, filled=int(10*25/100)=2 expectedView := internal.Pad(w, h, []string{ selectionStyle.Render("line 1"), "line 2", "line 3", "line 4", "", "", "", "██░░░░░░░░ 25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetSelectedItemIdx(1) // "50% (2/4)" = 9 chars, barWidth=10, filled=int(10*50/100)=5 expectedView = internal.Pad(w, h, []string{ "line 1", selectionStyle.Render("line 2"), "line 3", "line 4", "", "", "", "█████░░░░░ 50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetSelectedItemIdx(2) // "75% (3/4)" = 9 chars, barWidth=10, filled=int(10*75/100)=7 expectedView = internal.Pad(w, h, []string{ "line 1", "line 2", selectionStyle.Render("line 3"), "line 4", "", "", "", "███████░░░ 75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetSelectedItemIdx(3) // "100% (4/4)" = 10 chars, barSpace=19, barWidth=10, filled=10 expectedView = internal.Pad(w, h, []string{ "line 1", "line 2", "line 3", selectionStyle.Render("line 4"), "", "", "", "██████████ 100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestProgressBarTooNarrowOmitted(t *testing.T) { w, h := 13, 5 vp := newViewport(w, h, WithProgressBarEnabled[object](true)) setContent(vp, []string{"line 1", "line 2", "line 3"}) // "100% (3/3)" = 10 chars, barSpace = 13-10-1 = 2 < 3, no bar expectedView := internal.Pad(w, h, []string{ "line 1", "line 2", "line 3", "", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestProgressBarMinimumWidth(t *testing.T) { w, h := 14, 5 vp := newViewport(w, h, WithProgressBarEnabled[object](true)) setContent(vp, []string{"line 1", "line 2", "line 3"}) // "100% (3/3)" = 10 chars, barSpace = 14-10-1 = 3, barWidth=min(10,3)=3, filled=3 expectedView := internal.Pad(w, h, []string{ "line 1", "line 2", "line 3", "", "███ 100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestProgressBarToggle(t *testing.T) { w, h := 30, 5 vp := newViewport(w, h) setContent(vp, []string{"line 1", "line 2", "line 3"}) plainFooter := internal.Pad(w, h, []string{ "line 1", "line 2", "line 3", "", "100% (3/3)", }) internal.CmpStr(t, plainFooter, vp.View()) vp.SetProgressBarEnabled(true) withBar := internal.Pad(w, h, []string{ "line 1", "line 2", "line 3", "", "██████████ 100% (3/3)", }) internal.CmpStr(t, withBar, vp.View()) vp.SetProgressBarEnabled(false) internal.CmpStr(t, plainFooter, vp.View()) } func TestBuildProgressBar(t *testing.T) { cases := []struct { pct, width int expected string }{ {100, 10, "██████████"}, {0, 10, "░░░░░░░░░░"}, {50, 10, "█████░░░░░"}, {75, 10, "███████░░░"}, {25, 10, "██░░░░░░░░"}, {33, 6, "█░░░░░"}, {100, 3, "███"}, {0, 3, "░░░"}, {100, 0, ""}, {50, 0, ""}, } for _, c := range cases { got := buildProgressBar(c.pct, c.width) if got != c.expected { t.Errorf("buildProgressBar(%d, %d) = %q, want %q", c.pct, c.width, got, c.expected) } } } ================================================ FILE: modules/viewport/viewport_saving_test.go ================================================ package viewport import ( "os" "path/filepath" "strings" "testing" "time" "charm.land/bubbles/v2/key" tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) type saveTestObject struct { item item.Item } func (o saveTestObject) GetItem() item.Item { return o.item } var ( enterKeyMsg = tea.KeyPressMsg{Code: tea.KeyEnter, Text: "enter"} escapeKeyMsg = tea.KeyPressMsg{Code: tea.KeyEscape, Text: "esc"} saveKey = key.NewBinding(key.WithKeys("ctrl+s")) saveKeyMsg = tea.KeyPressMsg{Code: 's', Mod: tea.ModCtrl} ) func newSaveTestViewport(t *testing.T) (*Model[saveTestObject], string) { t.Helper() tmpDir := t.TempDir() vp := New[saveTestObject](80, 24, WithFileSaving[saveTestObject](tmpDir, saveKey), ) return vp, tmpDir } func setSaveTestContent(vp *Model[saveTestObject], lines []string) { objects := make([]saveTestObject, len(lines)) for i, line := range lines { objects[i] = saveTestObject{item: item.NewItem(line)} } vp.SetObjects(objects) } func TestFileSaving_PressingSaveKeyEntersFilenameMode(t *testing.T) { vp, _ := newSaveTestViewport(t) setSaveTestContent(vp, []string{"line1", "line2"}) if vp.IsCapturingInput() { t.Error("expected IsCapturingInput to be false initially") } vp, cmd := vp.Update(saveKeyMsg) if !vp.IsCapturingInput() { t.Error("expected IsCapturingInput to be true after pressing save key") } if cmd == nil { t.Error("expected a command (textinput.Blink) to be returned") } // view should show save prompt view := vp.View() if !strings.Contains(view, "Save as:") { t.Error("expected view to contain 'Save as:' prompt") } } func TestFileSaving_EscapeCancelsFilenameEntry(t *testing.T) { vp, _ := newSaveTestViewport(t) setSaveTestContent(vp, []string{"line1", "line2"}) vp, _ = vp.Update(saveKeyMsg) if !vp.IsCapturingInput() { t.Fatal("expected to be in filename entry mode") } vp, _ = vp.Update(escapeKeyMsg) if vp.IsCapturingInput() { t.Error("expected IsCapturingInput to be false after escape") } // view should no longer show save prompt view := vp.View() if strings.Contains(view, "Save as:") { t.Error("expected view to not contain 'Save as:' after escape") } } func TestFileSaving_EnterWithEmptyInputUsesTimestampDefault(t *testing.T) { vp, tmpDir := newSaveTestViewport(t) setSaveTestContent(vp, []string{"content line 1", "content line 2"}) vp, _ = vp.Update(saveKeyMsg) if !vp.IsCapturingInput() { t.Fatal("expected to be in filename entry mode") } beforeSave := time.Now() vp, cmd := vp.Update(enterKeyMsg) afterSave := time.Now() if vp.IsCapturingInput() { t.Error("expected IsCapturingInput to be false after enter") } if cmd == nil { t.Fatal("expected saveToFile command to be returned") } // view should show "Saving..." view := vp.View() if !strings.Contains(view, "Saving...") { t.Error("expected view to show 'Saving...' status") } msg := cmd() savedMsg, ok := msg.(fileSavedMsg) if !ok { t.Fatalf("expected fileSavedMsg, got %T", msg) } if savedMsg.err != nil { t.Fatalf("unexpected save error: %v", savedMsg.err) } filename := filepath.Base(savedMsg.filename) if !strings.HasSuffix(filename, ".txt") { t.Errorf("expected .txt extension, got %s", filename) } // verify file exists and has correct content content, err := os.ReadFile(savedMsg.filename) if err != nil { t.Fatalf("failed to read saved file: %v", err) } expectedContent := "content line 1\ncontent line 2\n" if string(content) != expectedContent { t.Errorf("expected content %q, got %q", expectedContent, string(content)) } // verify file is in the correct directory if filepath.Dir(savedMsg.filename) != tmpDir { t.Errorf("expected file in %s, got %s", tmpDir, filepath.Dir(savedMsg.filename)) } // verify timestamp is reasonable (within test execution window) timestampPart := strings.TrimSuffix(filename, ".txt") fileTime, err := time.ParseInLocation("20060102-150405", timestampPart, time.Local) if err != nil { t.Errorf("filename %s doesn't match timestamp format: %v", filename, err) } else { if fileTime.Before(beforeSave.Add(-2*time.Second)) || fileTime.After(afterSave.Add(2*time.Second)) { t.Errorf("timestamp %v not within expected range [%v, %v]", fileTime, beforeSave, afterSave) } } } func TestFileSaving_EnterWithCustomFilename(t *testing.T) { vp, tmpDir := newSaveTestViewport(t) setSaveTestContent(vp, []string{"test content"}) vp, _ = vp.Update(saveKeyMsg) // type custom filename for _, r := range "myfile" { vp, _ = vp.Update(internal.MakeKeyMsg(r)) } _, cmd := vp.Update(enterKeyMsg) if cmd == nil { t.Fatal("expected saveToFile command") } msg := cmd() savedMsg, ok := msg.(fileSavedMsg) if !ok { t.Fatalf("expected fileSavedMsg, got %T", msg) } if savedMsg.err != nil { t.Fatalf("unexpected save error: %v", savedMsg.err) } expectedPath := filepath.Join(tmpDir, "myfile.txt") if savedMsg.filename != expectedPath { t.Errorf("expected filename %s, got %s", expectedPath, savedMsg.filename) } // verify file exists if _, err := os.Stat(expectedPath); os.IsNotExist(err) { t.Errorf("expected file %s to exist", expectedPath) } } func TestFileSaving_CustomFilenameWithExtension(t *testing.T) { vp, tmpDir := newSaveTestViewport(t) setSaveTestContent(vp, []string{"test"}) vp, _ = vp.Update(saveKeyMsg) // type filename with .txt extension already for _, r := range "already.txt" { vp, _ = vp.Update(internal.MakeKeyMsg(r)) } _, cmd := vp.Update(enterKeyMsg) msg := cmd() savedMsg := msg.(fileSavedMsg) // should not double the extension expectedPath := filepath.Join(tmpDir, "already.txt") if savedMsg.filename != expectedPath { t.Errorf("expected filename %s, got %s", expectedPath, savedMsg.filename) } } func TestFileSaving_ContentStripsAnsiCodes(t *testing.T) { vp, _ := newSaveTestViewport(t) // set content with ANSI styling redStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("#FF0000")) styledLine := redStyle.Render("styled text") objects := []saveTestObject{ {item: item.NewItem(styledLine)}, {item: item.NewItem("plain text")}, } vp.SetObjects(objects) vp, _ = vp.Update(saveKeyMsg) _, cmd := vp.Update(enterKeyMsg) msg := cmd() savedMsg := msg.(fileSavedMsg) content, err := os.ReadFile(savedMsg.filename) if err != nil { t.Fatalf("failed to read file: %v", err) } // content should not contain ANSI escape codes if strings.Contains(string(content), "\x1b[") { t.Error("saved content should not contain ANSI escape codes") } expectedContent := "styled text\nplain text\n" if string(content) != expectedContent { t.Errorf("expected %q, got %q", expectedContent, string(content)) } } func TestFileSaving_SuccessMessageShownAfterSave(t *testing.T) { tmpDir := t.TempDir() vp := New[saveTestObject](200, 24, // wide viewport to avoid truncation WithFileSaving[saveTestObject](tmpDir, saveKey), ) setSaveTestContent(vp, []string{"test"}) // go through complete save flow vp, _ = vp.Update(saveKeyMsg) vp, cmd := vp.Update(enterKeyMsg) // execute save command msg := cmd() savedMsg := msg.(fileSavedMsg) // send the result message back to viewport vp, _ = vp.Update(savedMsg) // view should show success message with path view := vp.View() if !strings.Contains(view, "Saved to") { t.Error("expected view to show 'Saved to' message") } if !strings.Contains(view, tmpDir) { t.Errorf("expected view to contain save directory %s", tmpDir) } } func TestFileSaving_ErrorMessageShownOnFailure(t *testing.T) { vp, _ := newSaveTestViewport(t) setSaveTestContent(vp, []string{"test"}) // go through save flow vp, _ = vp.Update(saveKeyMsg) vp, _ = vp.Update(enterKeyMsg) // simulate error response vp, _ = vp.Update(fileSavedMsg{err: os.ErrPermission}) // view should show error message view := vp.View() if !strings.Contains(view, "failed") && !strings.Contains(view, "Save failed") { t.Errorf("expected view to show error message, got: %s", view) } } func TestFileSaving_IgnoresSaveKeyWhenAlreadyCapturingInput(t *testing.T) { vp, _ := newSaveTestViewport(t) setSaveTestContent(vp, []string{"test"}) // enter filename mode vp, _ = vp.Update(saveKeyMsg) if !vp.IsCapturingInput() { t.Fatal("expected to be capturing input") } // type something vp, _ = vp.Update(internal.MakeKeyMsg('a')) // press save key again - should be ignored, typed text preserved vp, cmd := vp.Update(saveKeyMsg) // should still be capturing input if !vp.IsCapturingInput() { t.Error("should still be capturing input") } if cmd != nil { t.Error("expected no command when ignoring duplicate save key") } // verify we can still complete the save with the typed filename _, cmd = vp.Update(enterKeyMsg) if cmd == nil { t.Fatal("expected save command") } msg := cmd() savedMsg := msg.(fileSavedMsg) if !strings.Contains(savedMsg.filename, "a.txt") { t.Errorf("expected filename to contain 'a.txt', got %s", savedMsg.filename) } } func TestFileSaving_TextInputReceivesKeyMessages(t *testing.T) { vp, tmpDir := newSaveTestViewport(t) setSaveTestContent(vp, []string{"test"}) vp, _ = vp.Update(saveKeyMsg) // type some characters vp, _ = vp.Update(internal.MakeKeyMsg('a')) vp, _ = vp.Update(internal.MakeKeyMsg('b')) vp, _ = vp.Update(internal.MakeKeyMsg('c')) // verify by completing the save and checking filename _, cmd := vp.Update(enterKeyMsg) msg := cmd() savedMsg := msg.(fileSavedMsg) expectedPath := filepath.Join(tmpDir, "abc.txt") if savedMsg.filename != expectedPath { t.Errorf("expected filename %s, got %s", expectedPath, savedMsg.filename) } } func TestFileSaving_NoSaveDirConfigured(t *testing.T) { // viewport without file saving configured vp := New[saveTestObject](80, 24) setSaveTestContent(vp, []string{"test"}) vp, cmd := vp.Update(saveKeyMsg) if vp.IsCapturingInput() { t.Error("should not enter filename mode when saveDir not configured") } if cmd != nil { t.Error("expected no command when saveDir not configured") } } func TestFileSaving_IsCapturingInputReturnsFalse_Initially(t *testing.T) { vp, _ := newSaveTestViewport(t) if vp.IsCapturingInput() { t.Error("expected IsCapturingInput to return false initially") } } func TestFileSaving_IsCapturingInputReturnsFalse_AfterSaveComplete(t *testing.T) { vp, _ := newSaveTestViewport(t) setSaveTestContent(vp, []string{"test"}) // complete a save vp, _ = vp.Update(saveKeyMsg) vp, cmd := vp.Update(enterKeyMsg) msg := cmd() vp, _ = vp.Update(msg) // should not be capturing input while showing result if vp.IsCapturingInput() { t.Error("expected IsCapturingInput to return false when showing result") } } func TestFileSaving_NavigationKeysIgnoredDuringFilenameEntry(t *testing.T) { vp, tmpDir := newSaveTestViewport(t) vp.SetSelectionEnabled(true) setSaveTestContent(vp, []string{"line1", "line2", "line3", "line4", "line5"}) vp, _ = vp.Update(saveKeyMsg) // try navigation keys - these should be typed into filename, not navigate vp, _ = vp.Update(internal.MakeKeyMsg('j')) // down vp, _ = vp.Update(internal.MakeKeyMsg('k')) // up vp, _ = vp.Update(internal.MakeKeyMsg('g')) // top vp, _ = vp.Update(internal.MakeKeyMsg('G')) // bottom // filename should be jkgG.txt _, cmd := vp.Update(enterKeyMsg) msg := cmd() savedMsg := msg.(fileSavedMsg) expectedPath := filepath.Join(tmpDir, "jkgG.txt") if savedMsg.filename != expectedPath { t.Errorf("expected filename %s, got %s", expectedPath, savedMsg.filename) } } func TestFileSaving_CreatesDirIfNotExists(t *testing.T) { tmpDir := t.TempDir() nestedDir := filepath.Join(tmpDir, "nested", "save", "dir") vp := New[saveTestObject](80, 24, WithFileSaving[saveTestObject](nestedDir, saveKey), ) setSaveTestContent(vp, []string{"test content"}) vp, _ = vp.Update(saveKeyMsg) _, cmd := vp.Update(enterKeyMsg) msg := cmd() savedMsg := msg.(fileSavedMsg) if savedMsg.err != nil { t.Fatalf("save failed: %v", savedMsg.err) } // verify directory was created if _, err := os.Stat(nestedDir); os.IsNotExist(err) { t.Errorf("expected directory %s to be created", nestedDir) } // verify file exists if _, err := os.Stat(savedMsg.filename); os.IsNotExist(err) { t.Errorf("expected file %s to exist", savedMsg.filename) } } ================================================ FILE: modules/viewport/viewport_selection_no_wrap_test.go ================================================ package viewport import ( "strconv" "testing" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) func TestViewport_SelectionOn_WrapOff_Empty(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetSelectionEnabled(true) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{}) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeader([]string{"header"}) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"header"}) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SmolDimensions(t *testing.T) { w, h := 0, 0 vp := newViewport(w, h) vp.SetSelectionEnabled(true) vp.SetHeader([]string{"header"}) setContent(vp, []string{"hi"}) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{""}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(1) vp.SetHeight(1) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"."}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(2) vp.SetHeight(2) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"..", ""}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(3) vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "...", internal.BlueFg.Render("hi"), "...", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_Basic(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really rea..."), internal.RedFg.Render("a") + " really rea...", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_GetConfigs(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first", "second", }) if selectionEnabled := vp.GetSelectionEnabled(); !selectionEnabled { t.Errorf("expected selection to be enabled, got %v", selectionEnabled) } if wrapText := vp.GetWrapText(); wrapText { t.Errorf("expected text wrapping to be disabled, got %v", wrapText) } if selectedItemIdx := vp.GetSelectedItemIdx(); selectedItemIdx != 0 { t.Errorf("expected selected item index to be 0, got %v", selectedItemIdx) } vp, _ = vp.Update(downKeyMsg) if selectedItemIdx := vp.GetSelectedItemIdx(); selectedItemIdx != 1 { t.Errorf("expected selected item index to be 1, got %v", selectedItemIdx) } if selectedItem := vp.GetSelectedItem(); selectedItem != nil && selectedItem.GetItem().Content() != "second" { t.Errorf("got unexpected selected item: %v", selectedItem) } } func TestViewport_SelectionOn_WrapOff_ShowFooter(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really rea..."), "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(6) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really rea..."), internal.RedFg.Render("a") + " really rea...", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(7) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really rea..."), internal.RedFg.Render("a") + " really rea...", "", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_FooterStyle(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h, WithStyles[object](Styles{ FooterStyle: internal.RedFg, SelectedItemStyle: selectionStyle, })) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "1", "2", "3", "4", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("1"), "2", "3", internal.RedFg.Render("25% (1/4)"), }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_FooterDisabled(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first line", "second line", "third line", "fourth line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), "second line", "third line", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetFooterEnabled(false) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), "second line", "third line", "fourth line", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SpaceAround(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ " first line ", " first line ", " first line ", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render(" first li..."), " fi...", " ...", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_MultiHeader(t *testing.T) { w, h := 15, 2 vp := newViewport(w, h) vp.SetHeader([]string{"header1", "header2"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "line1", "line2", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(4) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", internal.BlueFg.Render("line1"), "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", internal.BlueFg.Render("line2"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(5) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", internal.BlueFg.Render("line2"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(6) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", internal.BlueFg.Render("line2"), "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_OverflowLine(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"long header overflows"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "123456789012345", "1234567890123456", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "long header ...", internal.BlueFg.Render("123456789012345"), "123456789012...", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_OverflowHeight(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "123456789012345", "1234567890123456", "1234567890123456", "1234567890123456", "1234567890123456", "1234567890123456", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("123456789012345"), "123456789012...", "123456789012...", "123456789012...", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_Scrolling(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) doSetContent := func() { setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) } validate := func(expectedView string) { // set Item multiple times to confirm no side effects of doing it internal.CmpStr(t, expectedView, vp.View()) doSetContent() internal.CmpStr(t, expectedView, vp.View()) } doSetContent() expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "third", "fourth", "16% (1/6)", }) validate(expectedView) // scrolling up past top is no-op vp, _ = vp.Update(upKeyMsg) validate(expectedView) // scrolling down by one vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", internal.BlueFg.Render("second"), "third", "fourth", "33% (2/6)", }) validate(expectedView) // scrolling to bottom vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "fourth", "fifth", internal.BlueFg.Render("sixth"), "100% (6/6)", }) validate(expectedView) // scrolling down past bottom when at bottom is no-op vp, _ = vp.Update(downKeyMsg) validate(expectedView) } func TestViewport_SelectionOn_WrapOff_EnsureItemInView(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll so last item in view vp.EnsureItemInView(5, 0, 0, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fifth", "sixth", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll so second item in view vp.EnsureItemInView(1, 0, 0, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "third", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("second"), "third", "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) // ensure idempotence vp.EnsureItemInView(1, 0, 0, 0, 0) internal.CmpStr(t, expectedView, vp.View()) // invalid values truncated vp.EnsureItemInView(1, -1, 1e9, 0, 0) internal.CmpStr(t, expectedView, vp.View()) // full width ok vp.EnsureItemInView(1, 0, len("second"), 0, 0) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_EnsureItemInViewVerticalPad(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) numItems := 100 nums := make([]string, 0, numItems) for i := range numItems { nums = append(nums, strconv.Itoa(i+1)) } setContent(vp, nums) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("1"), "2", "3", "4", "1% (1/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "5" with verticalPad=1 // should leave 1 line of context below vp.SetSelectedItemIdx(4) vp.EnsureItemInView(4, 0, 0, 1, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "3", "4", selectionStyle.Render("5"), "6", "5% (5/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll up to "3" with verticalPad=1 // should leave 1 line of context above vp.SetSelectedItemIdx(2) vp.EnsureItemInView(2, 0, 0, 1, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "2", selectionStyle.Render("3"), "4", "5", "3% (3/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "8" with verticalPad=2 // should leave 2 lines of context above vp.SetSelectedItemIdx(99) // reset to bottom vp.EnsureItemInView(99, 0, 0, 0, 0) vp.SetSelectedItemIdx(7) vp.EnsureItemInView(7, 0, 0, 2, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "6", "7", selectionStyle.Render("8"), "9", "8% (8/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "99", not enough content below for verticalPad=3 // pad below as much as possible vp.SetSelectedItemIdx(0) // reset to top vp.EnsureItemInView(0, 0, 0, 0, 0) vp.SetSelectedItemIdx(98) vp.EnsureItemInView(98, 0, 0, 3, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "97", "98", selectionStyle.Render("99"), "100", "99% (99...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "50", request more padding than is available given viewport height -> center item vp.SetSelectedItemIdx(0) // reset to top vp.EnsureItemInView(0, 0, 0, 0, 0) vp.SetSelectedItemIdx(49) vp.EnsureItemInView(49, 0, 0, 3, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "49", selectionStyle.Render("50"), "51", "52", "50% (50...", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_EnsureItemInViewHorizontalPad(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "some line that is really long", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("some li..."), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: pan right to space after "line" with horizontalPad=2 // should leave 2 columns of padding to the right vp.SetSelectedItemIdx(0) // reset to top vp.EnsureItemInView(0, 0, 0, 0, 0) vp.EnsureItemInView(0, len("some line"), len("some line "), 0, 2) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("...line..."), // 'so|me line_th|at is really long' "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: pan to the visible "me" of "some" with horizontalPad=1 // should leave 1 column of context to the left vp.EnsureItemInView(0, len("so"), len("some"), 0, 1) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("... lin..."), // 's|o__ line t|hat is really long' "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: pan right to the " r" of "is really" with huge horizontalPad // should center the target portion horizontally vp.EnsureItemInView(0, len("some line that is"), len("some line that is r"), 0, 100) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("...s re..."), // 'some line tha|t is__eall|y long' "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SetXOffset(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", }) initialExpectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("the fir..."), "the sec...", "", "50% (1/2)", }) internal.CmpStr(t, initialExpectedView, vp.View()) vp.SetXOffset(-1) internal.CmpStr(t, initialExpectedView, vp.View()) vp.SetXOffset(0) internal.CmpStr(t, initialExpectedView, vp.View()) vp.SetXOffset(4) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("...st line"), "...ond ...", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(1000) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("...t line"), "...nd line", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_BulkScrolling(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // full page down vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("third"), "fourth", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // half page down vp, _ = vp.Update(halfPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("fourth"), "fifth", "66% (4/6)", }) internal.CmpStr(t, expectedView, vp.View()) // full page down vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fifth", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // full page up vp, _ = vp.Update(fullPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", internal.BlueFg.Render("fourth"), "66% (4/6)", }) internal.CmpStr(t, expectedView, vp.View()) // half page up vp, _ = vp.Update(halfPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", internal.BlueFg.Render("third"), "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // half page up vp, _ = vp.Update(halfPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", internal.BlueFg.Render("second"), "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) // full page up vp, _ = vp.Update(fullPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // go to bottom vp, _ = vp.Update(goToBottomKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fifth", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // go to top vp, _ = vp.Update(goToTopKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_Panning(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header long"}) vp.SetSelectionEnabled(true) doSetContent := func() { setContent(vp, []string{ "first line that is fairly long", "second line that is even much longer than the first", "third line that is fairly long", "fourth", "fifth line that is fairly long", "sixth", }) } validate := func(expectedView string) { // set Item multiple times to confirm no side effects of doing it internal.CmpStr(t, expectedView, vp.View()) doSetContent() internal.CmpStr(t, expectedView, vp.View()) } doSetContent() expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", internal.BlueFg.Render("first l..."), "second ...", "third l...", "fourth", "16% (1/6)", }) validate(expectedView) // pan right vp.SetXOffset(5) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", internal.BlueFg.Render("...ne t..."), "...ine ...", "...ne t...", ".", "16% (1/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...ne t...", internal.BlueFg.Render("...ine ..."), "...ne t...", ".", "33% (2/6)", }) validate(expectedView) // pan all the way right vp.SetXOffset(41) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...", internal.BlueFg.Render("...e first"), "...", "...", "33% (2/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...", "...e first", internal.BlueFg.Render("..."), "...", "50% (3/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...", "...e first", "...", internal.BlueFg.Render("..."), "66% (4/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...e first", "...", "...", internal.BlueFg.Render("..."), "83% (5/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...ly long", "...", "...ly long", internal.BlueFg.Render("..."), "100% (6/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...ly long", "...", internal.BlueFg.Render("...ly long"), "...", "83% (5/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", "...ly long", internal.BlueFg.Render("..."), "...ly long", "...", "66% (4/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", internal.BlueFg.Render("...ly long"), "...", "...ly long", "...", "50% (3/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", internal.BlueFg.Render("...n mu..."), "...ly long", "...", "...ly long", "33% (2/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", internal.BlueFg.Render("...ly long"), "...n mu...", "...ly long", "...", "16% (1/6)", }) validate(expectedView) // set shorter Item setContent(vp, []string{ "the first one", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header ...", internal.BlueFg.Render("...rst one"), "", "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_MaintainSelection(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetSelectionComparator(objectsEqual) setContent(vp, []string{ "sixth", "seventh", "eighth", "ninth", "tenth", "eleventh", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("sixth"), "seventh", "eighth", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // selection down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "sixth", internal.BlueFg.Render("seventh"), "eighth", "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item above setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth", "eleventh", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "sixth", internal.BlueFg.Render("seventh"), "eighth", "63% (7/11)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item below setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth", "sixteenth", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "sixth", internal.BlueFg.Render("seventh"), "eighth", "43% (7/16)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_StickyTop(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetTopSticky(true) setContent(vp, []string{ "first", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "second", "first", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("second"), "first", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // de-activate by moving selection down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", internal.BlueFg.Render("first"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "second", "first", "third", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", internal.BlueFg.Render("first"), "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_StickyBottom(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetBottomSticky(true) setContent(vp, []string{ "first", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "second", "first", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", internal.BlueFg.Render("first"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // de-activate by moving selection up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("second"), "first", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "second", "first", "third", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("second"), "first", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_StickyBottomOverflowHeight(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetBottomSticky(true) // test covers case where first set Item to empty, then overflow height setContent(vp, []string{}) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", }) internal.CmpStr(t, expectedView, vp.View()) setContent(vp, []string{ "second", "first", "third", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", internal.BlueFg.Render("third"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_StickyTopBottom(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetTopSticky(true) vp.SetBottomSticky(true) setContent(vp, []string{ "first", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item, top sticky wins out arbitrarily when both set setContent(vp, []string{ "second", "first", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("second"), "first", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // selection to bottom vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", internal.BlueFg.Render("first"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "second", "first", "third", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", internal.BlueFg.Render("third"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // de-activate by moving selection up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "third", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "second", "first", "third", "fourth", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "third", "50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_StickyTop(t *testing.T) { w, h := 15, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(false) vp.SetTopSticky(true) setContent(vp, []string{ "first", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add item setContent(vp, []string{ "second", "first", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "first", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to de-activate sticky vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "first", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add item - should not return to top setContent(vp, []string{ "third", "second", "first", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "second", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_StickyBottom(t *testing.T) { w, h := 15, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(false) vp.SetBottomSticky(true) setContent(vp, []string{ "first", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add item at bottom setContent(vp, []string{ "first", "second", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll up to de-activate sticky vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add item - should not jump to bottom setContent(vp, []string{ "first", "second", "third", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_StickyBottomOverflowHeight(t *testing.T) { w, h := 15, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(false) vp.SetBottomSticky(true) setContent(vp, []string{}) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", }) internal.CmpStr(t, expectedView, vp.View()) // add more items than fit in viewport setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fifth", "100% (5/5)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOff_StickyTopBottom(t *testing.T) { w, h := 15, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(false) vp.SetTopSticky(true) vp.SetBottomSticky(true) setContent(vp, []string{ "first", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add item, top sticky wins when both set setContent(vp, []string{ "first", "second", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll to bottom vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add item - bottom sticky should activate setContent(vp, []string{ "first", "second", "third", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll up to middle vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // add item - neither sticky should activate (not at top or bottom) setContent(vp, []string{ "first", "second", "third", "fourth", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "second", "50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_RemoveLogsWhenSelectionBottom(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) // add Item setContent(vp, []string{ "second", "first", "third", "fourth", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("second"), "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) // selection to bottom vp.SetSelectedItemIdx(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("fourth"), "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) // remove Item setContent(vp, []string{ "second", "first", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_ChangeHeight(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height vp.SetHeight(8) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "third", "fourth", "fifth", "sixth", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to third line vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", internal.BlueFg.Render("third"), "fourth", "fifth", "sixth", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // reduce height vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("third"), "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height vp.SetHeight(8) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", internal.BlueFg.Render("third"), "fourth", "fifth", "sixth", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to last line vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "third", "fourth", "fifth", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // reduce height vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height vp.SetHeight(8) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "third", "fourth", "fifth", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_ChangeContent(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "third", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to bottom vp.SetSelectedItemIdx(5) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fourth", "fifth", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // remove Item setContent(vp, []string{ "second", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("second"), "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // remove all Item setContent(vp, []string{}) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", }) internal.CmpStr(t, expectedView, vp.View()) // add Item (maintain selection off) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "third", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_AnsiOnSelection(t *testing.T) { w, h := 20, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "line with " + internal.RedFg.Render("red") + " text", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("line with red text"), // selection style overrides text styling "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_AnsiOnSelection_NoOverride(t *testing.T) { w, h := 20, 5 vp := newViewport(w, h, WithSelectionStyleOverridesItemStyle[object](false)) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "line with " + internal.RedFg.Render("red") + " text", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("line with ") + internal.RedFg.Render("red") + selectionStyle.Render(" text"), // item style preserved "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SelectionEmpty(t *testing.T) { w, h := 20, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render(" "), "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_ExtraSlash(t *testing.T) { w, h := 25, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "|2024|" + internal.RedFg.Render("fl..lq") + "/" + internal.RedFg.Render("flask-3") + "|", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("|2024|fl..lq/flask-3|"), // selection style overrides text styling "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SetHighlights(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 9, }, Style: internal.GreenFg, }, }, { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 10, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the ") + internal.GreenFg.Render("first") + internal.BlueFg.Render(" line"), "the " + internal.RedFg.Render("second") + " line", "the third line", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SetHighlightsStyledContent(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ internal.RedFg.Render("the first line"), internal.GreenFg.Render("the second line"), internal.BlueFg.Render("the third line"), internal.RedFg.Render("the fourth line"), }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 9, }, Style: internal.GreenFg, }, }, { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 10, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("the ") + internal.GreenFg.Render("first") + selectionStyle.Render(" line"), internal.GreenFg.Render("the ") + internal.RedFg.Render("second") + internal.GreenFg.Render(" line"), internal.BlueFg.Render("the third line"), "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SetHighlightsStyledContent_NoOverride(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h, WithSelectionStyleOverridesItemStyle[object](false)) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ internal.RedFg.Render("the first line"), internal.GreenFg.Render("the second line"), internal.BlueFg.Render("the third line"), internal.RedFg.Render("the fourth line"), }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 9, }, Style: internal.GreenFg, }, }, { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 4, End: 10, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.RedFg.Render("the ") + internal.GreenFg.Render("first") + internal.RedFg.Render(" line"), // item style preserved, highlight applied internal.GreenFg.Render("the ") + internal.RedFg.Render("second") + internal.GreenFg.Render(" line"), internal.BlueFg.Render("the third line"), "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SetHighlightsAnsiUnicode(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"A💖中é"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "A💖中é line", "another line", }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 1, End: 8, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "A💖中é", internal.BlueFg.Render("A") + internal.RedFg.Render("💖中") + internal.BlueFg.Render("é line"), "another line", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionPrefix_Basic(t *testing.T) { w, h := 20, 6 prefix := "> " vp := newViewport(w, h, WithStyles[object](Styles{ SelectionPrefix: prefix, FooterStyle: lipgloss.NewStyle(), SelectedItemStyle: selectionStyle, })) vp.SetSelectionEnabled(true) setContent(vp, []string{"first", "second", "third"}) // selection on first item: prefix on first, padding on others expectedView := internal.Pad(w, h, []string{ prefix + selectionStyle.Render("first"), " " + "second", " " + "third", "", "", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(w, h, []string{ " " + "first", prefix + selectionStyle.Render("second"), " " + "third", "", "", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // verify content width is reduced (long line truncated at contentWidth = 18) setContent(vp, []string{"short", "this is a longer line that should truncate"}) vp.SetSelectedItemIdx(0) expectedView = internal.Pad(w, h, []string{ prefix + selectionStyle.Render("short"), " " + "this is a longe...", "", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionPrefix_NoColor(t *testing.T) { // simulates NO_COLOR: all styles are empty, only the prefix shows selection w, h := 20, 5 prefix := "> " emptyStyle := lipgloss.NewStyle() vp := newViewport(w, h, WithStyles[object](Styles{ SelectionPrefix: prefix, FooterStyle: emptyStyle, SelectedItemStyle: emptyStyle, })) vp.SetSelectionEnabled(true) setContent(vp, []string{"alpha", "beta"}) expectedView := internal.Pad(w, h, []string{ // selected line has prefix but no style (emptyStyle is a no-op) prefix + "alpha", " " + "beta", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_SetSameDimensionsPreservesScrollPosition(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", }) // move selection to fifth item, causing a scroll vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView := internal.Pad(w, h, []string{ "header", "third", "fourth", internal.BlueFg.Render("fifth"), "62% (5/8)", }) internal.CmpStr(t, expectedView, vp.View()) // setting the same width and height should not change the scroll position vp.SetWidth(w) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(h) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOff_ChangeHeightPreservesSelectionPosition(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", }) // move selection to fifth item vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView := internal.Pad(w, h, []string{ "header", "second", "third", "fourth", internal.BlueFg.Render("fifth"), "62% (5/8)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height - selection should remain visible and not jump to the top vp.SetHeight(10) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", "third", "fourth", internal.BlueFg.Render("fifth"), "sixth", "seventh", "eighth", "62% (5/8)", }) internal.CmpStr(t, expectedView, vp.View()) // reduce height - selection should still be visible vp.SetHeight(4) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fourth", internal.BlueFg.Render("fifth"), "62% (5/8)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionPrefix_EmptyPrefix(t *testing.T) { // when SelectionPrefix is empty, no prefix or padding is added w, h := 20, 5 vp := newViewport(w, h) // default test helper has empty prefix vp.SetSelectionEnabled(true) setContent(vp, []string{"first", "second"}) expectedView := internal.Pad(w, h, []string{ selectionStyle.Render("first"), "second", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } ================================================ FILE: modules/viewport/viewport_selection_wrap_test.go ================================================ package viewport import ( "strconv" "strings" "testing" "time" "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) func TestViewport_SelectionOn_WrapOn_Empty(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetWrapText(true) vp.SetSelectionEnabled(true) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{}) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeader([]string{"header"}) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"header"}) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_SmolDimensions(t *testing.T) { w, h := 0, 0 vp := newViewport(w, h) vp.SetWrapText(true) vp.SetSelectionEnabled(true) vp.SetHeader([]string{"header"}) setContent(vp, []string{"hi"}) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{""}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(1) vp.SetHeight(1) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"h"}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(2) vp.SetHeight(2) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"he", "ad"}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(3) vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{"hea", "der", ""}) internal.CmpStr(t, expectedView, vp.View()) vp.SetWidth(4) vp.SetHeight(4) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "head", "er", internal.BlueFg.Render("hi"), "1...", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_Basic(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_GetConfigs(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first", "second", }) if selectionEnabled := vp.GetSelectionEnabled(); !selectionEnabled { t.Errorf("expected selection to be enabled, got %v", selectionEnabled) } if wrapText := vp.GetWrapText(); !wrapText { t.Errorf("expected text wrapping to be enabled, got %v", wrapText) } if selectedItemIdx := vp.GetSelectedItemIdx(); selectedItemIdx != 0 { t.Errorf("expected selected item index to be 0, got %v", selectedItemIdx) } vp, _ = vp.Update(downKeyMsg) if selectedItemIdx := vp.GetSelectedItemIdx(); selectedItemIdx != 1 { t.Errorf("expected selected item index to be 1, got %v", selectedItemIdx) } if selectedItem := vp.GetSelectedItem(); selectedItem != nil && selectedItem.GetItem().Content() != "second" { t.Errorf("got unexpected selected item: %v", selectedItem) } } func TestViewport_SelectionOn_WrapOn_ShowFooter(t *testing.T) { w, h := 15, 7 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first line", internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really long line"), internal.RedFg.Render("a") + " really really long line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), internal.RedFg.Render("a") + " really really", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(8) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), internal.RedFg.Render("a") + " really really", " long line", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(9) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), internal.RedFg.Render("second") + " line", internal.RedFg.Render("a really really"), internal.RedFg.Render(" long line"), internal.RedFg.Render("a") + " really really", " long line", "", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_FooterStyle(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h, WithStyles[object](Styles{ FooterStyle: internal.RedFg, SelectedItemStyle: selectionStyle, })) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "1", "2", "3", "4", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("1"), "2", "3", internal.RedFg.Render("25% (1/4)"), }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_FooterDisabled(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "first line", "second line", "third line", "fourth line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), "second line", "third line", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetFooterEnabled(false) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line"), "second line", "third line", "fourth line", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_SpaceAround(t *testing.T) { w, h := 15, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ " first line ", " first line ", " first line ", }) // trailing space is not trimmed expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render(" first line "), internal.BlueFg.Render(" "), " first", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_MultiHeader(t *testing.T) { w, h := 15, 2 vp := newViewport(w, h) vp.SetHeader([]string{"header1", "header2"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "line1", "line2", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(4) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", internal.BlueFg.Render("line1"), "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", internal.BlueFg.Render("line2"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(5) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", internal.BlueFg.Render("line2"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(6) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header1", "header2", "line1", internal.BlueFg.Render("line2"), "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_OverflowLine(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"long header overflows"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "123456789012345", "1234567890123456", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "long header ove", "rflows", internal.BlueFg.Render("123456789012345"), "123456789012345", "6", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_OverflowHeight(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "123456789012345", "1234567890123456", "1234567890123456", "1234567890123456", "1234567890123456", "1234567890123456", }) vp.SetSelectedItemIdx(1) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "123456789012345", internal.BlueFg.Render("123456789012345"), internal.BlueFg.Render("6"), "123456789012345", "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_Scrolling(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) doSetContent := func() { setContent(vp, []string{ "first", "second", "third", "fourth", "fifth", "sixth", }) } validate := func(expectedView string) { // set Item multiple times to confirm no side effects of doing it internal.CmpStr(t, expectedView, vp.View()) doSetContent() internal.CmpStr(t, expectedView, vp.View()) } doSetContent() expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first"), "second", "third", "fourth", "16% (1/6)", }) validate(expectedView) // scrolling up past top is no-op vp, _ = vp.Update(upKeyMsg) validate(expectedView) // scrolling down by one vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", internal.BlueFg.Render("second"), "third", "fourth", "33% (2/6)", }) validate(expectedView) // scrolling down by one again vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first", "second", internal.BlueFg.Render("third"), "fourth", "50% (3/6)", }) validate(expectedView) // scroll to bottom vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third", "fourth", "fifth", internal.BlueFg.Render("sixth"), "100% (6/6)", }) validate(expectedView) // scrolling down past bottom when at bottom is no-op vp, _ = vp.Update(downKeyMsg) validate(expectedView) } func TestViewport_SelectionOn_WrapOn_EnsureItemInView(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line that is super long", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "the second", " line", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(2, 0, 9, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("line"), "the second", " line", "the third", "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(goToBottomKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the fourth"), internal.BlueFg.Render(" line that"), internal.BlueFg.Render(" is super "), internal.BlueFg.Render("long"), "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(1, len("the second"), len("the second line"), 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", " line", "the third ", "line", internal.BlueFg.Render("the fourth"), "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(0, 0, 0, 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first ", "line", "the second", " line", "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) vp.EnsureItemInView(3, 0, len("the fourth line that is super "), 0, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", internal.BlueFg.Render("the fourth"), internal.BlueFg.Render(" line that"), internal.BlueFg.Render(" is super "), "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_EnsureItemInViewVerticalPad(t *testing.T) { w, h := 10, 10 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) numItems := 100 nums := make([]string, 0, numItems) for i := range numItems { nums = append(nums, strconv.Itoa(i+1)) } setContent(vp, nums) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("1"), "2", "3", "4", "5", "6", "7", "8", "1% (1/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "10" with verticalPad=1 // should leave 1 line of context below vp.SetSelectedItemIdx(9) vp.EnsureItemInView(9, 0, 0, 1, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "4", "5", "6", "7", "8", "9", selectionStyle.Render("10"), "11", "10% (10...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll up to "5" with verticalPad=1 // should leave 1 line of context above vp.SetSelectedItemIdx(4) vp.EnsureItemInView(4, 0, 0, 1, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "4", selectionStyle.Render("5"), "6", "7", "8", "9", "10", "11", "5% (5/100)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "15" with verticalPad=2 // should leave 2 lines of context above vp.SetSelectedItemIdx(99) // reset to bottom vp.EnsureItemInView(99, 0, 0, 0, 0) vp.SetSelectedItemIdx(14) vp.EnsureItemInView(14, 0, 0, 2, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "13", "14", selectionStyle.Render("15"), "16", "17", "18", "19", "20", "15% (15...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "99", not enough content below for verticalPad=3 // pad below as much as possible vp.SetSelectedItemIdx(0) // reset to top vp.EnsureItemInView(0, 0, 0, 0, 0) vp.SetSelectedItemIdx(98) vp.EnsureItemInView(98, 0, 0, 3, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "93", "94", "95", "96", "97", "98", selectionStyle.Render("99"), "100", "99% (99...", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to "50", request more padding than is available given viewport height -> center item vp.SetSelectedItemIdx(0) // reset to top vp.EnsureItemInView(0, 0, 0, 0, 0) vp.SetSelectedItemIdx(49) vp.EnsureItemInView(49, 0, 0, 5, 0) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "47", "48", "49", selectionStyle.Render("50"), "51", "52", "53", "54", "50% (50...", }) internal.CmpStr(t, expectedView, vp.View()) } // TestViewport_SelectionOn_WrapOn_EnsureItemInViewNoOscillation verifies that repeated calls // to EnsureItemInView produce stable positioning. Before the fix, when padding couldn't be // satisfied on both sides, the view would oscillate on each call because scrollingDown // would change based on the current position. This simulates what happens during cursor // blinks in the filterable viewport, where SetObjects and EnsureItemInView are called // repeatedly on the same visible item. // // The oscillation occurs specifically when navigating FROM BELOW to an item: // 1. First call: scrollingDown=false (coming from below), positions with padding above // 2. After positioning, top is now ABOVE target, so scrollingDown becomes true // 3. Second call: scrollingDown=true, positions with padding below (different position!) // 4. This creates oscillation between the two positions func TestViewport_SelectionOn_WrapOn_EnsureItemInViewNoOscillation(t *testing.T) { w, h := 10, 10 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // create 100 items numItems := 100 nums := make([]string, 0, numItems) for i := range numItems { nums = append(nums, strconv.Itoa(i+1)) } setContent(vp, nums) // first go to the bottom, then navigate UP to item 50 // this is the scenario that triggers oscillation: coming from below vp.SetSelectedItemIdx(99) // go to bottom (item 100) vp.EnsureItemInView(99, 0, 0, 0, 0) // now navigate up to item 50 with padding=5 (can't fit on both sides) vp.SetSelectedItemIdx(49) vp.EnsureItemInView(49, 0, 0, 5, 0) viewAfterFirstCall := vp.View() // item 50 should be approximately centered // when coming from below, scroll-up centering positions with padding above expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "46", "47", "48", "49", selectionStyle.Render("50"), "51", "52", "53", "50% (50...", }) internal.CmpStr(t, expectedView, viewAfterFirstCall) // simulate cursor blink: call EnsureItemInView again without any navigation // before the fix, this would cause oscillation because: // - after first call, top is at item 47 (above target item 50) // - targetBelowTop(49, 0) now returns true (scrollingDown=true) // - this triggers different positioning logic, causing the view to shift for i := range 5 { vp.EnsureItemInView(49, 0, 0, 5, 0) viewAfterRepeat := vp.View() // view should remain stable - no oscillation if viewAfterRepeat != viewAfterFirstCall { t.Fatalf("View oscillated on iteration %d.\nExpected:\n%s\n\nGot:\n%s", i+1, viewAfterFirstCall, viewAfterRepeat) } } } func TestViewport_SelectionOn_WrapOn_EnsureItemInViewHorizontalPad(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "some line that is really long", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("some line "), selectionStyle.Render("that is re"), selectionStyle.Render("ally long"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: ensure "line " is visible with horizontalPad=2 // in wrap mode, horizontal padding ensures character ranges are visible vp.SetSelectedItemIdx(0) // reset vp.EnsureItemInView(0, 0, 0, 0, 0) vp.EnsureItemInView(0, len("some line"), len("some line "), 0, 2) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("some line "), selectionStyle.Render("that is re"), selectionStyle.Render("ally long"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: ensure "really" is visible with horizontalPad=1 vp.EnsureItemInView(0, len("some line that is "), len("some line that is really"), 0, 1) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("some line "), selectionStyle.Render("that is re"), selectionStyle.Render("ally long"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // horizontalPad: ensure end of string is visible with large horizontalPad vp.EnsureItemInView(0, len("some line that is really lon"), len("some line that is really long"), 0, 100) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("some line "), selectionStyle.Render("that is re"), selectionStyle.Render("ally long"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_SetXOffset(t *testing.T) { w, h := 10, 8 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "the second", " line", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(-1) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(0) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(4) internal.CmpStr(t, expectedView, vp.View()) vp.SetXOffset(1000) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_BulkScrolling(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", "the third line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // full page down vp, _ = vp.Update(fullPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the second"), internal.BlueFg.Render(" line"), "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // half page down vp, _ = vp.Update(halfPgDownKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the third "), internal.BlueFg.Render("line"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // half page down vp, _ = vp.Update(halfPgDownKeyMsg) internal.CmpStr(t, expectedView, vp.View()) // full page up vp, _ = vp.Update(fullPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the second"), internal.BlueFg.Render(" line"), "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // half page up vp, _ = vp.Update(halfPgUpKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // half page up vp, _ = vp.Update(halfPgUpKeyMsg) internal.CmpStr(t, expectedView, vp.View()) // go to bottom vp, _ = vp.Update(goToBottomKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the third "), internal.BlueFg.Render("line"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // go to top vp, _ = vp.Update(goToTopKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_Panning(t *testing.T) { w, h := 10, 7 vp := newViewport(w, h) vp.SetHeader([]string{"header long"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) doSetContent := func() { setContent(vp, []string{ "first line that is fairly long", "second line that is even much longer than the first", "third line that is fairly long as well", "fourth kinda long", "fifth kinda long too", "sixth", }) } validate := func(expectedView string) { // set Item multiple times to confirm no side effects of doing it internal.CmpStr(t, expectedView, vp.View()) doSetContent() internal.CmpStr(t, expectedView, vp.View()) } doSetContent() expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", internal.BlueFg.Render("first line"), internal.BlueFg.Render(" that is f"), internal.BlueFg.Render("airly long"), "second lin", "16% (1/6)", }) validate(expectedView) // pan right vp.SetXOffset(5) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", internal.BlueFg.Render("second lin"), internal.BlueFg.Render("e that is "), internal.BlueFg.Render("even much "), internal.BlueFg.Render("longer tha"), "33% (2/6)", }) validate(expectedView) // pan all the way right vp.SetXOffset(41) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", internal.BlueFg.Render("third line"), internal.BlueFg.Render(" that is f"), internal.BlueFg.Render("airly long"), internal.BlueFg.Render(" as well"), "50% (3/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", "airly long", " as well", internal.BlueFg.Render("fourth kin"), internal.BlueFg.Render("da long"), "66% (4/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", "fourth kin", "da long", internal.BlueFg.Render("fifth kind"), internal.BlueFg.Render("a long too"), "83% (5/6)", }) validate(expectedView) // scroll down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", "da long", "fifth kind", "a long too", internal.BlueFg.Render("sixth"), "100% (6/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", "da long", internal.BlueFg.Render("fifth kind"), internal.BlueFg.Render("a long too"), "sixth", "83% (5/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", internal.BlueFg.Render("fourth kin"), internal.BlueFg.Render("da long"), "fifth kind", "a long too", "66% (4/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", internal.BlueFg.Render("third line"), internal.BlueFg.Render(" that is f"), internal.BlueFg.Render("airly long"), internal.BlueFg.Render(" as well"), "50% (3/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", internal.BlueFg.Render("second lin"), internal.BlueFg.Render("e that is "), internal.BlueFg.Render("even much "), internal.BlueFg.Render("longer tha"), "33% (2/6)", }) validate(expectedView) // scroll up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header lon", "g", internal.BlueFg.Render("first line"), internal.BlueFg.Render(" that is f"), internal.BlueFg.Render("airly long"), "second lin", "16% (1/6)", }) validate(expectedView) } func TestViewport_SelectionOn_WrapOn_MaintainSelection(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) vp.SetSelectionComparator(objectsEqual) setContent(vp, []string{ "sixth item", "seventh item", "eighth item", "ninth item", "tenth item", "eleventh item", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("sixth item"), "seventh it", "em", "eighth ite", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // selection down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "sixth item", internal.BlueFg.Render("seventh it"), internal.BlueFg.Render("em"), "eighth ite", "33% (2/6)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item above setContent(vp, []string{ "first item", "second item", "third item", "fourth item", "fifth item", "sixth item", "seventh item", "eighth item", "ninth item", "tenth item", "eleventh item", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "sixth item", internal.BlueFg.Render("seventh it"), internal.BlueFg.Render("em"), "eighth ite", "63% (7/11)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item below setContent(vp, []string{ "first item", "second item", "third item", "fourth item", "fifth item", "sixth item", "seventh item", "eighth item", "ninth item", "tenth item", "eleventh item", "twelfth item", "thirteenth item", "fourteenth item", "fifteenth item", "sixteenth item", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "sixth item", internal.BlueFg.Render("seventh it"), internal.BlueFg.Render("em"), "eighth ite", "43% (7/16)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_StickyTop(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetTopSticky(true) setContent(vp, []string{ "the first line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "the second line", "the first line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the second"), internal.BlueFg.Render(" line"), "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // de-activate by moving selection down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "the second line", "the first line", "the third line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_StickyBottom(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetBottomSticky(true) setContent(vp, []string{ "the first line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "the second line", "the first line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the second", " line", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add longer Item at bottom setContent(vp, []string{ "the second line", "the first line", "a very long line that wraps a lot", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("a very lon"), internal.BlueFg.Render("g line tha"), internal.BlueFg.Render("t wraps a "), internal.BlueFg.Render("lot"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // de-activate by moving selection up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "a very lon", "g line tha", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "the second line", "the first line", "a very long line that wraps a lot", "the third line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "a very lon", "g line tha", "50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_StickyBottomOverflowHeight(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetBottomSticky(true) // test covers case where first set Item to empty, then overflow height setContent(vp, []string{}) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", }) internal.CmpStr(t, expectedView, vp.View()) setContent(vp, []string{ "the second line", "the first line", "the third line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the third "), internal.BlueFg.Render("line"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_StickyTopBottom(t *testing.T) { w, h := 10, 4 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetTopSticky(true) vp.SetBottomSticky(true) setContent(vp, []string{ "the first line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item, top sticky wins out arbitrarily when both set setContent(vp, []string{ "the second line", "the first line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the second"), internal.BlueFg.Render(" line"), "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // selection to bottom vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "the second line", "the first line", "the third line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the third "), internal.BlueFg.Render("line"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // de-activate by moving selection up vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "the second line", "the first line", "the third line", "the fourth line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "50% (2/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_StickyBottomLongLine(t *testing.T) { w, h := 10, 10 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // stickyness should override maintain selection vp.SetSelectionComparator(objectsEqual) vp.SetBottomSticky(true) setContent(vp, []string{ "first line", "next line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", internal.BlueFg.Render("next line"), "", "", "", "", "", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) setContent(vp, []string{ "first line", "next line", "a very long line at the bottom that wraps many times", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", "next line", internal.BlueFg.Render("a very lon"), internal.BlueFg.Render("g line at "), internal.BlueFg.Render("the bottom"), internal.BlueFg.Render(" that wrap"), internal.BlueFg.Render("s many tim"), internal.BlueFg.Render("es"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_StickyTop(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(false) vp.SetTopSticky(true) setContent(vp, []string{ "the first line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first ", "99% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add item setContent(vp, []string{ "the second line", "the first line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the second", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll down to de-activate sticky vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", " line", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add item - should not return to top setContent(vp, []string{ "the third line", "the second line", "the first line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_StickyBottom(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(false) vp.SetBottomSticky(true) setContent(vp, []string{ "the first line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the first ", "line", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) // add item setContent(vp, []string{ "the first line", "the second line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", "the second", " line", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // add longer item at bottom setContent(vp, []string{ "the first line", "the second line", "a very long line that wraps a lot", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "g line tha", "t wraps a ", "lot", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // scroll up to de-activate sticky vp, _ = vp.Update(upKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "a very lon", "g line tha", "t wraps a ", "99% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) // add item - should not jump to bottom setContent(vp, []string{ "the first line", "the second line", "a very long line that wraps a lot", "the third line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "a very lon", "g line tha", "t wraps a ", "75% (3/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_StickyBottomOverflowHeight(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(false) vp.SetBottomSticky(true) // test covers case where first set item to empty, then overflow height setContent(vp, []string{}) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", }) internal.CmpStr(t, expectedView, vp.View()) setContent(vp, []string{ "the second line", "the first line", "the third line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOff_WrapOn_StickyBottomLongLine(t *testing.T) { w, h := 10, 9 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(false) vp.SetBottomSticky(true) setContent(vp, []string{ "first line", "next line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line", "next line", "", "", "", "", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) setContent(vp, []string{ "first line", "next line", "a very long line at the bottom that wraps many times", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "next line", "a very lon", "g line at ", "the bottom", " that wrap", "s many tim", "es", "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_RemoveLogsWhenSelectionBottom(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the second line", "the first line", "the third line", "the fourth line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the second"), "25% (1/4)", }) internal.CmpStr(t, expectedView, vp.View()) // selection to bottom vp.SetSelectedItemIdx(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the fourth"), "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) // remove bottom items setContent(vp, []string{ "the second line", "the first line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_ChangeHeight(t *testing.T) { w, h := 10, 3 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", "the fifth line", "the sixth line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height vp.SetHeight(6) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "the second", " line", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to third line vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the second", " line", internal.BlueFg.Render("the third "), internal.BlueFg.Render("line"), "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // reduce height vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the third "), "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height vp.SetHeight(8) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the third "), internal.BlueFg.Render("line"), "the fourth", " line", "the fifth ", "line", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to last line vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the fourth", " line", "the fifth ", "line", internal.BlueFg.Render("the sixth "), internal.BlueFg.Render("line"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // reduce height vp.SetHeight(3) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the sixth "), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_ChangeContent(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", "the fifth line", "the sixth line", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "the second", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to bottom vp.SetSelectedItemIdx(5) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "line", internal.BlueFg.Render("the sixth "), internal.BlueFg.Render("line"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // remove Item setContent(vp, []string{ "the second line", "the third line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", " line", internal.BlueFg.Render("the third "), internal.BlueFg.Render("line"), "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) // remove all Item setContent(vp, []string{}) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", }) internal.CmpStr(t, expectedView, vp.View()) // add Item setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", "the fifth line", "the sixth line", }) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the first "), internal.BlueFg.Render("line"), "the second", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_AnsiOnSelection(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "line with some " + internal.RedFg.Render("red") + " text", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("line with "), selectionStyle.Render("some red t"), internal.BlueFg.Render("ext"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_AnsiOnSelection_NoOverride(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h, WithSelectionStyleOverridesItemStyle[object](false)) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "line with some " + internal.RedFg.Render("red") + " text", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("line with "), selectionStyle.Render("some ") + internal.RedFg.Render("red") + selectionStyle.Render(" t"), // item style preserved internal.BlueFg.Render("ext"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_SelectionEmpty(t *testing.T) { w, h := 20, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render(" "), "", "", "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_ExtraSlash(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "|2024|" + internal.RedFg.Render("fl..lq") + "/" + internal.RedFg.Render("flask-3") + "|", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", selectionStyle.Render("|2024|fl.."), selectionStyle.Render("lq/flask-3"), selectionStyle.Render("|"), "100% (1/1)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_SuperLongWrappedLine(t *testing.T) { runTest := func(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "smol", strings.Repeat("12345678", 1000000), "smol", }) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("smol"), "1234567812", "3456781234", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("1234567812"), internal.BlueFg.Render("3456781234"), internal.BlueFg.Render("5678123456"), "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "5678123456", "7812345678", internal.BlueFg.Render("smol"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } internal.RunWithTimeout(t, runTest, 500*time.Millisecond) } func TestViewport_SelectionOn_WrapOn_SetHighlights(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "first line that wraps", "second", "third", }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 0, End: 5, }, Style: internal.GreenFg, }, }, { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 11, End: 15, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.GreenFg.Render("first") + internal.BlueFg.Render(" line"), internal.BlueFg.Render(" ") + internal.RedFg.Render("that") + internal.BlueFg.Render(" wrap"), internal.BlueFg.Render("s"), "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_SetHighlightsStyledContent(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ internal.BlueFg.Render("first line that wraps"), internal.GreenFg.Render("second"), internal.RedFg.Render("third"), }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 0, End: 5, }, Style: internal.GreenFg, }, }, { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 11, End: 15, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.GreenFg.Render("first") + internal.BlueFg.Render(" line"), internal.BlueFg.Render(" ") + internal.RedFg.Render("that") + internal.BlueFg.Render(" wrap"), internal.BlueFg.Render("s"), "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_SetHighlightsAnsiUnicode(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"A💖中é"}) vp.SetSelectionEnabled(true) vp.SetWrapText(true) setContent(vp, []string{ "A💖中é text that wraps", "another line", }) highlights := []Highlight{ { ItemIndex: 0, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 1, End: 8, }, Style: internal.RedFg, }, }, } vp.SetHighlights(highlights) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "A💖中é", internal.BlueFg.Render("A") + internal.RedFg.Render("💖中") + internal.BlueFg.Render("é tex"), internal.BlueFg.Render("t that wra"), internal.BlueFg.Render("ps"), "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } // # OTHER func TestViewport_StyleOverlay(t *testing.T) { w, h := 20, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "plain text", internal.RedFg.Render("red text"), "more plain", }) // add highlight to the second item which already has red styling highlights := []Highlight{ { ItemIndex: 1, ItemHighlight: item.Highlight{ ByteRangeUnstyledContent: item.ByteRange{ Start: 0, End: 3, }, Style: internal.GreenFg, }, }, } vp.SetHighlights(highlights) // first item is selected, highlight should show on second item expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("plain text"), internal.GreenFg.Render("red") + internal.RedFg.Render(" text"), "more plain", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // selection style on second item overrides the red content styling; highlight keeps its style vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "plain text", internal.GreenFg.Render("red") + selectionStyle.Render(" text"), "more plain", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to third item, highlight should show again on second item vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "plain text", internal.GreenFg.Render("red") + internal.RedFg.Render(" text"), internal.BlueFg.Render("more plain"), "100% (3/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_ToggleWrap_PreserveSelection(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "first line that is fairly long", "second line that is even much longer than the first", "third line that is fairly long", "fourth", "fifth line that is fairly long", "sixth", }) // wrap off, selection on first line expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("first line t..."), "second line ...", "third line t...", "fourth", "16% (1/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to third line vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line t...", "second line ...", internal.BlueFg.Render("third line t..."), "fourth", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // toggle wrap on vp.SetWrapText(true) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "longer than the", " first", internal.BlueFg.Render("third line that"), internal.BlueFg.Render(" is fairly long"), "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // toggle wrap off vp.SetWrapText(false) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "first line t...", "second line ...", internal.BlueFg.Render("third line t..."), "fourth", "50% (3/6)", }) internal.CmpStr(t, expectedView, vp.View()) // move selection to last line vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third line t...", "fourth", "fifth line t...", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // toggle wrap on vp.SetWrapText(true) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "fourth", "fifth line that", " is fairly long", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) // toggle wrap off vp.SetWrapText(false) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "third line t...", "fourth", "fifth line t...", internal.BlueFg.Render("sixth"), "100% (6/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_ToggleWrap_PreserveSelectionInView(t *testing.T) { w, h := 15, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetSelectionEnabled(true) setContent(vp, []string{ "a really really really really really really really really really really really really long preamble", "first line that is fairly long", "second line that is even much longer than the first", "third line that is fairly long", }) vp.SetSelectedItemIdx(3) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "a really rea...", "first line t...", "second line ...", internal.BlueFg.Render("third line t..."), "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) // toggle wrap, full wrapped selection should remain in view vp.SetWrapText(true) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "longer than the", " first", internal.BlueFg.Render("third line that"), internal.BlueFg.Render(" is fairly long"), "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) // toggle wrap vp.SetWrapText(false) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "a really rea...", "first line t...", "second line ...", internal.BlueFg.Render("third line t..."), "100% (4/4)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_ToggleWrap_ScrollInBounds(t *testing.T) { w, h := 10, 7 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", "the fifth line", "the sixth line", }) // scroll to bottom with selection at top of that view vp.SetSelectedItemIdx(5) vp, _ = vp.Update(upKeyMsg) vp, _ = vp.Update(upKeyMsg) expectedView := internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the fourth"), internal.BlueFg.Render(" line"), "the fifth ", "line", "the sixth ", "66% (4/6)", }) internal.CmpStr(t, expectedView, vp.View()) // toggle wrap vp.SetWrapText(false) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the sec...", "the thi...", internal.BlueFg.Render("the fou..."), "the fif...", "the six...", "66% (4/6)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionPrefix_WrapOn_Basic(t *testing.T) { // width=20, prefix="> " (2 chars), so content wraps at 18 w, h := 20, 7 prefix := "> " vp := newViewport(w, h, WithStyles[object](Styles{ SelectionPrefix: prefix, FooterStyle: lipgloss.NewStyle(), SelectedItemStyle: selectionStyle, })) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{"short", "medium length", "third"}) // selection on first item expectedView := internal.Pad(w, h, []string{ prefix + selectionStyle.Render("short"), " " + "medium length", " " + "third", "", "", "", "33% (1/3)", }) internal.CmpStr(t, expectedView, vp.View()) // move down vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(w, h, []string{ " " + "short", prefix + selectionStyle.Render("medium length"), " " + "third", "", "", "", "66% (2/3)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionPrefix_WrapOn_LongItemWraps(t *testing.T) { // width=12, prefix="> " (2 chars), so content wraps at 10 w, h := 12, 7 prefix := "> " vp := newViewport(w, h, WithStyles[object](Styles{ SelectionPrefix: prefix, FooterStyle: lipgloss.NewStyle(), SelectedItemStyle: selectionStyle, })) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // "hello world!!" is 13 chars, wraps at content width 10 into 2 lines setContent(vp, []string{"hello world!!", "short"}) // selected item wraps: prefix on both wrapped lines expectedView := internal.Pad(w, h, []string{ prefix + selectionStyle.Render("hello worl"), prefix + selectionStyle.Render("d!!"), " " + "short", "", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) // move down - unselected item still wraps, gets padding on both lines vp, _ = vp.Update(downKeyMsg) expectedView = internal.Pad(w, h, []string{ " " + "hello worl", " " + "d!!", prefix + selectionStyle.Render("short"), "", "", "", "100% (2/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionPrefix_WrapOn_WithHeader(t *testing.T) { // header uses full width (no prefix), content uses contentWidth w, h := 20, 6 prefix := "> " vp := newViewport(w, h, WithStyles[object](Styles{ SelectionPrefix: prefix, FooterStyle: lipgloss.NewStyle(), SelectedItemStyle: selectionStyle, })) vp.SetHeader([]string{"header line"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{"alpha", "beta"}) expectedView := internal.Pad(w, h, []string{ "header line", prefix + selectionStyle.Render("alpha"), " " + "beta", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionPrefix_WrapOn_NoColor(t *testing.T) { // all styles empty, only prefix distinguishes selection w, h := 16, 6 prefix := "> " emptyStyle := lipgloss.NewStyle() vp := newViewport(w, h, WithStyles[object](Styles{ SelectionPrefix: prefix, FooterStyle: emptyStyle, SelectedItemStyle: emptyStyle, })) vp.SetWrapText(true) vp.SetSelectionEnabled(true) // "a]long item here" is 16 chars, wraps at content width 14 into 2 lines setContent(vp, []string{"a long item here!", "other"}) expectedView := internal.Pad(w, h, []string{ prefix + "a long item he", prefix + "re!", " " + "other", "", "", "50% (1/2)", }) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_SetSameDimensionsPreservesScrollPosition(t *testing.T) { w, h := 10, 5 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", "the fifth line", "the sixth line", "the seventh line", "the eighth line", }) // move selection to fifth item, causing a scroll vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView := internal.Pad(w, h, []string{ "header", " line", internal.BlueFg.Render("the fifth "), internal.BlueFg.Render("line"), "62% (5/8)", }) internal.CmpStr(t, expectedView, vp.View()) // setting the same width and height should not change the scroll position vp.SetWidth(w) internal.CmpStr(t, expectedView, vp.View()) vp.SetHeight(h) internal.CmpStr(t, expectedView, vp.View()) } func TestViewport_SelectionOn_WrapOn_ChangeHeightPreservesSelectionPosition(t *testing.T) { w, h := 10, 6 vp := newViewport(w, h) vp.SetHeader([]string{"header"}) vp.SetWrapText(true) vp.SetSelectionEnabled(true) setContent(vp, []string{ "the first line", "the second line", "the third line", "the fourth line", "the fifth line", "the sixth line", "the seventh line", "the eighth line", }) // move selection to fifth item vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) vp, _ = vp.Update(downKeyMsg) expectedView := internal.Pad(w, h, []string{ "header", "the fourth", " line", internal.BlueFg.Render("the fifth "), internal.BlueFg.Render("line"), "62% (5/8)", }) internal.CmpStr(t, expectedView, vp.View()) // increase height - selection should remain visible and not jump to the top vp.SetHeight(10) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", "the fourth", " line", internal.BlueFg.Render("the fifth "), internal.BlueFg.Render("line"), "the sixth ", "line", "the sevent", "h line", "62% (5/8)", }) internal.CmpStr(t, expectedView, vp.View()) // reduce height - selection should still be visible vp.SetHeight(4) expectedView = internal.Pad(vp.GetWidth(), vp.GetHeight(), []string{ "header", internal.BlueFg.Render("the fifth "), internal.BlueFg.Render("line"), "62% (5/8)", }) internal.CmpStr(t, expectedView, vp.View()) } func setContent(vp *Model[object], content []string) { renderableStrings := make([]object, len(content)) for i := range content { renderableStrings[i] = object{item: item.NewItem(content[i])} } vp.SetObjects(renderableStrings) } ================================================ FILE: modules/viewport/viewport_test_util_test.go ================================================ package viewport import ( "charm.land/lipgloss/v2" "github.com/antgroup/hugescm/modules/viewport/internal" "github.com/antgroup/hugescm/modules/viewport/item" ) type object struct { item item.Item } func (i object) GetItem() item.Item { return i.item } func objectsEqual(a, b object) bool { if a.item == nil || b.item == nil { return a.item == b.item } return a.item.Content() == b.item.Content() } var _ Object = object{} var ( downKeyMsg = internal.MakeKeyMsg('j') halfPgDownKeyMsg = internal.MakeKeyMsg('d') fullPgDownKeyMsg = internal.MakeKeyMsg('f') upKeyMsg = internal.MakeKeyMsg('k') halfPgUpKeyMsg = internal.MakeKeyMsg('u') fullPgUpKeyMsg = internal.MakeKeyMsg('b') goToTopKeyMsg = internal.MakeKeyMsg('g') goToBottomKeyMsg = internal.MakeKeyMsg('G') selectionStyle = internal.BlueFg ) func newViewport(width, height int, options ...Option[object]) *Model[object] { styles := Styles{ FooterStyle: lipgloss.NewStyle(), SelectedItemStyle: selectionStyle, } options = append([]Option[object]{ WithKeyMap[object](DefaultKeyMap()), WithStyles[object](styles), }, options...) return New[object](width, height, options...) } ================================================ FILE: modules/wildmatch/LICENSE.md ================================================ MIT License Copyright (c) 2018- GitHub, Inc. and Git LFS contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: modules/wildmatch/package.go ================================================ // package Wildmatch is an implementation of Git's wildmatch.c-style pattern // matching. // // Wildmatch patterns are comprised of any combination of the following three // components: // // - String literals. A string literal is "foo", or "foo\*" (matching "foo", // and "foo\", respectively). In general, string literals match their exact // contents in a filepath, and cannot match over directories unless they // include the operating system-specific path separator. // // - Wildcards. There are three types of wildcards: // // - Single-asterisk ('*'): matches any combination of characters, any // number of times. Does not match path separators. // // - Single-question mark ('?'): matches any single character, but not a // path separator. // // - Double-asterisk ('**'): greedily matches any number of directories. // For example, '**/foo' matches '/foo', 'bar/baz/woot/foot', but not // 'foo/bar'. Double-asterisks must be separated by filepath separators // on either side. // // - Character groups. A character group is composed of a set of included and // excluded character types. The set of included character types begins the // character group, and a '^' or '!' separates it from the set of excluded // character types. // // A character type can be one of the following: // // - Character literal: a single character, i.e., 'c'. // // - Character group: a group of characters, i.e., '[:alnum:]', etc. // // - Character range: a range of characters, i.e., 'a-z'. // // A Wildmatch pattern can be any combination of the above components, in any // ordering, and repeated any number of times. package wildmatch ================================================ FILE: modules/wildmatch/wildmatch.go ================================================ package wildmatch import ( "errors" "fmt" "path/filepath" "strings" "unicode" "unicode/utf8" ) // opt is an option type for configuring a new Wildmatch instance. type opt func(w *Wildmatch) var ( // Basename allows the receiving Wildmatch to match paths where the // pattern matches only the basename of the path when the pattern does // not contain directory separators. // // If the pattern contains directory separators, or if this option is // not given, the entire path will be matched. Basename opt = func(w *Wildmatch) { w.basename = true } // CaseFold allows the receiving Wildmatch to match paths with // different case structuring as in the pattern. CaseFold opt = func(w *Wildmatch) { w.caseFold = true } // GitAttributes augments the functionality of the matching algorithm // to match behavior of git when working with .gitattributes files. GitAttributes opt = func(w *Wildmatch) { w.gitattributes = true } // Contents indicates that if a pattern matches a directory that is a // parent of a path, then that path is included. This is the behavior // of patterns for .gitignore. Contents opt = func(w *Wildmatch) { w.contents = true } // SystemCase either folds or does not fold filepaths and patterns, // according to whether or not the operating system on which Wildmatch // runs supports case sensitive files or not. SystemCase opt ) const ( sep byte = '/' ) // Wildmatch implements pattern matching against filepaths using the format // described in the package documentation. // // For more, see documentation for package 'wildmatch'. type Wildmatch struct { // ts are the token set used to match the given pattern. ts []token // p is the raw pattern used to derive the token set. p string // basename indicates that this Wildmatch instance matches basenames // when possible (i.e., when there are no directory separators in the // pattern). basename bool // caseFold allows the instance Wildmatch to match patterns with the // same character but different case structures. caseFold bool // gitattributes flag indicates that logic specific to the .gitattributes file // should be used. The two main differences are that negative expressions are // not allowed and directories are not matched. gitattributes bool // contents indicates that if a pattern matches a directory that is a // parent of a path, then that path is included. This is the behavior // of patterns for .gitignore. contents bool } type MatchOpts struct { IsDirectory bool } // NewWildmatch constructs a new Wildmatch instance which matches filepaths // according to the given pattern and the rules for matching above. // // If the pattern is malformed, for instance, it has an unclosed character // group, escape sequence, or character class, NewWildmatch will panic(). func NewWildmatch(p string, opts ...opt) (*Wildmatch, error) { w := &Wildmatch{p: slashEscape(p)} for _, opt := range opts { opt(w) } if w.caseFold { // Before parsing the pattern, convert it to lower-case. w.p = strings.ToLower(w.p) } parts := strings.Split(w.p, string(sep)) if len(parts) > 1 { w.basename = false } var err error if w.ts, err = w.parseTokens(parts); err != nil { return nil, err } return w, nil } const ( // escapes is a constant string containing all escapable characters escapes = "\\[]*?#" ) // slashEscape converts paths "p" to POSIX-compliant path, independent of which // escape character the host machine uses. // // slashEscape respects escapable sequences, and thus will not transform // `foo\*bar` to `foo/*bar` on non-Windows operating systems. func slashEscape(p string) string { var bs strings.Builder for i := 0; i < len(p); { c := p[i] switch c { case '\\': if i+1 < len(p) && escapable(p[i+1]) { _ = bs.WriteByte('\\') _ = bs.WriteByte(p[i+1]) i += 2 } else { _ = bs.WriteByte('/') i += 1 } default: _ = bs.WriteByte(c) i += 1 } } return bs.String() } // escapable returns whether the given "c" is escapable. func escapable(c byte) bool { return strings.IndexByte(escapes, c) > -1 } // parseTokens parses a separated list of patterns into a sequence of // representative Tokens that will compose the pattern when applied in sequence. func (w *Wildmatch) parseTokens(dirs []string) ([]token, error) { if len(dirs) == 0 { return make([]token, 0), nil } var finalComponents []token if !w.gitattributes { trailingIsEmpty := len(dirs) > 1 && dirs[len(dirs)-1] == "" numNonEmptyDirs := len(dirs) if trailingIsEmpty { numNonEmptyDirs -= 1 } if w.contents { finalComponents = []token{&trailingComponents{}} if trailingIsEmpty { // Strip off the trailing empty string. dirs = dirs[:numNonEmptyDirs] } } // If we have one component, ignoring trailing empty // components and we know that a directory is permissible… if numNonEmptyDirs == 1 && (trailingIsEmpty || w.contents) { // We don't have a slash in the middle, so this can go // anywhere in the hierarchy. If there had been a slash // here, it would have been anchored at the root. rest, err := w.parseTokensSimple(dirs) if err != nil { return nil, err } tokens := []token{&unanchoredDirectory{ Until: rest[0], }} // If we're not matching all contents, then do include // the empty component so we don't match // non-directories. if finalComponents == nil && len(rest) > 1 { finalComponents = rest[1:] } return append(tokens, finalComponents...), nil } } components, err := w.parseTokensSimple(dirs) if err != nil { return nil, err } return append(components, finalComponents...), nil } func (w *Wildmatch) parseTokensSimple(dirs []string) ([]token, error) { if len(dirs) == 0 { return make([]token, 0), nil } switch dirs[0] { case "": if len(dirs) == 1 { return []token{&component{fns: []componentFn{substring("")}}}, nil } return w.parseTokensSimple(dirs[1:]) case "**": rest, err := w.parseTokensSimple(dirs[1:]) if err != nil { return nil, err } if len(rest) == 0 { // If there are no remaining tokens, return a lone // doubleStar token. return []token{&doubleStar{ Until: nil, }}, nil } // Otherwise, return a doubleStar token that will match greedily // until the first component in the remainder of the pattern, // and then the remainder of the pattern. return append([]token{&doubleStar{ Until: rest[0], }}, rest[1:]...), nil default: // Ordinarily, simply return the appropriate component, and // continue on. cc, err := parseComponent(dirs[0]) if err != nil { return nil, err } tokens, err := w.parseTokensSimple(dirs[1:]) if err != nil { return nil, err } return append([]token{&component{ fns: cc, }}, tokens...), nil } } // nonEmpty returns the non-empty strings in "all". func NonEmpty(all []string) (ne []string) { for _, x := range all { if len(x) > 0 { ne = append(ne, x) } } return ne } // Match returns true if and only if the pattern matched by the receiving // Wildmatch matches the entire filepath "t". func (w *Wildmatch) Match(t string) bool { dirs, ok := w.consume(t, MatchOpts{}) if !ok { return false } return len(dirs) == 0 } func (w *Wildmatch) MatchWithOpts(t string, opt MatchOpts) bool { dirs, ok := w.consume(t, opt) if !ok { return false } return len(dirs) == 0 } // consume performs the inner match of "t" against the receiver's pattern, and // returns a slice of remaining directory paths, and whether or not there was a // disagreement while matching. func (w *Wildmatch) consume(t string, opt MatchOpts) ([]string, bool) { if w.basename { // If the receiving Wildmatch has basename set, the pattern // matches only the basename of the given "t". t = filepath.Base(t) } if w.caseFold { // If the receiving Wildmatch is case insensitive, the pattern // "w.p" will be lower-case. // // To preserve insensitivity, lower the given path "t", as well. t = strings.ToLower(t) } var isDir bool if opt.IsDirectory { isDir = true // Standardize the formation of subject string so directories always // end with '/' if !strings.HasSuffix(t, "/") { t += "/" } } else { isDir = strings.HasSuffix(t, string(sep)) } dirs := strings.Split(t, string(sep)) // Git-attribute style matching can never match a directory if w.gitattributes && isDir { return dirs, false } // Match each directory token-wise, allowing each token to consume more // than one directory in the case of the '**' pattern. for _, tok := range w.ts { var ok bool dirs, ok = tok.Consume(dirs, isDir) if !ok { // If a pattern could not match the remainder of the // filepath, return so immediately, along with the paths // that we did successfully manage to match. return dirs, false } } // If this is a directory that we've otherwise matched and all we have // left is an empty path component, then this is a match. if isDir && len(dirs) == 1 && len(dirs[0]) == 0 { return nil, true } return dirs, true } // String implements fmt.Stringer and returns the receiver's pattern in the format // specified above. func (w *Wildmatch) String() string { return w.p } // token matches zero, one, or more directory components. type token interface { // Consume matches zero, one, or more directory components. // // Consider the following examples: // // (["foo", "bar", "baz"]) -> (["oo", "bar", baz"], true) // (["foo", "bar", "baz"]) -> (["bar", baz"], true) // (["foo", "bar", "baz"]) -> (["baz"], true) // (["foo", "bar", "baz"]) -> ([], true) // (["foo", "bar", "baz"]) -> (["foo", "bar", "baz"], false) // (["foo", "bar", "baz"]) -> (["oo", "bar", "baz"], false) // (["foo", "bar", "baz"]) -> (["bar", "baz"], false) // // The Consume operation can reduce the size of a single entry in the // slice (see: example (1) above), or remove it entirely, (see: examples // (2), (3), and (4) above). It can also refuse to match forward after // making any amount of progress (see: examples (5), (6), and (7) // above). // // Consume accepts a slice representing a path-delimited filepath on // disk, and a bool indicating whether the given path is a directory // (i.e., "foo/bar/" is, but "foo/bar" isn't). Consume(path []string, isDir bool) ([]string, bool) // String returns the string representation this component of the // pattern; i.e., a string that, when parsed, would form the same token. String() string } // doubleStar is an implementation of the Token interface which greedily matches // one-or-more path components until a successor token. type doubleStar struct { Until token EmptyPath bool } // Consume implements token.Consume as above. func (d *doubleStar) Consume(path []string, isDir bool) ([]string, bool) { if len(path) == 0 { return path, d.EmptyPath } // If there are no remaining tokens to match, allow matching the entire // path. if d.Until == nil { return nil, true } for i := len(path); i > 0; i-- { rest, ok := d.Until.Consume(path[i:], false) if ok { return rest, ok } } // If no match has been found, we assume that the '**' token matches the // empty string, and defer pattern matching to the rest of the path. return d.Until.Consume(path, isDir) } // String implements Component.String. func (d *doubleStar) String() string { if d.Until == nil { return "**" } return fmt.Sprintf("**/%s", d.Until.String()) } // unanchoredDirectory is an implementation of the Token interface which // greedily matches one-or-more path components until a successor token. type unanchoredDirectory struct { Until token } // Consume implements token.Consume as above. func (d *unanchoredDirectory) Consume(path []string, isDir bool) ([]string, bool) { // This matches the same way as a doubleStar, so just use that // implementation. s := &doubleStar{Until: d.Until} return s.Consume(path, isDir) } // String implements Component.String. func (d *unanchoredDirectory) String() string { return fmt.Sprintf("%s/", d.Until.String()) } // trailingComponents is an implementation of the Token interface which // greedily matches any trailing components, even if empty. type trailingComponents struct { } // Consume implements token.Consume as above. func (d *trailingComponents) Consume(path []string, isDir bool) ([]string, bool) { // This matches the same way as a doubleStar, so just use that // implementation. s := &doubleStar{Until: nil, EmptyPath: true} return s.Consume(path, isDir) } // String implements Component.String. func (d *trailingComponents) String() string { return "" } // componentFn is a functional type designed to match a single component of a // directory structure by reducing the unmatched part, and returning whether or // not a match was successful. type componentFn interface { Apply(s string) (rest string, ok bool) String() string } // cfn is a wrapper type for the Component interface that includes an applicable // function, and a string that represents it. type cfn struct { fn func(s string) (rest string, ok bool) str string } // Apply executes the component function as described above. func (c *cfn) Apply(s string) (rest string, ok bool) { return c.fn(s) } // String returns the string representation of this component. func (c *cfn) String() string { return c.str } // component is an implementation of the Token interface, which matches a single // component at the front of a tree structure by successively applying // implementations of the componentFn type. type component struct { // fns is the list of componentFn implementations to be successively // applied. fns []componentFn } // parseComponent parses a single component from its string representation, // including wildcards, character classes, string literals, and escape // sequences. func parseComponent(s string) ([]componentFn, error) { if len(s) == 0 { // The empty string represents the absence of componentFn's. return make([]componentFn, 0), nil } switch s[0] { case '\\': // If the first character is a '\', the following character is a // part of an escape sequence, or it is unclosed. if len(s) < 2 { return nil, errors.New("wildmatch: unclosed escape sequence") } literal := substring(string(s[1])) var rest []componentFn if len(s) > 2 { // If there is more to follow, i.e., "\*foo", then parse // the remainder. var err error if rest, err = parseComponent(s[2:]); err != nil { return nil, err } } return cons(literal, rest), nil case '[': var ( // i will denote the currently-inspected index of the character // group. i = 1 // include will denote the list of included runeFn's // composing the character group. include []runeFn // exclude will denote the list of excluded runeFn's // composing the character group. exclude []runeFn // run is the current run of strings (to either compose // a range, or select "any") run string // neg is whether we have seen a negation marker. neg bool ) for i < len(s) { c := s[i] if c == '^' || c == '!' { // Once a '^' or '!' character has been seen, // anything following it will be negated. neg = !neg i++ continue } if strings.HasPrefix(s[i:], "[:") { closeIdx := strings.Index(s[i:], ":]") if closeIdx < 0 { return nil, errors.New("unclosed character class") } if closeIdx == 1 { // The case "[:]" has a prefix "[:", and // a suffix ":]", but the atom refers to // a character group including the // literal ":", not an ill-formed // character class. // // Parse it as such; increment one // _less_ than expected, to terminate // the group. run += "[:]" i += 2 continue } // Find the associated character class. name := strings.TrimPrefix( strings.ToLower(s[i:i+closeIdx]), "[:") fn, ok := classes[name] if !ok { return nil, fmt.Errorf("wildmatch: unknown class: %q", name) } include, exclude = appendMaybe(!neg, include, exclude, fn) // Advance to the first index beyond the closing // ":]". i = i + closeIdx + 2 continue } if c == '-' { if i < len(s) { // If there is a range marker at the // non-final position, construct a range // and an optional "any" match: var start, end byte if len(run) > 0 { // If there is at least one // character in the run, use it // as the starting point of the // range, and remove it from the // run. start = run[len(run)-1] run = run[:len(run)-1] } if i+1 >= len(s) { return nil, errors.New("wildmatch: invalid range, missing end") } end = s[i+1] if len(run) > 0 { // If there is still information // in the run, construct a rune // function matching any // characters in the run. cfn := anyRune(run) include, exclude = appendMaybe(!neg, include, exclude, cfn) run = "" } // Finally, construct the rune range and // add it appropriately. bfn := between(rune(start), rune(end)) include, exclude = appendMaybe(!neg, include, exclude, bfn) i += 2 } else { // If this is in the final position, add // it to the run and exit the loop. run += "-" i += 2 } continue } if c == '\\' { // If we encounter an escape sequence in the // group, check its bounds and add it to the // run. if i+1 >= len(s) { return nil, errors.New("wildmatch: unclosed escape") } run += string(s[i+1]) i += 2 continue } if c == ']' { // If we encounter a closing ']', then stop // parsing the group. break } // Otherwise, add the character to the run and // advance forward. run += string(s[i]) i++ } if len(run) > 0 { fn := anyRune(run) include, exclude = appendMaybe(!neg, include, exclude, fn) } var rest string if i+1 < len(s) { rest = s[i+1:] } // Assemble a character class, and cons it in front of the // remainder of the component pattern. cc, err := parseComponent(rest) if err != nil { return nil, err } return cons(charClass(include, exclude), cc), nil case '?': cc, err := parseComponent(s[1:]) if err != nil { return nil, err } return []componentFn{wildcard(1, cc)}, nil case '*': cc, err := parseComponent(s[1:]) if err != nil { return nil, err } return []componentFn{wildcard(-1, cc)}, nil default: // Advance forward until we encounter a special character // (either '*', '[', '*', or '?') and parse across the divider. var i int for ; i < len(s); i++ { if s[i] == '[' || s[i] == '*' || s[i] == '?' || s[i] == '\\' { break } } cc, err := parseComponent(s[i:]) if err != nil { return nil, err } return cons(substring(s[:i]), cc), nil } } // appendMaybe appends the value "x" to either "a" or "b" depending on "yes". func appendMaybe(yes bool, a, b []runeFn, x runeFn) (ax, bx []runeFn) { if yes { return append(a, x), b } return a, append(b, x) } // cons prepends the "head" componentFn to the "tail" of componentFn's. func cons(head componentFn, tail []componentFn) []componentFn { return append([]componentFn{head}, tail...) } // Consume implements token.Consume as above by applying the above set of // componentFn's in succession to the first element of the path tree. func (c *component) Consume(path []string, isDir bool) ([]string, bool) { if len(path) == 0 { return path, false } head := path[0] for _, fn := range c.fns { var ok bool // Apply successively the component functions to make progress // matching the head. if head, ok = fn.Apply(head); !ok { // If any of the functions failed to match, there are // no other paths to match success, so return a failure // immediately. return path, false } } if len(head) > 0 { return append([]string{head}, path[1:]...), false } if len(path) == 1 { // Components can not match directories. If we were matching the // last path in a tree structure, we can only match if it // _wasn't_ a directory. return path[1:], true } return path[1:], true } // String implements token.String. func (c *component) String() string { var bs strings.Builder for _, fn := range c.fns { bs.WriteString(fn.String()) } return bs.String() } // substring returns a componentFn that matches a prefix of "sub". func substring(sub string) componentFn { return &cfn{ fn: func(s string) (rest string, ok bool) { if !strings.HasPrefix(s, sub) { return s, false } return s[len(sub):], true }, str: sub, } } // wildcard returns a componentFn that greedily matches until a set of other // component functions no longer matches. func wildcard(n int, fns []componentFn) componentFn { until := func(s string) (string, bool) { head := s for _, fn := range fns { var ok bool if head, ok = fn.Apply(head); !ok { return s, false } } if len(head) > 0 { return s, false } return "", true } var bs strings.Builder bs.WriteString("*") for _, fn := range fns { bs.WriteString(fn.String()) } return &cfn{ fn: func(s string) (rest string, ok bool) { if n > -1 { if n > len(s) { return "", false } return until(s[n:]) } for i := len(s); i > 0; i-- { rest, ok = until(s[i:]) if ok { return rest, ok } } return until(s) }, str: bs.String(), } } // charClass returns a component function emulating a character class, i.e., // that a single character can match if and only if it is included in one of the // includes (or true if there were no includes) and none of the excludes. func charClass(include, exclude []runeFn) componentFn { return &cfn{ fn: func(s string) (rest string, ok bool) { if len(s) == 0 { return s, false } // Find "r", the first rune in the string "s". r, l := utf8.DecodeRuneInString(s) var match bool for _, ifn := range include { // Attempt to find a match on "r" with "ifn". if ifn(r) { match = true break } } // If there wasn't a match and there were some including // patterns, return a failure to match. Otherwise, continue on // to make sure that no patterns exclude the rune "r". if !match && len(include) != 0 { return s, false } for _, efn := range exclude { // Attempt to find a negative match on "r" with "efn". if efn(r) { return s, false } } // If we progressed this far, return the remainder of the // string. return s[l:], true }, str: "", } } // runeFn matches a single rune. type runeFn func(rune) bool var ( // classes is a mapping from character class name to a rune function // that implements its behavior. classes = map[string]runeFn{ "alnum": func(r rune) bool { return unicode.In(r, unicode.Number, unicode.Letter) }, "alpha": unicode.IsLetter, "blank": func(r rune) bool { return r == ' ' || r == '\t' }, "cntrl": unicode.IsControl, "digit": unicode.IsDigit, "graph": unicode.IsGraphic, "lower": unicode.IsLower, "print": unicode.IsPrint, "punct": unicode.IsPunct, "space": unicode.IsSpace, "upper": unicode.IsUpper, "xdigit": func(r rune) bool { return unicode.IsDigit(r) || ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F') }, } ) // anyRune returns true so long as the rune "r" appears in the string "s". func anyRune(s string) runeFn { return func(r rune) bool { return strings.ContainsRune(s, r) } } // between returns true so long as the rune "r" appears between "a" and "b". func between(a, b rune) runeFn { if b < a { a, b = b, a } return func(r rune) bool { return a <= r && r <= b } } ================================================ FILE: modules/wildmatch/wildmatch_casefold.go ================================================ //go:build windows || darwin package wildmatch func init() { SystemCase = CaseFold } ================================================ FILE: modules/wildmatch/wildmatch_nocasefold.go ================================================ //go:build !windows && !darwin package wildmatch func init() { SystemCase = func(w *Wildmatch) {} } ================================================ FILE: modules/wildmatch/wildmatch_test.go ================================================ package wildmatch import ( "runtime" "testing" ) type Case struct { Pattern string Subject string Match bool Opts []opt MatchOpts MatchOpts } func (c *Case) Assert(t *testing.T) { p, err := NewWildmatch(c.Pattern, c.Opts...) if err != nil { if c.Match { t.Errorf("could not parse: %s (%s)", c.Pattern, err) } return } if (c.MatchOpts != MatchOpts{} && p.MatchWithOpts(c.Subject, c.MatchOpts) != c.Match) || (c.MatchOpts == MatchOpts{} && p.Match(c.Subject) != c.Match) { if c.Match { t.Errorf("expected match: %s, %s", c.Pattern, c.Subject) } else { t.Errorf("unexpected match: %s, %s", c.Pattern, c.Subject) } } } var Cases = []*Case{ { Pattern: `foo`, Subject: `foo`, Match: true, }, { Pattern: `bar`, Subject: `foo`, Match: false, }, { Pattern: `???`, Subject: `foo`, Match: true, }, { Pattern: `??`, Subject: `foo`, Match: false, }, { Pattern: `*`, Subject: `foo`, Match: true, }, { Pattern: `f*`, Subject: `foo`, Match: true, }, { Pattern: `*f`, Subject: `foo`, Match: false, }, { Pattern: `*foo*`, Subject: `foo`, Match: true, }, { Pattern: `*ob*a*r*`, Subject: `foobar`, Match: true, }, { Pattern: `*ab`, Subject: `aaaaaaabababab`, Match: true, }, { Pattern: `foo\*`, Subject: `foo*`, Match: true, }, { Pattern: `foo\*bar`, Subject: `foobar`, Match: false, }, { Pattern: `f\\oo`, Subject: `f\oo`, Match: true, }, { Pattern: `*[al]?`, Subject: `ball`, Match: true, }, { Pattern: `[ten]`, Subject: `ten`, Match: false, }, { Pattern: `**[!te]`, Subject: `ten`, Match: true, }, { Pattern: `**[!ten]`, Subject: `ten`, Match: false, }, { Pattern: `t[a-g]n`, Subject: `ten`, Match: true, }, { Pattern: `t[!a-g]n`, Subject: `ten`, Match: false, }, { Pattern: `t[!a-g]n`, Subject: `ton`, Match: true, }, { Pattern: `t[^a-g]n`, Subject: `ton`, Match: true, }, { Pattern: `]`, Subject: `]`, Match: true, }, { Pattern: `foo*bar`, Subject: `foo/baz/bar`, Match: false, }, { Pattern: `foo?bar`, Subject: `foo/bar`, Match: false, }, { Pattern: `foo[/]bar`, Subject: `foo/bar`, Match: false, }, { Pattern: `f[^eiu][^eiu][^eiu][^eiu][^eiu]r`, Subject: `foo/bar`, Match: false, }, { Pattern: `f[^eiu][^eiu][^eiu][^eiu][^eiu]r`, Subject: `foo-bar`, Match: true, }, { Pattern: `**/foo`, Subject: `foo`, Match: true, }, { Pattern: `**/foo`, Subject: `/foo`, Match: true, }, { Pattern: `**/foo`, Subject: `bar/baz/foo`, Match: true, }, { Pattern: `*/foo`, Subject: `bar/baz/foo`, Match: false, }, { Pattern: `**/bar*`, Subject: `foo/bar/baz`, Match: false, }, { Pattern: `**/bar/*`, Subject: `deep/foo/bar/baz`, Match: true, }, { Pattern: `**/bar/*`, Subject: `deep/foo/bar/baz/`, Match: true, }, { Pattern: `**/bar/**`, Subject: `deep/foo/bar/baz/`, Match: true, }, { Pattern: `**/bar/*`, Subject: `deep/foo/bar`, Match: false, }, { Pattern: `**/bar/**`, Subject: `deep/foo/bar/`, Match: true, }, { Pattern: `**/bar/**`, Subject: `deep/foo/bar`, Match: false, }, { Pattern: `**/bar/**/*`, Subject: `deep/foo/bar/`, Match: true, }, { Pattern: `**/bar/**/*`, Subject: `deep/foo/bar`, Match: false, }, { Pattern: `**/bar/**/*`, Subject: `deep/bar/bar`, Match: false, }, { Pattern: `*/bar/**`, Subject: `foo/bar/baz/x`, Match: true, }, { Pattern: `*/bar/**`, Subject: `deep/foo/bar/baz/x`, Match: false, }, { Pattern: `**/bar/*/*`, Subject: `deep/foo/bar/baz/x`, Match: true, }, { Pattern: `*.txt`, Subject: `foo/bar/baz.txt`, Match: false, }, { Pattern: `*.txt`, Subject: `你好-世界.txt`, Match: true, }, { Pattern: `你好-世界.txt`, Subject: `你好-世界.txt`, Match: true, }, { Pattern: `foo*`, Subject: `foobar`, Match: true, }, { Pattern: `*foo*`, Subject: `somethingfoobar`, Match: true, }, { Pattern: `*foo`, Subject: `barfoo`, Match: true, }, { Pattern: `a[c-c]st`, Subject: `acrt`, Match: false, }, { Pattern: `a[c-c]rt`, Subject: `acrt`, Match: true, }, { Pattern: `\`, Subject: `''`, Match: false, }, { Pattern: `\`, Subject: `\`, Match: false, }, { Pattern: `*/\`, Subject: `/\`, Match: false, }, { Pattern: `foo`, Subject: `foo`, Match: true, }, { Pattern: `@foo`, Subject: `@foo`, Match: true, }, { Pattern: `@foo`, Subject: `foo`, Match: false, }, { Pattern: `\[ab]`, Subject: `[ab]`, Match: true, }, { Pattern: `[[]ab]`, Subject: `[ab]`, Match: true, }, { Pattern: `[[:]ab]`, Subject: `[ab]`, Match: true, }, { Pattern: `[[::]ab]`, Subject: `[ab]`, Match: false, }, { Pattern: `[[:digit]ab]`, Subject: `[ab]`, Match: false, }, { Pattern: `[\[:]ab]`, Subject: `[ab]`, Match: true, }, { Pattern: `\??\?b`, Subject: `?a?b`, Match: true, }, { Pattern: `''`, Subject: `foo`, Match: false, }, { Pattern: `**/t[o]`, Subject: `foo/bar/baz/to`, Match: true, }, { Pattern: `[[:alpha:]][[:digit:]][[:upper:]]`, Subject: `a1B`, Match: true, }, { Pattern: `[[:digit:][:upper:][:space:]]`, Subject: `a`, Match: false, }, { Pattern: `[[:digit:][:upper:][:space:]]`, Subject: `A`, Match: true, }, { Pattern: `[[:digit:][:upper:][:space:]]`, Subject: `1`, Match: true, }, { Pattern: `[[:digit:][:upper:][:spaci:]]`, Subject: `1`, Match: false, }, { Pattern: `'`, Subject: `'`, Match: true, }, { Pattern: `[[:digit:][:upper:][:space:]]`, Subject: `.`, Match: false, }, { Pattern: `[[:digit:][:punct:][:space:]]`, Subject: `.`, Match: true, }, { Pattern: `[[:xdigit:]]`, Subject: `5`, Match: true, }, { Pattern: `[[:xdigit:]]`, Subject: `f`, Match: true, }, { Pattern: `[[:xdigit:]]`, Subject: `D`, Match: true, }, { Pattern: `[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]`, Subject: `_`, Match: true, }, { Pattern: `[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:lower:][:space:][:upper:][:xdigit:]]`, Subject: `.`, Match: true, }, { Pattern: `[a-c[:digit:]x-z]`, Subject: `5`, Match: true, }, { Pattern: `[a-c[:digit:]x-z]`, Subject: `b`, Match: true, }, { Pattern: `[a-c[:digit:]x-z]`, Subject: `y`, Match: true, }, { Pattern: `[a-c[:digit:]x-z]`, Subject: `q`, Match: false, }, { Pattern: `[\\-^]`, Subject: `]`, Match: true, }, { Pattern: `[\\-^]`, Subject: `[`, Match: false, }, { Pattern: `a[]b`, Subject: `ab`, Match: false, }, { Pattern: `a[]b`, Subject: `a[]b`, Match: false, }, { Pattern: `[!`, Subject: `ab`, Match: false, }, { Pattern: `[-`, Subject: `ab`, Match: false, }, { Pattern: `[-]`, Subject: `-`, Match: true, }, { Pattern: `[a-`, Subject: `-`, Match: false, }, { Pattern: `[!a-`, Subject: `-`, Match: false, }, { Pattern: `'`, Subject: `'`, Match: true, }, { Pattern: `'[`, Subject: `0`, Match: false, }, { Pattern: `[---]`, Subject: `-`, Match: true, }, { Pattern: `[------]`, Subject: `-`, Match: true, }, { Pattern: `[!------]`, Subject: `a`, Match: true, }, { Pattern: `[a^bc]`, Subject: `^`, Match: true, }, { Pattern: `[\]`, Subject: `\`, Match: false, }, { Pattern: `[\\]`, Subject: `\`, Match: true, }, { Pattern: `[!\\]`, Subject: `\`, Match: false, }, { Pattern: `[A-\\]`, Subject: `G`, Match: true, }, { Pattern: `b*a`, Subject: `aaabbb`, Match: false, }, { Pattern: `*ba*`, Subject: `aabcaa`, Match: false, }, { Pattern: `[,]`, Subject: `,`, Match: true, }, { Pattern: `[\\,]`, Subject: `,`, Match: true, }, { Pattern: `[\\,]`, Subject: `\`, Match: true, }, { Pattern: `[,-.]`, Subject: `-`, Match: true, }, { Pattern: `[,-.]`, Subject: `+`, Match: false, }, { Pattern: `[,-.]`, Subject: `-.]`, Match: false, }, { Pattern: `-*-*-*-*-*-*-12-*-*-*-m-*-*-*`, Subject: `-adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1`, Match: true, }, { Pattern: `-*-*-*-*-*-*-12-*-*-*-m-*-*-*`, Subject: `-adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1`, Match: false, }, { Pattern: `-*-*-*-*-*-*-12-*-*-*-m-*-*-*`, Subject: `-adobe-courier-bold-o-normal--12-120-75-75-/-70-iso8859-1`, Match: false, }, { Pattern: `**/*a*b*g*n*t`, Subject: `abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt`, Match: true, }, { Pattern: `**/*a*b*g*n*t`, Subject: `abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txtz`, Match: false, }, { Pattern: `file[[:space:]]with[[:space:]]spaces.\#`, Subject: `file with spaces.#`, Match: true, }, { Pattern: `foo`, Subject: `FOO`, Match: false, }, { Pattern: `foo`, Subject: `FOO`, Opts: []opt{CaseFold}, Match: true, }, { Pattern: `**/a*.txt`, Subject: `foo-a.txt`, Match: false, }, { Pattern: `*.txt`, Subject: `file.txt`, Opts: []opt{Basename}, Match: true, }, { Pattern: `file.txt`, Subject: `file.txt`, Opts: []opt{Basename, Contents}, Match: true, }, { Pattern: `*.txt`, Subject: `path/to/file.txt`, Opts: []opt{Basename}, Match: true, }, { Pattern: `path/to/*.txt`, Subject: `path/to/file.txt`, Opts: []opt{Basename}, Match: true, }, { Pattern: `path/to/*.txt`, Subject: `path/to/file.txt`, Match: true, }, { Pattern: `path/to/*.txt`, Subject: `outside/of/path/to/file.txt`, Opts: []opt{Basename}, Match: false, }, { Pattern: `path/to/*.txt`, Subject: `path/to/some/intermediaries/to/file.txt`, Opts: []opt{Basename}, Match: false, }, { Pattern: `path/`, Subject: `path/to/some/intermediaries/to/file.txt`, Match: false, }, { // GitAttribute-style matching directory. // false becalse gitattribute never matches directories. Pattern: `anotherfile.txt/`, Subject: `anotherfile.txt`, Opts: []opt{GitAttributes}, MatchOpts: MatchOpts{IsDirectory: true}, Match: false, }, { // gitAttribute-style matching normal file. // false as gitattribute matches ending in '/' indicate // trying to match directory but gitattribute never matches directory Pattern: `anotherfile1.txt/`, Subject: `anotherfile1.txt`, Opts: []opt{GitAttributes}, Match: false, }, { // gitignore-style matching directory. Pattern: `anotherfile2.txt/`, Subject: `anotherfile2.txt`, MatchOpts: MatchOpts{IsDirectory: true}, Match: true, }, { Pattern: `anotherfile3.txt/`, Subject: `anotherfile3.txt`, Match: false, }, { Pattern: `anotherfile4.txt`, Subject: `anotherfile4.txt/`, Opts: []opt{GitAttributes}, Match: false, }, { Pattern: `**/pdfkit.frameworks/pdfkit/**`, Subject: `MyFolder/libs/pdfkit.frameworks/pdfkit`, Match: false, }, { Pattern: `foo/`, Subject: `bar/baz/foo`, MatchOpts: MatchOpts{IsDirectory: true}, Match: true, }, { Pattern: `foo/`, Subject: `foo`, MatchOpts: MatchOpts{IsDirectory: true}, Match: true, }, { Pattern: `foo/`, Subject: `foo/`, Match: true, }, { Pattern: `/foo/`, Subject: `foo/`, Match: true, }, { Pattern: `big/b`, Subject: `big/b/b1`, Opts: []opt{Contents}, Match: true, }, { Pattern: `big`, Subject: `big/b/b1`, Opts: []opt{Contents}, Match: true, }, { Pattern: `b`, Subject: `big/b/b1`, Opts: []opt{Contents}, Match: true, }, { Pattern: `/foo/`, Subject: `foo/`, Opts: []opt{Contents}, Match: true, }, { Pattern: `/foo/`, Subject: `foo/`, Opts: []opt{Basename, Contents}, Match: true, }, { Pattern: `/foo`, Subject: `foo`, Match: true, }, { Pattern: `/foo/filename.txt`, Subject: `foo/filename.txt`, Match: true, }, { Pattern: `/foo/filename.txt`, Subject: `bar/foo/filename.txt`, Match: false, }, { Pattern: `/foo/*.txt`, Subject: `foo/filename.txt`, Match: true, }, { Pattern: `/*.txt`, Subject: `foo/filename.txt`, Match: false, }, { Pattern: `/foo/*.txt`, Subject: `bar/foo/filename.txt`, Match: false, }, { Pattern: `/foo/`, Subject: `foo`, MatchOpts: MatchOpts{IsDirectory: true}, Match: true, }, { Pattern: `/foo/`, Subject: `foo/filename.txt`, Opts: []opt{Contents}, Match: true, }, { Pattern: `/foo/**`, Subject: `foo/filename.txt`, Match: true, }, { Pattern: `path/`, Subject: `path/to/some/intermediaries/to/file.txt`, Opts: []opt{Contents}, Match: true, }, { Pattern: `to/`, Subject: `path/to/some/intermediaries/to/file.txt`, Opts: []opt{Contents}, Match: true, }, { Pattern: `nonexistent/`, Subject: `path/to/some/intermediaries/to/file.txt`, Opts: []opt{Contents}, Match: false, }, } func TestWildmatch(t *testing.T) { for _, c := range Cases { c.Assert(t) } } type SlashCase struct { Given string Expect string } func (c *SlashCase) Assert(t *testing.T) { got := slashEscape(c.Given) if c.Expect != got { t.Errorf("wildmatch: expected slashEscape(\"%s\") -> %s, got: %s", c.Given, c.Expect, got, ) } } func TestSlashEscape(t *testing.T) { for _, c := range []*SlashCase{ {Given: ``, Expect: ``}, {Given: `foo/bar`, Expect: `foo/bar`}, {Given: `foo\bar`, Expect: `foo/bar`}, {Given: `foo\*bar`, Expect: `foo\*bar`}, {Given: `foo\?bar`, Expect: `foo\?bar`}, {Given: `foo\[bar`, Expect: `foo\[bar`}, {Given: `foo\]bar`, Expect: `foo\]bar`}, {Given: `foo\#bar`, Expect: `foo\#bar`}, } { c.Assert(t) } } func TestCaseFold(t *testing.T) { m, err := NewWildmatch("*.bin", SystemCase) if err != nil { t.Errorf("wildmatch: %v", err) } if runtime.GOOS == "windows" || runtime.GOOS == "darwin" { if !m.Match("UPCASE.BIN") { t.Errorf("wildmatch: expected system case to be folding") } } else if m.Match("UPCASE.BIN") { t.Errorf("wildmatch: expected system case to be non-folding") } } ================================================ FILE: modules/zeta/backend/decode.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package backend import ( "bytes" "context" "encoding/binary" "errors" "io" "os" "strings" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" "github.com/antgroup/hugescm/modules/zeta/backend/pack" "github.com/antgroup/hugescm/modules/zeta/object" ) const ( BLANK_BLOB = "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262" ) var ( BLANK_BLOB_HASH = plumbing.NewHash(BLANK_BLOB) ) var ( ErrObjectNotCached = errors.New("object cannot be cached") ) func (d *Database) store(a any) error { if !d.enableLRU { return nil } switch v := a.(type) { case *object.Commit: // don't save backend _ = d.metaLRU.Set(v.Hash.String(), object.NewSnapshotCommit(v, nil), 1) case *object.Tree: // don't save backend _ = d.metaLRU.Set(v.Hash.String(), object.NewSnapshotTree(v, nil), 1) case *object.Fragments: _ = d.metaLRU.Set(v.Hash.String(), v, 1) case *object.Tag: _ = d.metaLRU.Set(v.Hash.String(), v, 1) default: return ErrObjectNotCached } return nil } func (d *Database) fromCache(oid plumbing.Hash) (any, error) { a, ok := d.metaLRU.Get(oid.String()) if !ok { return nil, os.ErrNotExist } switch v := a.(type) { case *object.Commit: return object.NewSnapshotCommit(v, d), nil case *object.Tree: return object.NewSnapshotTree(v, d), nil case *object.Fragment: return v, nil case *object.Tag: return v, nil default: } return nil, ErrObjectNotCached } func (d *Database) Exists(oid plumbing.Hash, metadata bool) error { d.mu.RLock() defer d.mu.RUnlock() if metadata { return d.metaRO.Exists(oid) } if oid == BLANK_BLOB_HASH { return nil } return d.ro.Exists(oid) } // Object: find object and set backend // decode and set backend func (d *Database) Object(_ context.Context, oid plumbing.Hash) (any, error) { if oid == plumbing.EmptyTree { return object.NewEmptyTree(d.backend), nil } d.mu.RLock() defer d.mu.RUnlock() if d.enableLRU { if a, err := d.fromCache(oid); err == nil { return a, nil } } rc, err := d.metaRO.Open(oid) if err != nil { return nil, err } defer rc.Close() // nolint a, err := object.Decode(rc, oid, d.backend) if err == nil { _ = d.store(a) } return a, err } func (d *Database) Commit(ctx context.Context, oid plumbing.Hash) (*object.Commit, error) { a, err := d.Object(ctx, oid) if err != nil { return nil, err } if c, ok := a.(*object.Commit); ok { return c, nil } return nil, NewErrMismatchedObjectType(oid, "commit") } func (d *Database) ParseRevEx(ctx context.Context, oid plumbing.Hash) (*object.Commit, []plumbing.Hash, error) { objects := make([]plumbing.Hash, 0, 2) for range 10 { a, err := d.Object(ctx, oid) if err != nil { return nil, nil, err } if c, ok := a.(*object.Commit); ok { return c, objects, nil } t, ok := a.(*object.Tag) if !ok { return nil, nil, NewErrMismatchedObjectType(oid, "tag") } objects = append(objects, oid) if t.ObjectType != object.CommitObject && t.ObjectType != object.TagObject { return nil, nil, NewErrMismatchedObjectType(oid, "commit") } oid = t.Object } return nil, nil, NewErrMismatchedObjectType(oid, "commit") } func (d *Database) Tree(ctx context.Context, oid plumbing.Hash) (*object.Tree, error) { a, err := d.Object(ctx, oid) if err != nil { return nil, err } if t, ok := a.(*object.Tree); ok { return t, nil } return nil, NewErrMismatchedObjectType(oid, "tree") } func (d *Database) Fragments(ctx context.Context, oid plumbing.Hash) (*object.Fragments, error) { a, err := d.Object(ctx, oid) if err != nil { return nil, err } if f, ok := a.(*object.Fragments); ok { return f, nil } return nil, NewErrMismatchedObjectType(oid, "fragments") } func (d *Database) Tag(ctx context.Context, oid plumbing.Hash) (*object.Tag, error) { a, err := d.Object(ctx, oid) if err != nil { return nil, err } if t, ok := a.(*object.Tag); ok { return t, nil } return nil, NewErrMismatchedObjectType(oid, "tag") } func (d *Database) Blob(_ context.Context, oid plumbing.Hash) (br *object.Blob, err error) { if oid == BLANK_BLOB_HASH { return &object.Blob{Contents: strings.NewReader("")}, nil } d.mu.RLock() defer d.mu.RUnlock() var rc io.ReadCloser if rc, err = d.ro.Open(oid); err != nil { return nil, err } if br, err = object.NewBlob(rc); err != nil { _ = rc.Close() } return } type SizeReader interface { io.Reader io.Closer Size() int64 } type sizeReader struct { io.Reader closer io.Closer size int64 } func (sr *sizeReader) Close() error { if sr.closer == nil { return nil } return sr.closer.Close() } func (sr *sizeReader) Size() int64 { return sr.size } const ( // ZSTD_MAGIC: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frames ZSTD_MAGIC = 0xFD2FB528 ) func isZstdMagic(magic [4]byte) bool { return binary.LittleEndian.Uint32(magic[:]) == ZSTD_MAGIC } func (d *Database) metaSizeReader(oid plumbing.Hash) (SizeReader, error) { rc, err := d.metaRO.Open(oid) if err != nil { return nil, err } var magic [4]byte if _, err := io.ReadFull(rc, magic[:]); err != nil { return nil, err } // TODO: When the server supports compressed metadata, we don't need to decompress it. if isZstdMagic(magic) { defer rc.Close() // nolint b := &bytes.Buffer{} zr, err := streamio.GetZstdReader(rc) if err != nil { return nil, err } defer streamio.PutZstdReader(zr) if _, err := zr.WriteTo(b); err != nil { return nil, err } rawBytes := b.Bytes() return &sizeReader{Reader: bytes.NewReader(rawBytes), size: int64(len(rawBytes))}, nil } reader := io.MultiReader(bytes.NewReader(magic[:]), rc) switch v := rc.(type) { case *os.File: si, err := v.Stat() if err != nil { _ = v.Close() return nil, err } return &sizeReader{Reader: reader, closer: v, size: si.Size()}, nil case *pack.SizeReader: return &sizeReader{Reader: reader, closer: v, size: v.Size()}, nil default: } _ = rc.Close() return nil, errors.New("unable detect reader size") } func (d *Database) Size(oid plumbing.Hash, meta bool) (size int64, err error) { var sr SizeReader if sr, err = d.SizeReader(oid, meta); err != nil { return } size = sr.Size() _ = sr.Close() return } func (d *Database) SizeReader(oid plumbing.Hash, meta bool) (SizeReader, error) { d.mu.RLock() defer d.mu.RUnlock() if meta { return d.metaSizeReader(oid) } rc, err := d.ro.Open(oid) if err != nil { return nil, err } switch v := rc.(type) { case *os.File: si, err := v.Stat() if err != nil { _ = v.Close() return nil, err } return &sizeReader{Reader: v, closer: v, size: si.Size()}, nil case *pack.SizeReader: return &sizeReader{Reader: v, closer: v, size: v.Size()}, nil default: } _ = rc.Close() return nil, errors.New("unable detect reader size") } type readCloser struct { io.Reader closeFn func() error } func (r *readCloser) Close() error { if r.closeFn == nil { return nil } return r.closeFn() } func (d *Database) OpenReader(oid plumbing.Hash, meta bool) (io.ReadCloser, error) { d.mu.RLock() defer d.mu.RUnlock() if !meta { return d.ro.Open(oid) } rc, err := d.metaRO.Open(oid) if err != nil { return nil, err } var magic [4]byte if _, err := io.ReadFull(rc, magic[:]); err != nil { return nil, err } // TODO: When the server supports compressed metadata, we don't need to decompress it. if isZstdMagic(magic) { defer rc.Close() // nolint zr, err := streamio.GetZstdReader(rc) if err != nil { return nil, err } return &readCloser{Reader: zr, closeFn: func() error { streamio.PutZstdReader(zr) return rc.Close() }}, nil } return &readCloser{ Reader: io.MultiReader(bytes.NewReader(magic[:]), rc), closeFn: func() error { return rc.Close() }}, nil } func (d *Database) Search(prefix string) (oid plumbing.Hash, err error) { h := plumbing.NewHash(prefix) if oid, err = d.metaRO.Search(h); err == nil { return } if !plumbing.IsNoSuchObject(err) { return } oid, err = d.ro.Search(h) return } ================================================ FILE: modules/zeta/backend/encode.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package backend import ( "context" "fmt" "io" "os" "path/filepath" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/object" ) func (d *Database) WriteEncoded(e object.Encoder) (oid plumbing.Hash, err error) { d.mu.RLock() defer d.mu.RUnlock() return d.metaRW.WriteEncoded(e) } // HashTo: // // size == -1: unknown file size, need to detect file size. // size == 0: empty file, returns the specified BLOB. // size > 0: the file size is known. func (d *Database) HashTo(ctx context.Context, r io.Reader, size int64) (oid plumbing.Hash, err error) { if size == 0 { return BLANK_BLOB_HASH, nil } d.mu.RLock() defer d.mu.RUnlock() return d.rw.HashTo(ctx, r, size) } func (d *Database) WriteTo(ctx context.Context, oid plumbing.Hash, r io.Reader) error { d.mu.RLock() defer d.mu.RUnlock() return d.rw.Unpack(oid, r) } func (d *Database) JoinPart(oid plumbing.Hash) string { name := oid.String() + ".part" if len(d.sharingRoot) != 0 { return filepath.Join(d.sharingRoot, "incoming", name) } return filepath.Join(d.root, "incoming", name) } func (d *Database) ValidatePart(saveTo string, oid plumbing.Hash) error { fd, err := os.Open(saveTo) if err != nil { return err } return d.ValidateFD(fd, oid) } func (d *Database) newPartName(oid plumbing.Hash) string { encoded := oid.String() partName := encoded + ".part" if len(d.sharingRoot) != 0 { return filepath.Join(d.sharingRoot, "incoming", partName) } return filepath.Join(d.root, "incoming", partName) } func (d *Database) encodedPath(oid plumbing.Hash) string { encoded := oid.String() if len(d.sharingRoot) != 0 { return filepath.Join(d.sharingRoot, "blob", encoded[:2], encoded[2:4], encoded) } return filepath.Join(d.root, "blob", encoded[:2], encoded[2:4], encoded) } // NewFD: new file fd func (d *Database) NewFD(oid plumbing.Hash) (*os.File, error) { return os.OpenFile(d.newPartName(oid), os.O_APPEND|os.O_CREATE|os.O_RDWR, 0644) } func (d *Database) NewTruncateFD(oid plumbing.Hash) (*os.File, error) { return os.OpenFile(d.newPartName(oid), os.O_TRUNC|os.O_CREATE|os.O_RDWR, 0644) } func (d *Database) validateFD(fd *os.File, oid plumbing.Hash) error { defer fd.Close() // nolint if _, err := fd.Seek(0, io.SeekStart); err != nil { return err } b, err := object.NewBlob(io.NopCloser(fd)) if err != nil { return err } h := plumbing.NewHasher() if _, err := io.Copy(h, b.Contents); err != nil { return err } if s := h.Sum(); s != oid { return fmt.Errorf("bad blob oid: want '%s' got '%s'", oid, s) } _ = fd.Chmod(0444) // Set blob to read-only return nil } func (d *Database) ValidateFD(fd *os.File, oid plumbing.Hash) error { saveTo := d.encodedPath(oid) name := fd.Name() if err := d.validateFD(fd, oid); err != nil { _ = os.Remove(name) return err } if err := os.MkdirAll(filepath.Dir(saveTo), 0755); err != nil { _ = os.Remove(name) return err } if err := finalizeObject(name, saveTo); err != nil { _ = os.Remove(name) return err } return nil } ================================================ FILE: modules/zeta/backend/errors.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package backend import ( "errors" "fmt" "github.com/antgroup/hugescm/modules/plumbing" ) type ErrMismatchedObjectType struct { oid plumbing.Hash t string } func (e *ErrMismatchedObjectType) Error() string { return fmt.Sprintf("object %s not %s", e.oid, e.t) } func IsErrMismatchedObjectType(err error) bool { var e *ErrMismatchedObjectType return errors.As(err, &e) } func NewErrMismatchedObjectType(oid plumbing.Hash, t string) error { return &ErrMismatchedObjectType{oid: oid, t: t} } ================================================ FILE: modules/zeta/backend/file_storer.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package backend import ( "errors" "bytes" "context" "encoding/binary" "fmt" "io" "io/fs" "os" "path/filepath" "strings" "github.com/antgroup/hugescm/modules/mime" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/zeta/backend/storage" "github.com/antgroup/hugescm/modules/zeta/object" ) const ( mimePacketSize = 4096 DEFAULT_BLOB_VERSION uint16 = 1 ) var ( BLOB_MAGIC = [4]byte{'Z', 'B', 0x00, 0x01} ) type CompressMethod uint16 const ( STORE CompressMethod = 0 ZSTD CompressMethod = 1 BROTLI CompressMethod = 2 DEFLATE CompressMethod = 3 XZ CompressMethod = 4 BZ2 CompressMethod = 5 ) func fromCompressionALGO(compressionALGO string) CompressMethod { switch strings.ToLower(compressionALGO) { case "zlib", "deflate": return DEFLATE case "xz": return XZ case "bz2": return BZ2 case "brotli": return BROTLI default: // zstd } return ZSTD } func isBinaryPayload(payload []byte) bool { result := mime.DetectAny(payload) for p := result; p != nil; p = p.Parent() { if p.Is("text/plain") { return false } } return true } // fileStorer implements the storer interface by writing to the .git/objects // directory on disc. type fileStorer struct { // root is the top level /objects directory's path on disc. root string // temp directory, defaults to os.TempDir incoming string selectedMethod CompressMethod } var ( _ storage.Storage = &fileStorer{} ) // NewFileStorer returns a new fileStorer instance with the given root. func newFileStorer(root, incoming, compressionALGO string) *fileStorer { return &fileStorer{ root: root, incoming: incoming, selectedMethod: fromCompressionALGO(compressionALGO), } } func Join(root string, oid plumbing.Hash) string { encoded := oid.String() return filepath.Join(root, encoded[:2], encoded[2:4], encoded) } // path returns an absolute path on disk to the object given by the OID "sha". func (fo *fileStorer) path(oid plumbing.Hash) string { encoded := oid.String() return filepath.Join(fo.root, encoded[:2], encoded[2:4], encoded) } // Open implements the storer.Open function, and returns a io.ReadCloser // for the given SHA. If the file does not exist, or if there was any other // error in opening the file, an error will be returned. // // It is the caller's responsibility to close the given file "f" after its use // is complete. func (fo *fileStorer) Open(oid plumbing.Hash) (f io.ReadCloser, err error) { f, err = fo.open(fo.path(oid), os.O_RDONLY) if os.IsNotExist(err) { return nil, plumbing.NoSuchObject(oid) } return f, err } func (fo *fileStorer) Exists(oid plumbing.Hash) error { p := fo.path(oid) if _, err := os.Stat(p); err != nil && os.IsNotExist(err) { return plumbing.NoSuchObject(oid) } return nil } // Root gives the absolute (fully-qualified) path to the file storer on disk. func (fo *fileStorer) Root() string { return fo.root } // Close closes the file storer. func (fo *fileStorer) Close() error { return nil } // open opens a given file. func (fo *fileStorer) open(path string, flag int) (*os.File, error) { return os.OpenFile(path, flag, 0) } // method: compressed flag --> content has been compressed func (fo *fileStorer) method(compressed bool) CompressMethod { if compressed { return STORE } return fo.selectedMethod } type ExtendWriter interface { io.ReaderFrom io.Writer } func compress(r io.Reader, w ExtendWriter, method CompressMethod) (written int64, err error) { switch method { case STORE: return w.ReadFrom(r) case ZSTD: zw := streamio.GetZstdWriter(w) defer streamio.PutZstdWriter(zw) return zw.ReadFrom(r) case DEFLATE: zw := streamio.GetZlibWriter(w) defer streamio.PutZlibWriter(zw) return io.Copy(zw, r) default: return 0, fmt.Errorf("unsupported method: %d", method) } } // hashToInternal: write reader to disk // 4 byte magic // 2 byte version // 2 byte method // 8 byte uncompressed length // N bytes raw or compressed data func (fo *fileStorer) hashToInternal(fd *os.File, r io.Reader, size int64, compressed bool) error { var err error // 4 byte magic if _, err := fd.Write(BLOB_MAGIC[:]); err != nil { return err } // 2 byte version if err := binary.Write(fd, binary.BigEndian, DEFAULT_BLOB_VERSION); err != nil { return err } // 2 byte method method := fo.method(compressed) if err := binary.Write(fd, binary.BigEndian, method); err != nil { return err } // 8 byte uncompressed length if err = binary.Write(fd, binary.BigEndian, size); err != nil { return err } bytes, err := compress(r, fd, method) if err != nil { return err } if size >= 0 { if size != bytes { return fmt.Errorf("blob size not match expected, actual size %d, expected size %d", bytes, size) } return nil } if err := fd.Sync(); err != nil { return err } if _, err := fd.Seek(8, io.SeekStart); err != nil { return err } if err := binary.Write(fd, binary.BigEndian, bytes); err != nil { return err } return nil } func mkdir(paths ...string) error { for _, path := range paths { // os.MkdirAll check dir exists if err := os.MkdirAll(path, 0755); err != nil { return err } } return nil } func finalizeObject(oldPath string, newPath string) (err error) { if err = strengthen.FinalizeObject(oldPath, newPath); err == nil { _ = os.Chmod(newPath, 0444) } return } // HashTo encode input reader to blob // BLOB format // // 4 byte magic // 2 byte version // 2 byte method // 8 byte uncompressed length // N bytes raw or compressed data func (fo *fileStorer) HashTo(ctx context.Context, r io.Reader, size int64) (oid plumbing.Hash, err error) { var payload []byte if payload, err = streamio.ReadMax(r, mimePacketSize); err != nil && !errors.Is(err, io.EOF) { return oid, fmt.Errorf("ReadFull error: %w", err) } compressed := isBinaryPayload(payload) var contents io.Reader = bytes.NewReader(payload) if !errors.Is(err, io.EOF) { contents = io.MultiReader(contents, r) } hasher := plumbing.NewHasher() if err = mkdir(fo.incoming); err != nil { return } var fd *os.File if fd, err = os.CreateTemp(fo.incoming, "blob"); err != nil { return oid, err } incomingPath := fd.Name() if err = fo.hashToInternal(fd, io.TeeReader(contents, hasher), size, compressed); err != nil { _ = fd.Close() _ = os.Remove(incomingPath) return } _ = fd.Sync() // flush _ = fd.Close() oid = hasher.Sum() objectPath := fo.path(oid) if err = os.MkdirAll(filepath.Dir(objectPath), 0755); err != nil { _ = os.Remove(incomingPath) return } if err = finalizeObject(incomingPath, objectPath); err != nil { _ = os.Remove(incomingPath) return } return } func (fo *fileStorer) WriteEncoded(e object.Encoder) (oid plumbing.Hash, err error) { var fd *os.File if err = mkdir(fo.incoming); err != nil { return } if fd, err = os.CreateTemp(fo.incoming, "metadata"); err != nil { return oid, err } incomingPath := fd.Name() hasher := plumbing.NewHasher() if err = e.Encode(io.MultiWriter(hasher, fd)); err != nil { _ = fd.Close() _ = os.Remove(incomingPath) return } _ = fd.Sync() // flush _ = fd.Close() oid = hasher.Sum() metaObjectPath := fo.path(oid) if err = os.MkdirAll(filepath.Dir(metaObjectPath), 0755); err != nil { _ = os.Remove(incomingPath) return } if err = finalizeObject(incomingPath, metaObjectPath); err != nil { _ = os.Remove(incomingPath) return } return } var ( ignoreDir = map[string]bool{ "pack": true, } ) func (fo *fileStorer) Search(prefix plumbing.Hash) (oid plumbing.Hash, err error) { prefixStr := prefix.Prefix() searchRoot := filepath.Join(fo.root, prefixStr[0:2], prefixStr[2:4]) err = filepath.WalkDir(searchRoot, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { if ignoreDir[d.Name()] { return filepath.SkipDir } return nil } name := d.Name() if !strings.HasPrefix(name, prefixStr) { return nil } if !plumbing.ValidateHashHex(name) { return nil } oid = plumbing.NewHash(name) return filepath.SkipAll }) if oid.IsZero() { return oid, plumbing.NoSuchObject(prefix) } return } type LooseObject struct { Hash plumbing.Hash Size int64 Modification int64 } type LooseObjects []*LooseObject func (fo *fileStorer) looseObjects(sizeMax int64) (LooseObjects, error) { objects := make([]*LooseObject, 0, 100) err := filepath.WalkDir(fo.root, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { if ignoreDir[d.Name()] { return filepath.SkipDir } return nil } name := d.Name() if !plumbing.ValidateHashHex(name) { return nil } si, err := d.Info() if err != nil { return err } // skip large files if si.Size() > sizeMax { return nil } objects = append(objects, &LooseObject{Hash: plumbing.NewHash(name), Size: si.Size(), Modification: si.ModTime().Unix()}) return nil }) return objects, err } func (fo *fileStorer) LooseObjects() ([]plumbing.Hash, error) { oids := make([]plumbing.Hash, 0, 100) err := filepath.WalkDir(fo.root, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { if ignoreDir[d.Name()] { return filepath.SkipDir } return nil } name := d.Name() if !plumbing.ValidateHashHex(name) { return nil } oids = append(oids, plumbing.NewHash(name)) return nil }) return oids, err } func (fo *fileStorer) Unpack(oid plumbing.Hash, r io.Reader) (err error) { if err = mkdir(fo.incoming); err != nil { return } var fd *os.File if fd, err = os.CreateTemp(fo.incoming, "object"); err != nil { return } incomingPath := fd.Name() if _, err = fd.ReadFrom(r); err != nil { _ = fd.Close() _ = os.Remove(incomingPath) return } _ = fd.Close() objectPath := fo.path(oid) if err = os.MkdirAll(filepath.Dir(objectPath), 0755); err != nil { _ = os.Remove(incomingPath) return } if err = finalizeObject(incomingPath, objectPath); err != nil { _ = os.Remove(incomingPath) return } return } // func removeEmptyDirs(ctx context.Context, target string) (int, error) { // if err := ctx.Err(); err != nil { // return 0, err // } // entries, err := os.ReadDir(target) // switch { // case os.IsNotExist(err): // return 0, nil // race condition: someone else deleted it first // case err != nil: // return 0, err // } // prunedDirsTotal := 0 // for _, e := range entries { // if !e.IsDir() { // continue // } // prunedDirs, err := removeEmptyDirs(ctx, filepath.Join(target, e.Name())) // if err != nil { // return prunedDirsTotal, err // } // prunedDirsTotal += prunedDirs // } // // recheck entries now that we have potentially removed some dirs // entries, err = os.ReadDir(target) // if err != nil && !os.IsNotExist(err) { // return prunedDirsTotal, err // } // if len(entries) > 0 { // return prunedDirsTotal, nil // } // switch err := os.Remove(target); { // case os.IsNotExist(err): // return prunedDirsTotal, nil // race condition: someone else deleted it first // case err != nil: // return prunedDirsTotal, err // } // return prunedDirsTotal + 1, nil // } func removeDirIfEmpty(ctx context.Context, target string) (total int, deleted bool, err error) { entries, err := os.ReadDir(target) switch { case os.IsNotExist(err): return 0, true, nil // race condition: someone else deleted it first case err != nil: return 0, false, err } var removedEntries int for _, e := range entries { if !e.IsDir() { return } name := filepath.Join(target, e.Name()) var sd int var ok bool if sd, ok, err = removeDirIfEmpty(ctx, name); err != nil { return } if ok { removedEntries++ } total += sd } if removedEntries != len(entries) { return total, false, nil } switch err = os.Remove(target); { case os.IsExist(err): return total, false, nil case err != nil: return total, false, err } return total + 1, true, nil } func (fo *fileStorer) Prune(ctx context.Context) (int, error) { total, _, err := removeDirIfEmpty(ctx, fo.root) return total, err } func (fo *fileStorer) PruneObject(ctx context.Context, oid plumbing.Hash) error { if err := ctx.Err(); err != nil { return err } p := fo.path(oid) if err := os.Remove(p); err != nil { return err } return nil } func (fo *fileStorer) PruneObjects(ctx context.Context, largeSize int64) ([]plumbing.Hash, int64, error) { oids := make([]plumbing.Hash, 0, 100) var totalSize int64 err := filepath.WalkDir(fo.root, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } if d.IsDir() { if ignoreDir[d.Name()] { return filepath.SkipDir } return nil } name := d.Name() if !plumbing.ValidateHashHex(name) { return nil } si, err := d.Info() if err != nil { return err } size := si.Size() if size < largeSize { return nil } if err = os.Remove(filepath.Join(path, name)); err == nil { oids = append(oids, plumbing.NewHash(name)) totalSize += size return nil } if !os.IsNotExist(err) { return err } return nil }) return oids, totalSize, err } ================================================ FILE: modules/zeta/backend/odb.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package backend import ( "errors" "fmt" "io" "path/filepath" "sync" "sync/atomic" "github.com/antgroup/hugescm/modules/zeta/backend/pack" "github.com/antgroup/hugescm/modules/zeta/backend/storage" "github.com/antgroup/hugescm/modules/zeta/object" "github.com/dgraph-io/ristretto/v2" ) const ( DefaultHashALGO = "BLAKE3" DefaultCompressionALGO = "zstd" ) type Database struct { root string sharingRoot string compressionALGO string // ro is the locations from which we can read objects. metaRO storage.Storage metaRW storage.WritableStorage ro storage.Storage rw storage.WritableStorage metaLRU *ristretto.Cache[string, any] // closed is a uint32 managed by sync/atomic's Uint32 methods. It // yields a value of 0 if the *Database it is stored upon is open, // and a value of 1 if it is closed. closed uint32 mu sync.RWMutex backend object.Backend enableLRU bool } type Option func(*Database) func WithSharingRoot(sharingRoot string) Option { return func(d *Database) { if len(sharingRoot) != 0 { d.sharingRoot = sharingRoot } } } func WithEnableLRU(enableLRU bool) Option { return func(d *Database) { d.enableLRU = enableLRU } } func WithAbstractBackend(backend object.Backend) Option { return func(d *Database) { d.backend = backend } } func WithCompressionALGO(compressionALGO string) Option { return func(d *Database) { if len(compressionALGO) != 0 { d.compressionALGO = compressionALGO } } } func (d *Database) Reload() error { d.mu.Lock() defer d.mu.Unlock() if err := d.initializeMetadataStorage(); err != nil { return fmt.Errorf("reload metadata storage error: %w", err) } if err := d.initializeBlobStorage(); err != nil { _ = d.metaRO.Close() _ = d.metaRW.Close() return fmt.Errorf("reload objects storage error: %w", err) } return nil } func NewDatabase(root string, opts ...Option) (*Database, error) { d := &Database{ root: root, compressionALGO: DefaultCompressionALGO, } for _, o := range opts { o(d) } if err := d.Reload(); err != nil { return nil, err } if d.backend == nil { d.backend = d } return d, nil } func (d *Database) initializeBlobStorage() error { if d.ro != nil { _ = d.ro.Close() d.ro = nil } if d.rw != nil { _ = d.rw.Close() d.rw = nil } zetaDir := d.root if len(d.sharingRoot) != 0 { zetaDir = d.sharingRoot } root := filepath.Join(zetaDir, "blob") incoming := filepath.Join(zetaDir, "incoming") if err := mkdir(root, incoming); err != nil { return err } fo := newFileStorer(root, incoming, d.compressionALGO) packs, err := pack.NewStorage(root) if err != nil { return err } d.ro = storage.MultiStorage(fo, packs) d.rw = fo return nil } func (d *Database) initializeMetadataStorage() error { if d.metaRO != nil { _ = d.metaRO.Close() d.metaRO = nil } if d.metaRW != nil { _ = d.metaRW.Close() d.metaRW = nil } root := filepath.Join(d.root, "metadata") incoming := filepath.Join(d.root, "incoming") if err := mkdir(root, incoming); err != nil { return err } fo := newFileStorer(root, incoming, d.compressionALGO) packs, err := pack.NewStorage(root) if err != nil { return err } d.metaRO = storage.MultiStorage(fo, packs) d.metaRW = fo if !d.enableLRU { return nil } if d.metaLRU != nil { d.metaLRU.Close() d.metaLRU = nil } if d.metaLRU, err = ristretto.NewCache(&ristretto.Config[string, any]{ NumCounters: 100000, MaxCost: 100000, BufferItems: 64, }); err != nil { return err } return nil } func closeSafe(a ...io.Closer) error { errs := make([]error, 0, len(a)) for _, c := range a { if c == nil { continue } if err := c.Close(); err != nil { errs = append(errs, err) } } return errors.Join(errs...) } // Close closes the *Database // // If Close() has already been called, this function will return an error. func (d *Database) Close() error { if !atomic.CompareAndSwapUint32(&d.closed, 0, 1) { return errors.New("zeta: *Database already closed") } return closeSafe(d.ro, d.metaRO, d.rw, d.metaRW) } func (d *Database) CompressionALGO() string { return d.compressionALGO } func (d *Database) Root() string { return d.root } ================================================ FILE: modules/zeta/backend/odb_test.go ================================================ package backend import ( "fmt" "io" "os" "runtime" "testing" "time" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/backend/pack" "github.com/antgroup/hugescm/modules/zeta/object" ) func TestHashTo(t *testing.T) { db, err := NewDatabase("/tmp/blat/.zeta") if err != nil { fmt.Fprintf(os.Stderr, "open database error: %v\n", err) return } defer db.Close() // nolint _, filename, _, _ := runtime.Caller(0) fd, err := os.Open(filename) if err != nil { fmt.Fprintf(os.Stderr, "open file error: %v\n", err) return } defer fd.Close() // nolint si, err := fd.Stat() if err != nil { fmt.Fprintf(os.Stderr, "stat error: %v\n", err) return } oid, err := db.HashTo(t.Context(), fd, si.Size()) if err != nil { fmt.Fprintf(os.Stderr, "hashTo error: %v\n", err) return } fmt.Fprintf(os.Stderr, "oid: %s\n", oid) } func TestPackDeocde(t *testing.T) { sa, err := pack.NewScanner("/tmp/zeta-pack") if err != nil { fmt.Fprintf(os.Stderr, "read set error: %v\n", err) return } defer sa.Close() // nolint oid := plumbing.NewHash("ff07b8065913e8f9b8e4c74ad6d2bd64a8b8f0ef8f025567f79950d0c39fe138") sr, err := sa.Open(oid) if err != nil { fmt.Fprintf(os.Stderr, "read object error: %v\n", err) return } br, err := object.NewBlob(sr) if err != nil { fmt.Fprintf(os.Stderr, "resolve blob error: %v\n", err) _ = sr.Close() return } _, _ = io.Copy(os.Stderr, br.Contents) var count int if err := sa.PackedObjects(func(oid plumbing.Hash, mtime int64) error { count++ return nil }); err != nil { return } fmt.Fprintf(os.Stderr, "count: %d\n", count) } func TestSearchObject(t *testing.T) { odb, err := NewDatabase("/tmp/xh5/.zeta") if err != nil { fmt.Fprintf(os.Stderr, "resolve blob error: %v\n", err) return } defer odb.Close() // nolint oid, err := odb.Search("ff0929c5c92f519f59518666d094c315f") if err != nil { fmt.Fprintf(os.Stderr, "read set error: %v prefix: %s\n", err, oid.Prefix()) return } fmt.Fprintf(os.Stderr, "object %s\n", oid) } func TestRemoveNonEmptyDir(t *testing.T) { err := os.Remove("/tmp/b2") fmt.Fprintf(os.Stderr, "%s %v\n", err, os.IsExist(err)) } func TestSleep(t *testing.T) { time.Sleep(0) fmt.Fprintf(os.Stderr, "%s\n", os.Getenv("LANG")) } ================================================ FILE: modules/zeta/backend/pack/bounds.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package pack import "fmt" // bounds encapsulates the window of search for a single iteration of binary // search. // // Callers may choose to treat the return values from Left() and Right() as // inclusive or exclusive. *bounds makes no assumptions on the inclusively of // those values. // // See: *zeta/object/pack:.Index for more. type bounds struct { // left is the left or lower bound of the bounds. left int64 // right is the rightmost or upper bound of the bounds. right int64 } // newBounds returns a new *bounds instance with the given left and right // values. func newBounds(left, right int64) *bounds { return &bounds{ left: left, right: right, } } // Left returns the leftmost value or lower bound of this *bounds instance. func (b *bounds) Left() int64 { return b.left } // right returns the rightmost value or upper bound of this *bounds instance. func (b *bounds) Right() int64 { return b.right } // WithLeft returns a new copy of this *bounds instance, replacing the left // value with the given argument. func (b *bounds) WithLeft(newLeft int64) *bounds { return &bounds{ left: newLeft, right: b.right, } } // WithRight returns a new copy of this *bounds instance, replacing the right // value with the given argument. func (b *bounds) WithRight(newRight int64) *bounds { return &bounds{ left: b.left, right: newRight, } } // Equal returns whether or not the receiving *bounds instance is equal to the // given one: // // - If both the argument and receiver are nil, they are given to be equal. // - If both the argument and receiver are not nil, and they share the same // Left() and Right() values, they are equal. // - If both the argument and receiver are not nil, but they do not share the // same Left() and Right() values, they are not equal. // - If either the argument or receiver is nil, but the other is not, they are // not equal. func (b *bounds) Equal(other *bounds) bool { if b == nil { return other == nil } if other == nil { return false } return b.left == other.left && b.right == other.right } // String returns a string representation of this bounds instance, given as: // // [,] func (b *bounds) String() string { return fmt.Sprintf("[%d,%d]", b.Left(), b.Right()) } ================================================ FILE: modules/zeta/backend/pack/encode.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package pack import ( "bufio" "bytes" "fmt" "hash/crc32" "io" "math" "os" "path/filepath" "sort" "sync/atomic" "github.com/antgroup/hugescm/modules/binary" "github.com/antgroup/hugescm/modules/plumbing" ) const ( PackVersion uint32 = 'Z' NoEntries uint32 = 0 entriesOffset = 4 + 4 // MAGIC(4)+VERSION(4) objectOffset = entriesOffset + 4 // ENTRIES(4) ) var ( packMagic = [4]byte{'P', 'A', 'C', 'K'} ) type Entry struct { Hash plumbing.Hash CRC32 uint32 Offset uint64 Modification uint64 } type objects []*Entry // EntriesSort sorts a slice of write index in increasing order. func EntriesSort(o objects) { sort.Sort(o) } func (o objects) Len() int { return len(o) } func (o objects) Less(i, j int) bool { return bytes.Compare(o[i].Hash[:], o[j].Hash[:]) < 0 } func (o objects) Swap(i, j int) { o[i], o[j] = o[j], o[i] } type Encoder struct { fd *os.File hasher plumbing.Hasher bw *bufio.Writer w io.Writer version uint32 entries uint32 offset uint64 objects objects sum plumbing.Hash } func NewEncoder(fd *os.File, entries uint32) (*Encoder, error) { e := &Encoder{fd: fd, bw: bufio.NewWriter(fd), version: PackVersion, entries: entries} if entries != 0 { e.hasher = plumbing.NewHasher() e.w = io.MultiWriter(e.bw, e.hasher) e.objects = make([]*Entry, 0, int(entries)) } else { e.w = e.bw e.objects = make([]*Entry, 0, 400) } if _, err := e.w.Write(packMagic[:]); err != nil { return nil, err } if err := binary.WriteUint32(e.w, e.version); err != nil { return nil, err } if err := binary.WriteUint32(e.w, e.entries); err != nil { return nil, err } e.offset = objectOffset return e, nil } func (e *Encoder) WriteTrailer() error { if e.hasher.Hash != nil { e.sum = e.hasher.Sum() if _, err := e.bw.Write(e.sum[:]); err != nil { return err } return e.bw.Flush() } // Flush all data if err := e.bw.Flush(); err != nil { return err } // The data in the buffer should be flushed to the file immediately, // then the number of entries should be corrected, the file BLAKE3 hash should be calculated, and written to the end of the packet. if _, err := e.fd.WriteAt(binary.Swap32(uint32(len(e.objects))), entriesOffset); err != nil { return err } if _, err := e.fd.Seek(0, io.SeekStart); err != nil { return err } hasher := plumbing.NewHasher() if _, err := io.Copy(hasher, e.fd); err != nil { return err } // When we have read all the data, the offset of the file has reached the end. e.sum = hasher.Sum() _, err := e.fd.Write(e.sum[:]) return err } func (e *Encoder) Write(oid plumbing.Hash, size uint32, r io.Reader, modification int64) (err error) { if err = binary.WriteUint32(e.w, size); err != nil { return } var written int64 cr := crc32.New(crc32.IEEETable) if written, err = io.Copy(e.w, io.TeeReader(r, cr)); err != nil { return } if written != int64(size) { return fmt.Errorf("written %d not equal object %s size %d: %w", written, oid, size, io.ErrShortWrite) } e.objects = append(e.objects, &Entry{Hash: oid, CRC32: cr.Sum32(), Offset: e.offset, Modification: uint64(modification)}) e.offset += uint64(size) + 4 return } func (e *Encoder) Name() string { return e.sum.String() } const ( offset64PosMask = uint64(1) << 31 ) // https://codewords.recurse.com/issues/three/unpacking-git-packfiles func (e *Encoder) WriteIndex(fd *os.File) error { sort.Sort(e.objects) var fanout [256]uint32 for _, o := range e.objects { fanout[uint8(o.Hash[0])]++ //nolint:unconvert // byte -> uint8 index conversion } hasher := plumbing.NewHasher() bufWriter := bufio.NewWriter(fd) w := io.MultiWriter(bufWriter, hasher) if err := binary.Write(w, indexMagic[:]); err != nil { return err } if err := binary.WriteUint32(w, IndexVersionCurrent); err != nil { return err } var fanoutStore uint32 for i := range 256 { fanoutStore += fanout[i] if err := binary.WriteUint32(w, fanoutStore); err != nil { return err } } for _, o := range e.objects { if err := binary.Write(w, o.Hash[:]); err != nil { return err } } for _, o := range e.objects { if err := binary.WriteUint32(w, o.CRC32); err != nil { return err } } offset64Set := make([]uint64, 0, 20) var offset64Pos uint64 for _, o := range e.objects { offset := o.Offset if offset > math.MaxInt32 { offset64Set = append(offset64Set, offset) offset = offset64Pos | offset64PosMask offset64Pos++ } if err := binary.WriteUint32(w, uint32(offset)); err != nil { return err } } for _, o := range offset64Set { if err := binary.WriteUint64(w, o); err != nil { return err } } if err := binary.Write(w, e.sum[:]); err != nil { return err } sum := hasher.Sum() if err := binary.Write(bufWriter, sum[:]); err != nil { return err } return bufWriter.Flush() } var ( mtimeMagic = [4]byte{'M', 'T', 'E', 'M'} ) func (e *Encoder) WriteModification(fd *os.File) error { hasher := plumbing.NewHasher() bufWriter := bufio.NewWriter(fd) w := io.MultiWriter(bufWriter, hasher) if err := binary.Write(w, mtimeMagic[:]); err != nil { return err } if err := binary.WriteUint32(w, PackVersion); err != nil { return err } for _, o := range e.objects { if err := binary.WriteUint64(w, o.Modification); err != nil { return err } } sum := hasher.Sum() if err := binary.Write(bufWriter, sum[:]); err != nil { return err } return bufWriter.Flush() } type Writer struct { e *Encoder fd *os.File packDir string closed uint32 } func NewWriter(packDir string, entries uint32) (*Writer, error) { if err := os.MkdirAll(packDir, 0755); err != nil { return nil, err } fd, err := os.CreateTemp(packDir, "pack-") if err != nil { return nil, err } e, err := NewEncoder(fd, entries) if err != nil { return nil, err } return &Writer{e: e, fd: fd, packDir: packDir}, nil } func (w *Writer) Close() error { if w.fd != nil && atomic.CompareAndSwapUint32(&w.closed, 0, 1) { _ = w.fd.Chmod(0444) // Set pack to read-only return w.fd.Close() } return nil } func (w *Writer) Write(oid plumbing.Hash, size uint32, r io.Reader, modification int64) (err error) { return w.e.Write(oid, size, r, modification) } func (w *Writer) WriteTrailer() error { if err := w.e.WriteTrailer(); err != nil { return err } name := w.e.Name() packName := w.fd.Name() _ = w.Close() packNewName := filepath.Join(w.packDir, fmt.Sprintf("pack-%s.pack", name)) if err := os.Rename(packName, packNewName); err != nil { return err } ifd, err := os.Create(filepath.Join(w.packDir, fmt.Sprintf("pack-%s.idx", name))) if err != nil { return err } defer ifd.Close() // nolint if err := w.e.WriteIndex(ifd); err != nil { return err } _ = ifd.Chmod(0444) // Set idx to read-only mfd, err := os.Create(filepath.Join(w.packDir, fmt.Sprintf("pack-%s.mtimes", name))) if err != nil { return err } defer mfd.Close() // nolint err = w.e.WriteModification(mfd) _ = mfd.Chmod(0444) // Set mtimes to read-only return err } ================================================ FILE: modules/zeta/backend/pack/errors.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package pack import ( "errors" "fmt" ) // UnsupportedVersionErr is a type implementing 'error' which indicates a // the presence of an unsupported packfile version. type UnsupportedVersionErr struct { // Got is the unsupported version that was detected. Got uint32 } // Error implements 'error.Error()'. func (u *UnsupportedVersionErr) Error() string { return fmt.Sprintf("zeta: unsupported version: %d", u.Got) } var ( errBadPackHeader = errors.New("zeta: bad pack header") errBadIndexHeader = errors.New("zeta: bad index header") ) ================================================ FILE: modules/zeta/backend/pack/index.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package pack import ( "bytes" "encoding/binary" "errors" "io" "github.com/antgroup/hugescm/modules/plumbing" ) // https://git-scm.com/docs/gitformat-pack const ( IndexVersionCurrent = 'Z' // indexMagicWidth is the width of the magic header of packfiles version // 1 and newer. indexMagicWidth = 4 // indexVersionWidth is the width of the version following the magic // header. indexVersionWidth = 4 // indexV2Width is the total width of the header in V2. indexWidth = indexMagicWidth + indexVersionWidth // indexFanoutEntries is the number of entries in the fanout table. indexFanoutEntries = 256 // indexFanoutEntryWidth is the width of each entry in the fanout table. indexFanoutEntryWidth = 4 // indexFanoutWidth is the width of the entire fanout table. indexFanoutWidth = indexFanoutEntries * indexFanoutEntryWidth // indexOffsetStart is the location of the first object outside of the // header. indexOffsetStart = indexWidth + indexFanoutWidth // indexObjectCRCWidth is the width of the CRC accompanying each object. indexObjectCRCWidth = 4 // indexObjectSmallOffsetWidth is the width of the small offset encoded // into each object. indexObjectSmallOffsetWidth = 4 // indexObjectLargeOffsetWidth is the width of the optional large offset // encoded into the small offset. indexObjectLargeOffsetWidth = 8 ) var ( indexMagic = [4]byte{0xff, 0x74, 0x4f, 0x63} ) /* * Minimum size: * - 8 bytes of header * - 256 index entries 4 bytes each * - 32-byte BLAKE3 entry * nr * - 4-byte crc entry * nr * - 4-byte offset entry * nr * - 32-byte BLAKE3 of the packfile * - 32-byte BLAKE3 file checksum * And after the 4-byte offset table might be a * variable sized table containing 8-byte entries * for offsets larger than 2^31. */ // IndexEntry specifies data encoded into an entry in the pack index. type IndexEntry struct { Pos int64 // PackOffset is the number of bytes before the associated object in a // packfile. PackOffset uint64 } type IndexVersion interface { // Name returns the name of the object located at the given offset "at", // in the Index file "idx". // // It returns an error if the object at that location could not be // parsed. Name(idx *Index, at int64) (plumbing.Hash, error) // Entry parses and returns the full *IndexEntry located at the offset // "at" in the Index file "idx". // // If there was an error parsing the IndexEntry at that location, it // will be returned. Entry(idx *Index, at int64) (*IndexEntry, error) // PackedObjects PackedObjects(idx *Index, recv RecvFunc) error // Width returns the number of bytes occupied by the header of a // particular index version. Width() int64 } // Index stores information about the location of objects in a corresponding // packfile. type Index struct { // version is the encoding version used by this index. // // Currently, versions 1 and 2 are supported. version IndexVersion // fanout is the L1 fanout table stored in this index. For a given index // "i" into the array, the value stored at that index specifies the // number of objects in the packfile/index that are lexicographically // less than or equal to that index. // // See: https://github.com/git/git/blob/v2.13.0/Documentation/technical/pack-format.txt#L41-L45 fanout []uint32 // r is the underlying set of encoded data comprising this index file. r io.ReaderAt } // Count returns the number of objects in the packfile. func (i *Index) Count() int { return int(i.fanout[255]) } // Close closes the packfile index if the underlying data stream is closeable. // If so, it returns any error involved in closing. func (i *Index) Close() error { if c, ok := i.r.(io.Closer); ok { return c.Close() } return nil } var ( // errNotFound is an error returned by Index.Entry() (see: below) when // an object cannot be found in the index. errNotFound = errors.New("zeta: object not found in index") // ErrShortFanout is an error representing situations where the entire // fanout table could not be read, and is thus too short. ErrShortFanout = errors.New("zeta: too short fanout table") ) // IsNotFound returns whether a given error represents a missing object in the // index. func IsNotFound(err error) bool { return errors.Is(err, errNotFound) } // Entry returns an entry containing the offset of a given BLAKE3 "name". // // Entry operates in O(log(n))-time in the worst case, where "n" is the number // of objects that begin with the first byte of "name". // // If the entry cannot be found, (nil, ErrNotFound) will be returned. If there // was an error searching for or parsing an entry, it will be returned as (nil, // err). // // Otherwise, (entry, nil) will be returned. func (i *Index) Entry(name plumbing.Hash) (*IndexEntry, error) { var last *bounds bounds := i.bounds(name) for bounds.Left() < bounds.Right() { if last.Equal(bounds) { // If the bounds are unchanged, that means either that // the object does not exist in the packfile, or the // fanout table is corrupt. // // Either way, we won't be able to find the object. // Return immediately to prevent infinite looping. return nil, errNotFound } last = bounds // Find the midpoint between the upper and lower bounds. mid := bounds.Left() + ((bounds.Right() - bounds.Left()) / 2) got, err := i.version.Name(i, mid) if err != nil { return nil, err } if cmp := bytes.Compare(name[:], got[:]); cmp == 0 { // If "cmp" is zero, that means the object at that index // "at" had a SHA equal to the one given by name, and we // are done. return i.version.Entry(i, mid) } else if cmp < 0 { // If the comparison is less than 0, we searched past // the desired object, so limit the upper bound of the // search to the midpoint. bounds = bounds.WithRight(mid) } else if cmp > 0 { // Likewise, if the comparison is greater than 0, we // searched below the desired object. Modify the bounds // accordingly. bounds = bounds.WithLeft(mid) } } return nil, errNotFound } func prefixCompare(want, got plumbing.Hash) int { sl := want.Shorten() return bytes.Compare(want[:sl], got[:sl]) } func (i *Index) Search(name plumbing.Hash) (oid plumbing.Hash, err error) { var last *bounds bounds := i.bounds(name) for bounds.Left() < bounds.Right() { if last.Equal(bounds) { // If the bounds are unchanged, that means either that // the object does not exist in the packfile, or the // fanout table is corrupt. // // Either way, we won't be able to find the object. // Return immediately to prevent infinite looping. return oid, errNotFound } last = bounds // Find the midpoint between the upper and lower bounds. mid := bounds.Left() + ((bounds.Right() - bounds.Left()) / 2) got, err := i.version.Name(i, mid) if err != nil { return oid, err } if cmp := prefixCompare(name, got); cmp == 0 { // If "cmp" is zero, that means the object at that index // "at" had a SHA equal to the one given by name, and we // are done. return got, nil } else if cmp < 0 { // If the comparison is less than 0, we searched past // the desired object, so limit the upper bound of the // search to the midpoint. bounds = bounds.WithRight(mid) } else if cmp > 0 { // Likewise, if the comparison is greater than 0, we // searched below the desired object. Modify the bounds // accordingly. bounds = bounds.WithLeft(mid) } } return oid, errNotFound } // readAt is a convenience method that allow reading into the underlying data // source from other callers within this package. func (i *Index) readAt(p []byte, at int64) (n int, err error) { return i.r.ReadAt(p, at) } // bounds returns the initial bounds for a given name using the fanout table to // limit search results. func (i *Index) bounds(name plumbing.Hash) *bounds { var left, right int64 if name[0] == 0 { // If the lower bound is 0, there are no objects before it, // start at the beginning of the index file. left = 0 } else { // Otherwise, make the lower bound the slot before the given // object. left = int64(i.fanout[name[0]-1]) } if name[0] == 255 { // As above, if the upper bound is the max byte value, make the // upper bound the last object in the list. right = int64(i.Count()) } else { // Otherwise, make the upper bound the first object which is not // within the given slot. right = int64(i.fanout[name[0]+1]) } return newBounds(left, right) } func (i *Index) PackedObjects(recv RecvFunc) error { return i.version.PackedObjects(i, recv) } // DecodeIndex decodes an index whose underlying data is supplied by "r". // // DecodeIndex reads only the header and fanout table, and does not eagerly // parse index entries. // // If there was an error parsing, it will be returned immediately. func DecodeIndex(r io.ReaderAt) (*Index, error) { version, err := decodeIndexHeader(r) if err != nil { return nil, err } fanout, err := decodeIndexFanout(r, version.Width()) if err != nil { return nil, err } return &Index{ version: version, fanout: fanout, r: r, }, nil } // decodeIndexHeader determines which version the index given by "r" is. func decodeIndexHeader(r io.ReaderAt) (IndexVersion, error) { hdr := make([]byte, 4) if _, err := r.ReadAt(hdr, 0); err != nil { return nil, err } if !bytes.Equal(hdr, indexMagic[:]) { return nil, errBadIndexHeader } versionByte := make([]byte, 4) if _, err := r.ReadAt(versionByte, 4); err != nil { return nil, err } version := binary.BigEndian.Uint32(versionByte) switch version { case IndexVersionCurrent: return &IndexZ{}, nil } return nil, &UnsupportedVersionErr{version} } // decodeIndexFanout decodes the fanout table given by "r" and beginning at the // given offset. func decodeIndexFanout(r io.ReaderAt, offset int64) ([]uint32, error) { b := make([]byte, 256*4) if _, err := r.ReadAt(b, offset); err != nil { if errors.Is(err, io.EOF) { return nil, ErrShortFanout } return nil, err } fanout := make([]uint32, 256) for i := range fanout { fanout[i] = binary.BigEndian.Uint32(b[(i * 4):]) } return fanout, nil } ================================================ FILE: modules/zeta/backend/pack/index_version.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package pack import ( "bufio" "encoding/binary" "errors" "io" "os" "strings" "github.com/antgroup/hugescm/modules/plumbing" ) const ( HashDigestSize = plumbing.HASH_DIGEST_SIZE ) // IndexZ implements IndexVersion for packfiles. type IndexZ struct { } // Name implements IndexVersion.Name by returning the 32 byte BLAKE3 object name // for the given entry at offset "at" in the v2 index file "idx". func (v *IndexZ) Name(idx *Index, at int64) (oid plumbing.Hash, err error) { if _, err = idx.readAt(oid[:], hashOffset(at)); err != nil { return } return } // Entry implements IndexVersion.Entry for v2 packfiles by parsing and returning // the IndexEntry specified at the offset "at" in the given index file. func (v *IndexZ) Entry(idx *Index, at int64) (*IndexEntry, error) { var offs [4]byte if _, err := idx.readAt(offs[:], smallOffsetOffset(at, int64(idx.Count()))); err != nil { return nil, err } loc := uint64(binary.BigEndian.Uint32(offs[:])) if loc&0x80000000 > 0 { // If the most significant bit (MSB) of the offset is set, then // the offset encodes the indexed location for an 8-byte offset. // // Mask away (offs&0x7fffffff) the MSB to use as an index to // find the offset of the 8-byte pack offset. lo := largeOffsetOffset(int64(loc&0x7fffffff), int64(idx.Count())) var offs [8]byte if _, err := idx.readAt(offs[:], lo); err != nil { return nil, err } loc = binary.BigEndian.Uint64(offs[:]) } return &IndexEntry{PackOffset: loc, Pos: at}, nil } // Width implements IndexVersion.Width() by returning the number of bytes that // v2 packfile index header occupy. func (v *IndexZ) Width() int64 { return indexWidth } type RecvFunc func(oid plumbing.Hash, modification int64) error func openMtimesFD(idx *Index) (*os.File, error) { fd, ok := idx.r.(*os.File) if !ok { return nil, errors.New("bad index") } return os.Open(strings.TrimSuffix(fd.Name(), ".idx") + ".mtimes") } func (v *IndexZ) PackedObjects(idx *Index, recv RecvFunc) error { total := idx.Count() br := bufio.NewReader(NewSizeReader(idx.r, indexOffsetStart, int64(total*HashDigestSize))) mfd, err := openMtimesFD(idx) if err != nil { for range total { var oid plumbing.Hash if _, err := io.ReadFull(br, oid[:]); err != nil { return err } if err := recv(oid, 0); err != nil { return err } } return nil } defer mfd.Close() // nolint if _, err := mfd.Seek(8, io.SeekStart); err != nil { return err } mbr := bufio.NewReader(mfd) var mtimeBytes [8]byte for range total { var oid plumbing.Hash if _, err := io.ReadFull(br, oid[:]); err != nil { return err } if _, err := io.ReadFull(mbr, mtimeBytes[:]); err != nil { return err } if err := recv(oid, int64(binary.BigEndian.Uint64(mtimeBytes[:]))); err != nil { return err } } return nil } // hashOffset returns the offset of a SHA1 given at "at" in the V2 index file. func hashOffset(at int64) int64 { // Skip the packfile index header and the L1 fanout table. return indexOffsetStart + // Skip until the desired name in the sorted names table. (HashDigestSize * at) } // smallOffsetOffset returns the offset of an object's small (4-byte) offset // given by "at". func smallOffsetOffset(at, total int64) int64 { // Skip the packfile index header and the L1 fanout table. return indexOffsetStart + // Skip the name table. (HashDigestSize * total) + // Skip the CRC table. (indexObjectCRCWidth * total) + // Skip until the desired index in the small offsets table. (indexObjectSmallOffsetWidth * at) } // largeOffsetOffset returns the offset of an object's large (4-byte) offset, // given by the index "at". func largeOffsetOffset(at, total int64) int64 { // Skip the packfile index header and the L1 fanout table. return indexOffsetStart + // Skip the name table. (HashDigestSize * total) + // Skip the CRC table. (indexObjectCRCWidth * total) + // Skip the small offsets table. (indexObjectSmallOffsetWidth * total) + // Seek to the large offset within the large offset(s) table. (indexObjectLargeOffsetWidth * at) } ================================================ FILE: modules/zeta/backend/pack/pack_test.go ================================================ package pack import ( "encoding/hex" "fmt" "io" "os" "testing" "github.com/antgroup/hugescm/modules/binary" "github.com/antgroup/hugescm/modules/plumbing" ) func TestPackDecode(t *testing.T) { fd, err := os.Open("/tmp/git-pack.idx") if err != nil { fmt.Fprintf(os.Stderr, "open index error: %v\n", err) return } defer fd.Close() // nolint _, _ = fd.Seek(4+4, io.SeekStart) for i := range 256 { n, err := binary.ReadUint32(fd) if err != nil { fmt.Fprintf(os.Stderr, "open index error: %v\n", err) return } fmt.Fprintf(os.Stderr, "Fanout: %d - %d\n", i, n) } _, _ = fd.Seek(4+4+4*256, io.SeekStart) for range 260 { var oid [20]byte if _, err := io.ReadFull(fd, oid[:]); err != nil { fmt.Fprintf(os.Stderr, "read oid index error: %v\n", err) return } fmt.Fprintf(os.Stderr, "%s\n", hex.EncodeToString(oid[:])) } } func TestLastIndexByte(t *testing.T) { ss := []string{ "00", "12", "123456", "abcd000000123455", "abcdefdd", } for _, s := range ss { o := plumbing.NewHash(s) fmt.Fprintf(os.Stderr, "prefix: %s\n", o.Prefix()) } } ================================================ FILE: modules/zeta/backend/pack/packfile.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package pack import ( "bytes" "encoding/binary" "fmt" "io" "github.com/antgroup/hugescm/modules/plumbing" ) // Packfile encapsulates the behavior of accessing an unpacked representation of // all of the objects encoded in a single packfile. type Packfile struct { // Version is the version of the packfile. Version uint32 // Objects is the total number of objects in the packfile. Objects uint32 // idx is the corresponding "pack-*.idx" file giving the positions of // objects in this packfile. idx *Index // r is an io.ReaderAt that allows read access to the packfile itself. r io.ReaderAt } // Close closes the packfile if the underlying data stream is closeable. If so, // it returns any error involved in closing. func (p *Packfile) Close() error { var iErr error if p.idx != nil { iErr = p.idx.Close() } if closer, ok := p.r.(io.Closer); ok { return closer.Close() } return iErr } func (p *Packfile) Exists(name plumbing.Hash) error { if _, err := p.idx.Entry(name); err != nil { if !IsNotFound(err) { // If the error was not an errNotFound, re-wrap it with // additional context. err = fmt.Errorf("zeta: could not load index: %w", err) } return err } return nil } func (p *Packfile) Search(name plumbing.Hash) (oid plumbing.Hash, err error) { return p.idx.Search(name) } func (p *Packfile) Object(name plumbing.Hash) (*SizeReader, error) { // First, try and determine the offset of the last entry in the // delta-base chain by loading it from the corresponding pack index. entry, err := p.idx.Entry(name) if err != nil { if !IsNotFound(err) { // If the error was not an errNotFound, re-wrap it with // additional context. err = fmt.Errorf("zeta: could not load index: %w", err) } return nil, err } return p.find(int64(entry.PackOffset)) } func (p *Packfile) find(offset int64) (*SizeReader, error) { var sizeBytes [4]byte if _, err := p.r.ReadAt(sizeBytes[:], offset); err != nil { return nil, err } size := binary.BigEndian.Uint32(sizeBytes[:]) return NewSizeReader(p.r, offset+4, int64(size)), nil } // DecodePackfile opens the packfile given by the io.ReaderAt "r" for reading. // It does not apply any delta-base chains, nor does it do reading otherwise // beyond the header. // // If the header is malformed, or otherwise cannot be read, an error will be // returned without a corresponding packfile. func DecodePackfile(r io.ReaderAt) (*Packfile, error) { header := make([]byte, 12) if _, err := r.ReadAt(header, 0); err != nil { return nil, err } if !bytes.Equal(header[0:4], packMagic[:]) { return nil, errBadPackHeader } version := binary.BigEndian.Uint32(header[4:]) objects := binary.BigEndian.Uint32(header[8:]) return &Packfile{ Version: version, Objects: objects, r: r, }, nil } ================================================ FILE: modules/zeta/backend/pack/reader.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package pack import "io" // SizeReader transforms an io.ReaderAt into an io.Reader by beginning and // advancing all reads at the given offset. type SizeReader struct { // raw is the data source for this instance of *OffsetReaderAt. raw io.ReaderAt // offset if the number of bytes read from the underlying data source, "r". // It is incremented upon reads. offset int64 n int64 // max bytes remaining size int64 } func NewSizeReader(r io.ReaderAt, offset int64, size int64) *SizeReader { return &SizeReader{raw: r, offset: offset, n: size, size: size} } func (r *SizeReader) Size() int64 { return r.size } // close func (r *SizeReader) Close() error { return nil } // Read implements io.Reader.Read by reading into the given []byte, "p" from the // last known offset provided to the OffsetReaderAt. // // It returns any error encountered from the underlying data stream, and // advances the reader forward by "n", the number of bytes read from the // underlying data stream. func (r *SizeReader) Read(p []byte) (n int, err error) { if r.n <= 0 { return 0, io.EOF } if int64(len(p)) > r.n { p = p[0:r.n] } n, err = r.raw.ReadAt(p, r.offset) r.offset += int64(n) r.n -= int64(n) return } ================================================ FILE: modules/zeta/backend/pack/set.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package pack import ( "os" "path/filepath" "regexp" "sort" "strings" "github.com/antgroup/hugescm/modules/plumbing" ) type Set interface { Object(name plumbing.Hash) (*SizeReader, error) Exists(name plumbing.Hash) error Search(prefix plumbing.Hash) (plumbing.Hash, error) Close() error } type set struct { // m maps the leading byte of a BLAKE3 object name to a set of packfiles // that might contain that object, in order of which packfile is most // likely to contain that object. m map[byte][]*Packfile // closeFn is a function that is run by Close(), designated to free // resources held by the *Set, like open packfiles. closeFn func() error } var ( _ Set = &set{} ) // Close closes all open packfiles, returning an error if one was encountered. func (s *set) Close() error { if s.closeFn == nil { return nil } return s.closeFn() } // iterFn is a function that takes a given packfile and opens an object from it. type iterFn func(p *Packfile) (r *SizeReader, err error) func (s *set) Object(name plumbing.Hash) (*SizeReader, error) { return s.each(name, func(p *Packfile) (*SizeReader, error) { return p.Object(name) }) } func (s *set) each(name plumbing.Hash, fn iterFn) (*SizeReader, error) { k := name[0] for _, pack := range s.m[k] { o, err := fn(pack) if err != nil { if IsNotFound(err) { continue } return nil, err } return o, nil } return nil, plumbing.NoSuchObject(name) } func (s *set) Exists(name plumbing.Hash) error { return s.eachExists(name, func(p *Packfile) error { return p.Exists(name) }) } func (s *set) eachExists(name plumbing.Hash, fn func(*Packfile) error) error { k := name[0] for _, pack := range s.m[k] { err := fn(pack) if err != nil { if IsNotFound(err) { continue } return err } return nil } return plumbing.NoSuchObject(name) } type searchFn func(p *Packfile) (oid plumbing.Hash, err error) func (s *set) Search(prefix plumbing.Hash) (oid plumbing.Hash, err error) { return s.eachSearch(prefix, func(p *Packfile) (oid plumbing.Hash, err error) { return p.Search(prefix) }) } func (s *set) eachSearch(name plumbing.Hash, fn searchFn) (oid plumbing.Hash, err error) { k := name[0] for _, pack := range s.m[k] { o, err := fn(pack) if err != nil { if IsNotFound(err) { continue } return oid, err } return o, nil } return oid, plumbing.NoSuchObject(name) } // packsConcat creates a new *Set from the given packfiles. func packsConcat(packs ...*Packfile) Set { m := make(map[byte][]*Packfile) for i := range 256 { n := byte(i) for j := range packs { pack := packs[j] var count uint32 if n == 0 { count = pack.idx.fanout[n] } else { count = pack.idx.fanout[n] - pack.idx.fanout[n-1] } if count > 0 { m[n] = append(m[n], pack) } } sort.Slice(m[n], func(i, j int) bool { ni := m[n][i].idx.fanout[n] nj := m[n][j].idx.fanout[n] return ni > nj }) } return &set{ m: m, closeFn: func() error { for _, pack := range packs { if err := pack.Close(); err != nil { return err } } return nil }, } } var ( // nameRe is a regular expression that matches the basename of a // filepath that is a packfile. // // It includes one matchgroup, which is the SHA-1 name of the pack. nameRe = regexp.MustCompile(`^(.*)\.pack$`) ) // globEscapes uses these escapes because filepath.Glob does not understand // backslash escapes on Windows. var globEscapes = map[string]string{ "*": "[*]", "?": "[?]", "[": "[[]", } func escapeGlobPattern(s string) string { for char, escape := range globEscapes { s = strings.ReplaceAll(s, char, escape) } return s } func newPacks(db string) ([]*Packfile, error) { pd := filepath.Join(db, "pack") paths, err := filepath.Glob(filepath.Join(escapeGlobPattern(pd), "*.pack")) if err != nil { return nil, err } packs := make([]*Packfile, 0, len(paths)) for _, path := range paths { subMatch := nameRe.FindStringSubmatch(filepath.Base(path)) if len(subMatch) != 2 { continue } name := subMatch[1] ifd, err := os.Open(filepath.Join(pd, name+".idx")) if err != nil { // We have a pack (since it matched the regex), but the // index is missing or unusable. Skip this pack and // continue on with the next one, as Git does. if ifd != nil { // In the unlikely event that we did open a // file, close it, but discard any error in // doing so. _ = ifd.Close() } continue } pfd, err := os.Open(filepath.Join(pd, name+".pack")) if err != nil { _ = ifd.Close() return nil, err } pack, err := DecodePackfile(pfd) if err != nil { _ = ifd.Close() return nil, err } idx, err := DecodeIndex(ifd) if err != nil { _ = pack.Close() return nil, err } pack.idx = idx packs = append(packs, pack) } return packs, nil } // NewSets func NewSets(db string) (Set, error) { packs, err := newPacks(db) if err != nil { return nil, err } return packsConcat(packs...), nil } type Packs []*Packfile func (ps Packs) PackedObjects(recv RecvFunc) error { for _, p := range ps { if err := p.idx.PackedObjects(recv); err != nil { return err } } return nil } func NewPacks(db string) (Set, Packs, error) { packs, err := newPacks(db) if err != nil { return nil, nil, err } return packsConcat(packs...), packs, nil } ================================================ FILE: modules/zeta/backend/pack/storage.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package pack import ( "io" "os" "github.com/antgroup/hugescm/modules/plumbing" ) // Storage implements the storage.Storage interface. type Storage struct { packs Set } // NewStorage returns a new storage object based on a pack set. func NewStorage(root string) (*Storage, error) { packs, err := NewSets(root) if err != nil { return nil, err } return &Storage{packs: packs}, nil } // Open implements the storage.Storage.Open interface. func (f *Storage) Open(oid plumbing.Hash) (r io.ReadCloser, err error) { return f.packs.Object(oid) } // check object exists func (f *Storage) Exists(name plumbing.Hash) error { return f.packs.Exists(name) } func (f *Storage) Search(prefix plumbing.Hash) (oid plumbing.Hash, err error) { return f.packs.Search(prefix) } // Open implements the storage.Storage.Open interface. func (f *Storage) Close() error { return f.packs.Close() } type Scanner struct { set Set packs Packs } func NewScanner(root string) (*Scanner, error) { set, packs, err := NewPacks(root) if err != nil { return nil, err } return &Scanner{set: set, packs: packs}, nil } // Open implements the storage.Storage.Open interface. func (s *Scanner) Open(oid plumbing.Hash) (r io.ReadCloser, err error) { return s.set.Object(oid) } func (s *Scanner) PackedObjects(recv RecvFunc) error { return s.packs.PackedObjects(recv) } // check object exists func (s *Scanner) Exists(name plumbing.Hash) error { for _, p := range s.packs { if err := p.Exists(name); err != nil { if plumbing.IsNoSuchObject(err) { continue } return err } return nil } return plumbing.NoSuchObject(name) } func (s *Scanner) Search(prefix plumbing.Hash) (plumbing.Hash, error) { for _, p := range s.packs { oid, err := p.Search(prefix) if err != nil { if plumbing.IsNoSuchObject(err) { continue } return plumbing.ZeroHash, err } return oid, nil } return plumbing.ZeroHash, plumbing.NoSuchObject(prefix) } func (s *Scanner) Names() []string { names := make([]string, 0, len(s.packs)) for _, p := range s.packs { if fd, ok := p.r.(*os.File); ok { names = append(names, fd.Name()) } } return names } // Open implements the storage.Storage.Open interface. func (s *Scanner) Close() error { return s.set.Close() } ================================================ FILE: modules/zeta/backend/pack-objects.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package backend import ( "context" "errors" "fmt" "os" "path/filepath" "strings" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/backend/pack" "github.com/antgroup/hugescm/modules/zeta/backend/storage" ) type Indicators interface { Add(n int) Wait() Run(ctx context.Context) } type NewIndicators func(description, completed string, total uint64, quiet bool) Indicators type nonIndicators struct{} func (p nonIndicators) Add(n int) {} func (p nonIndicators) Wait() {} func (p nonIndicators) Run(ctx context.Context) {} var ( _ Indicators = &nonIndicators{} ) func preservePack(root, quarantine string) error { packDir := filepath.Join(root, "pack") if err := mkdir(packDir); err != nil { return err } dirs, err := os.ReadDir(quarantine) if err != nil { return err } for _, d := range dirs { if d.IsDir() { continue } if err := finalizeObject(filepath.Join(quarantine, d.Name()), filepath.Join(packDir, d.Name())); err != nil { return err } } return nil } type packedObject struct { size int64 modification int64 packed bool } type packedObjects map[plumbing.Hash]*packedObject func openObject(ro storage.Storage, oid plumbing.Hash, o *packedObject) (SizeReader, int64, error) { rc, err := ro.Open(oid) if err != nil { return nil, 0, err } switch v := rc.(type) { case *os.File: si, err := v.Stat() if err != nil { _ = v.Close() return nil, 0, err } return &sizeReader{Reader: v, closer: v, size: si.Size()}, o.modification, nil case *pack.SizeReader: return &sizeReader{Reader: v, closer: v, size: v.Size()}, o.modification, nil default: } _ = rc.Close() return nil, 0, errors.New("unable detect reader size") } func repackMetaObjects(ctx context.Context, ro storage.Storage, objects packedObjects, quarantine string, bar Indicators) error { select { case <-ctx.Done(): return ctx.Err() default: } w, err := pack.NewWriter(quarantine, uint32(len(objects))) if err != nil { return err } defer w.Close() // nolint for oid, po := range objects { bar.Add(1) sr, modification, err := openObject(ro, oid, po) if err != nil { return err } err = w.Write(oid, uint32(sr.Size()), sr, modification) _ = sr.Close() if err != nil { return err } } return w.WriteTrailer() } func repackObjects(ctx context.Context, opts *PackOptions, ro storage.Storage, fo *fileStorer, objects packedObjects, quarantine string, bar Indicators) error { select { case <-ctx.Done(): return ctx.Err() default: } unpack := func(oid plumbing.Hash, po *packedObject, sr SizeReader) error { defer sr.Close() // nolint if !po.packed { return nil } return fo.Unpack(oid, sr) } w, err := pack.NewWriter(quarantine, 0) if err != nil { return err } defer w.Close() // nolint for oid, po := range objects { bar.Add(1) sr, modification, err := openObject(ro, oid, po) if err != nil { return err } if sr.Size() > opts.PackThreshold { if err := unpack(oid, po, sr); err != nil { return err } objects[oid] = nil continue } err = w.Write(oid, uint32(sr.Size()), sr, modification) _ = sr.Close() if err != nil { return err } } return w.WriteTrailer() } func repackObjectsEx(ctx context.Context, opts *PackOptions, ro storage.Storage, fo *fileStorer, objects packedObjects, quarantine string, meta bool) (err error) { bar := opts.NewIndicators("Writing objects", "", uint64(len(objects)), opts.Quiet) newCtx, cancelCtx := context.WithCancelCause(ctx) bar.Run(newCtx) if meta { err = repackMetaObjects(ctx, ro, objects, quarantine, bar) } else { err = repackObjects(ctx, opts, ro, fo, objects, quarantine, bar) } if err != nil { cancelCtx(err) bar.Wait() return err } cancelCtx(nil) bar.Wait() return nil } func pruneObjects0(ctx context.Context, fo *fileStorer, objects packedObjects, bar Indicators) int { var count int for oid, po := range objects { bar.Add(1) if po == nil { continue } if err := fo.PruneObject(ctx, oid); errors.Is(err, context.Canceled) { break } count++ } return count } func pruneObjects(ctx context.Context, opts *PackOptions, fo *fileStorer, objects packedObjects) int { bar := opts.NewIndicators("Prune objects", "", uint64(len(objects)), opts.Quiet) newCtx, cancelCtx := context.WithCancelCause(ctx) bar.Run(newCtx) count := pruneObjects0(ctx, fo, objects, bar) cancelCtx(nil) bar.Wait() return count } const ( MaxLooseObjects = 2048 MaxPacks = 4 MinPackSize = 200 << 20 // 200M ) func hasTidyPacks(root string) bool { packDir := filepath.Join(root, "pack") entries, err := os.ReadDir(packDir) if err != nil { return false } var count int var hasTidyPack bool for _, e := range entries { if e.IsDir() { continue } name := e.Name() if !strings.HasSuffix(name, ".pack") { continue } count++ si, err := e.Info() if err != nil { return false } if si.Size() < MinPackSize { hasTidyPack = true } } return hasTidyPack && count > 1 } func packObjectsInternal(ctx context.Context, opts *PackOptions, root string, meta bool) error { fo := newFileStorer(root, "", opts.CompressionALGO) packs, err := pack.NewScanner(root) if err != nil { return fmt.Errorf("new scanner error: %w", err) } ro := storage.MultiStorage(fo, packs) closed := false defer func() { if !closed { _ = ro.Close() } }() objects := make(packedObjects) looseObjects, err := fo.looseObjects(opts.PackThreshold) if err != nil { return err } step := "blob" if meta { step = "metadata" } if len(looseObjects) == 0 && !hasTidyPacks(root) { // no small loose objects, skipped. opts.Printf("Pack %s objects: no smaller loose object, skipping packing.\n", step) return nil } for _, o := range looseObjects { objects[o.Hash] = &packedObject{size: o.Size, modification: o.Modification} } var packedEntries int err = packs.PackedObjects(func(oid plumbing.Hash, modification int64) error { objects[oid] = &packedObject{modification: modification, packed: true} packedEntries++ return nil }) if err != nil { return err } quarantineDir, err := os.MkdirTemp(root, "quarantine-") if err != nil { return err } defer func() { _ = os.RemoveAll(quarantineDir) }() opts.Printf("Pack %s objects: loose object %d packed objects %d\n", step, len(looseObjects), packedEntries) if err := repackObjectsEx(ctx, opts, ro, fo, objects, quarantineDir, meta); err != nil { return fmt.Errorf("repack objects [metadata: %v] %w", meta, err) } if err := preservePack(root, quarantineDir); err != nil { return err } names := packs.Names() _ = ro.Close() closed = true for _, p := range names { _ = os.Remove(p) // PACK _ = os.Remove(strings.TrimSuffix(p, ".pack") + ".idx") // PACK INDEX _ = os.Remove(strings.TrimSuffix(p, ".pack") + ".mtimes") // PACK INDEX } count := pruneObjects(ctx, opts, fo, objects) var prunedDirs int if prunedDirs, err = fo.Prune(ctx); err != nil { return err } opts.Printf("Removed duplicate packages: %d, duplicate objects: %d empty dirs: %d\n", len(names), count, prunedDirs) return nil } type PackOptions struct { ZetaDir string SharingRoot string Quiet bool CompressionALGO string PackThreshold int64 Logger func(format string, a ...any) NewIndicators NewIndicators } const ( DefaultPackThreshold = 50 * 1024 * 1024 // 50M ) func (opts *PackOptions) checkInit() { if opts.PackThreshold == 0 { opts.PackThreshold = DefaultPackThreshold } if opts.CompressionALGO == "" { opts.CompressionALGO = "zstd" } if opts.NewIndicators == nil { opts.NewIndicators = func(description, completed string, total uint64, quiet bool) Indicators { return &nonIndicators{} } } } func (opts *PackOptions) Printf(format string, a ...any) { if opts.Logger != nil { opts.Logger(format, a...) } } func PackObjects(ctx context.Context, opts *PackOptions) error { opts.checkInit() metaRoot := filepath.Join(opts.ZetaDir, "metadata") if err := packObjectsInternal(ctx, opts, metaRoot, true); err != nil { return err } root := filepath.Join(opts.ZetaDir, "blob") if len(opts.SharingRoot) != 0 { root = filepath.Join(opts.SharingRoot, "blob") } return packObjectsInternal(ctx, opts, root, false) } ================================================ FILE: modules/zeta/backend/pack-objects_test.go ================================================ package backend import ( "fmt" "os" "testing" ) func TestPackObjects(t *testing.T) { opts := &PackOptions{ ZetaDir: "/tmp/xh3/.zeta", } if err := PackObjects(t.Context(), opts); err != nil { fmt.Fprintf(os.Stderr, "pack objects error: %v\n", err) } } ================================================ FILE: modules/zeta/backend/prune.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package backend import ( "context" "github.com/antgroup/hugescm/modules/plumbing" ) func (d *Database) PruneObject(ctx context.Context, oid plumbing.Hash, metadata bool) error { if metadata { return d.metaRW.PruneObject(ctx, oid) } return d.rw.PruneObject(ctx, oid) } func (d *Database) PruneObjects(ctx context.Context, largeSize int64) ([]plumbing.Hash, int64, error) { return d.rw.PruneObjects(ctx, largeSize) } ================================================ FILE: modules/zeta/backend/storage/storage.go ================================================ // Copyright (c) 2017- GitHub, Inc. and Git LFS contributors // SPDX-License-Identifier: MIT package storage import ( "context" "errors" "io" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/object" ) type Storage interface { // Open returns a handle on an existing object keyed by the given object // ID. It returns an error if that file does not already exist. Open(oid plumbing.Hash) (f io.ReadCloser, err error) // Exists(name plumbing.Hash) error // Search(prefix plumbing.Hash) (plumbing.Hash, error) // Close closes the filesystem, after which no more operations are // allowed. Close() error } type WritableStorage interface { Storage HashTo(ctx context.Context, r io.Reader, size int64) (oid plumbing.Hash, err error) Unpack(oid plumbing.Hash, r io.Reader) (err error) WriteEncoded(e object.Encoder) (oid plumbing.Hash, err error) LooseObjects() ([]plumbing.Hash, error) PruneObject(ctx context.Context, oid plumbing.Hash) error PruneObjects(ctx context.Context, largeSize int64) ([]plumbing.Hash, int64, error) } // Storage implements an interface for reading, but not writing, objects in an // object database. type multiStorage struct { storages []Storage } func MultiStorage(args ...Storage) Storage { return &multiStorage{storages: args} } // Open returns a handle on an existing object keyed by the given object // ID. It returns an error if that file does not already exist. func (m *multiStorage) Open(oid plumbing.Hash) (f io.ReadCloser, err error) { for _, s := range m.storages { f, err := s.Open(oid) if err != nil { if plumbing.IsNoSuchObject(err) { continue } return nil, err } return f, nil } return nil, plumbing.NoSuchObject(oid) } func (m *multiStorage) Exists(oid plumbing.Hash) error { for _, s := range m.storages { if err := s.Exists(oid); err != nil { if plumbing.IsNoSuchObject(err) { continue } return err } return nil } return plumbing.NoSuchObject(oid) } func (m *multiStorage) Search(prefix plumbing.Hash) (plumbing.Hash, error) { for _, s := range m.storages { oid, err := s.Search(prefix) if err != nil { if plumbing.IsNoSuchObject(err) { continue } return oid, err } return oid, nil } return plumbing.ZeroHash, plumbing.NoSuchObject(prefix) } // Close closes the filesystem, after which no more operations are // allowed. func (m *multiStorage) Close() error { var errs []error for _, s := range m.storages { if err := s.Close(); err != nil { errs = append(errs, err) } } return errors.Join(errs...) } ================================================ FILE: modules/zeta/backend/unpack.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package backend import ( "errors" "bytes" "encoding/binary" "fmt" "io" "os" "path/filepath" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" "github.com/antgroup/hugescm/modules/zeta/backend/pack" "github.com/antgroup/hugescm/modules/zeta/object" ) type Unpacker struct { *pack.Writer root string quarantineDir string selectedMethod CompressMethod } func (u *Unpacker) method(compressed bool) CompressMethod { if compressed { return STORE } return u.selectedMethod } func (u *Unpacker) HashTo(r io.Reader, size int64, modification int64) (oid plumbing.Hash, err error) { payload, err := streamio.ReadMax(r, mimePacketSize) if err != nil && !errors.Is(err, io.EOF) { return oid, fmt.Errorf("ReadFull error: %w", err) } compressed := isBinaryPayload(payload) var contents io.Reader = bytes.NewReader(payload) if !errors.Is(err, io.EOF) { contents = io.MultiReader(contents, r) } hasher := plumbing.NewHasher() buffer := streamio.GetBytesBuffer() defer streamio.PutBytesBuffer(buffer) // 4 byte magic if _, err = buffer.Write(BLOB_MAGIC[:]); err != nil { return } // 2 byte version if err = binary.Write(buffer, binary.BigEndian, DEFAULT_BLOB_VERSION); err != nil { return } // 2 byte method method := u.method(compressed) if err = binary.Write(buffer, binary.BigEndian, method); err != nil { return } // 8 byte uncompressed length if err = binary.Write(buffer, binary.BigEndian, size); err != nil { return } var written int64 if written, err = compress(io.TeeReader(contents, hasher), buffer, method); err != nil { return } if size != written { return oid, fmt.Errorf("blob size not match expected, actual size %d, expected size %d", written, size) } oid = hasher.Sum() encBytes := buffer.Bytes() if err = u.Write(oid, uint32(len(encBytes)), bytes.NewReader(encBytes), modification); err != nil { return } return } func (u *Unpacker) WriteEncoded(e object.Encoder, squeeze bool, modification int64) (plumbing.Hash, error) { buffer := streamio.GetBytesBuffer() defer streamio.PutBytesBuffer(buffer) hasher := plumbing.NewHasher() if squeeze { zw := streamio.GetZstdWriter(buffer) if err := e.Encode(io.MultiWriter(zw, hasher)); err != nil { streamio.PutZstdWriter(zw) return plumbing.ZeroHash, err } streamio.PutZstdWriter(zw) // MUST CLOSE ZSTD WRITER } else { if err := e.Encode(io.MultiWriter(buffer, hasher)); err != nil { return plumbing.ZeroHash, err } } oid := hasher.Sum() data := buffer.Bytes() if err := u.Write(oid, uint32(len(data)), bytes.NewReader(data), modification); err != nil { return oid, err } return oid, nil } func (u *Unpacker) Close() error { if u.Writer == nil { return nil } err := u.Writer.Close() if len(u.quarantineDir) != 0 { _ = os.RemoveAll(u.quarantineDir) } return err } func (d *Database) NewUnpackerEx(entries uint32, metadata bool, method CompressMethod) (*Unpacker, error) { var root, incoming string switch { case metadata: root = filepath.Join(d.root, "metadata") incoming = filepath.Join(d.root, "incoming") case len(d.sharingRoot) != 0: root = filepath.Join(d.sharingRoot, "blob") incoming = filepath.Join(d.sharingRoot, "incoming") default: root = filepath.Join(d.root, "blob") incoming = filepath.Join(d.root, "incoming") } quarantineDir, err := os.MkdirTemp(incoming, "quarantine-") if err != nil { return nil, err } w, err := pack.NewWriter(quarantineDir, entries) if err != nil { _ = os.RemoveAll(quarantineDir) return nil, err } return &Unpacker{Writer: w, root: root, quarantineDir: quarantineDir, selectedMethod: method}, nil } func (d *Database) NewUnpacker(entries uint32, metadata bool) (*Unpacker, error) { return d.NewUnpackerEx(entries, metadata, fromCompressionALGO(d.compressionALGO)) } func (u *Unpacker) Preserve() error { if err := u.WriteTrailer(); err != nil { return err } return preservePack(u.root, u.quarantineDir) } ================================================ FILE: modules/zeta/config/boolean_test.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "testing" ) func TestBooleanMerge(t *testing.T) { tests := []struct { name string b Boolean other Boolean expected int }{ {"UNSET + TRUE = TRUE", Boolean{val: BOOLEAN_UNSET}, Boolean{val: BOOLEAN_TRUE}, BOOLEAN_TRUE}, {"UNSET + FALSE = FALSE", Boolean{val: BOOLEAN_UNSET}, Boolean{val: BOOLEAN_FALSE}, BOOLEAN_FALSE}, {"UNSET + UNSET = UNSET", Boolean{val: BOOLEAN_UNSET}, Boolean{val: BOOLEAN_UNSET}, BOOLEAN_UNSET}, {"TRUE + FALSE = FALSE (higher priority)", Boolean{val: BOOLEAN_TRUE}, Boolean{val: BOOLEAN_FALSE}, BOOLEAN_FALSE}, {"FALSE + TRUE = TRUE (higher priority)", Boolean{val: BOOLEAN_FALSE}, Boolean{val: BOOLEAN_TRUE}, BOOLEAN_TRUE}, {"TRUE + UNSET = TRUE", Boolean{val: BOOLEAN_TRUE}, Boolean{val: BOOLEAN_UNSET}, BOOLEAN_TRUE}, {"FALSE + UNSET = FALSE", Boolean{val: BOOLEAN_FALSE}, Boolean{val: BOOLEAN_UNSET}, BOOLEAN_FALSE}, {"TRUE + TRUE = TRUE", Boolean{val: BOOLEAN_TRUE}, Boolean{val: BOOLEAN_TRUE}, BOOLEAN_TRUE}, {"FALSE + FALSE = FALSE", Boolean{val: BOOLEAN_FALSE}, Boolean{val: BOOLEAN_FALSE}, BOOLEAN_FALSE}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { b := tt.b b.Merge(&tt.other) if b.val != tt.expected { t.Errorf("Merge() = %v, want %v", b.val, tt.expected) } }) } } func TestBooleanUnmarshal(t *testing.T) { tests := []struct { name string input any expected int wantErr bool }{ // Boolean values {"bool true", true, BOOLEAN_TRUE, false}, {"bool false", false, BOOLEAN_FALSE, false}, // String values {"string true", "true", BOOLEAN_TRUE, false}, {"string false", "false", BOOLEAN_FALSE, false}, {"string yes", "yes", BOOLEAN_TRUE, false}, {"string no", "no", BOOLEAN_FALSE, false}, {"string on", "on", BOOLEAN_TRUE, false}, {"string off", "off", BOOLEAN_FALSE, false}, {"string 1", "1", BOOLEAN_TRUE, false}, {"string 0", "0", BOOLEAN_FALSE, false}, // Integer values {"int 1", int64(1), BOOLEAN_TRUE, false}, {"int 0", int64(0), BOOLEAN_FALSE, false}, // Case insensitive {"TRUE", "TRUE", BOOLEAN_TRUE, false}, {"FALSE", "FALSE", BOOLEAN_FALSE, false}, {"Yes", "Yes", BOOLEAN_TRUE, false}, {"No", "No", BOOLEAN_FALSE, false}, // Invalid values should error {"invalid string", "invalid", BOOLEAN_UNSET, true}, {"invalid float", 3.14, BOOLEAN_UNSET, true}, {"unsupported type", struct{}{}, BOOLEAN_UNSET, true}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var b Boolean err := b.UnmarshalTOML(tt.input) if tt.wantErr { if err == nil { t.Errorf("UnmarshalTOML(%v) expected error, got nil", tt.input) } return } if err != nil { t.Errorf("UnmarshalTOML(%v) error = %v", tt.input, err) return } if b.val != tt.expected { t.Errorf("UnmarshalTOML(%v) = %v, want %v", tt.input, b.val, tt.expected) } }) } } func TestBooleanUnmarshalText(t *testing.T) { tests := []struct { name string input string expected int wantErr bool }{ {"true", "true", BOOLEAN_TRUE, false}, {"false", "false", BOOLEAN_FALSE, false}, {"yes", "yes", BOOLEAN_TRUE, false}, {"no", "no", BOOLEAN_FALSE, false}, {"on", "on", BOOLEAN_TRUE, false}, {"off", "off", BOOLEAN_FALSE, false}, {"1", "1", BOOLEAN_TRUE, false}, {"0", "0", BOOLEAN_FALSE, false}, {"TRUE", "TRUE", BOOLEAN_TRUE, false}, {"FALSE", "FALSE", BOOLEAN_FALSE, false}, {"invalid", "invalid", BOOLEAN_UNSET, true}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var b Boolean err := b.UnmarshalText([]byte(tt.input)) if tt.wantErr { if err == nil { t.Errorf("UnmarshalText(%q) expected error, got nil", tt.input) } return } if err != nil { t.Errorf("UnmarshalText(%q) error = %v", tt.input, err) return } if b.val != tt.expected { t.Errorf("UnmarshalText(%q) = %v, want %v", tt.input, b.val, tt.expected) } }) } } ================================================ FILE: modules/zeta/config/codec_toml.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "bytes" "fmt" "io" "os" "github.com/pelletier/go-toml/v2" ) // LoadDocument loads a Document from TOML bytes. // It validates the TOML structure: // - Top-level must be a table (map) // - Each section must be a table (map) // - No array of tables // - No empty arrays (cannot infer type) func LoadDocument(data []byte) (Document, error) { var raw map[string]any decoder := toml.NewDecoder(bytes.NewReader(data)) if err := decoder.Decode(&raw); err != nil { return nil, err } return fromRawAny(raw) } // fromRawAny converts a map[string]any to Document with validation. func fromRawAny(raw map[string]any) (Document, error) { doc := make(Document) for sectionName, sectionValue := range raw { // Each top-level value must be a map (section) sectionMap, ok := sectionValue.(map[string]any) if !ok { return nil, fmt.Errorf("invalid TOML structure: top-level key %q is not a table", sectionName) } section := make(Section) for keyName, rawValue := range sectionMap { // Check for nested tables if _, isTable := rawValue.(map[string]any); isTable { return nil, fmt.Errorf("invalid TOML structure: nested table at %q.%q", sectionName, keyName) } // Check for array of tables if arr, isArray := rawValue.([]any); isArray && len(arr) > 0 { if _, isTable := arr[0].(map[string]any); isTable { return nil, fmt.Errorf("invalid TOML structure: array of tables at %q.%q not supported", sectionName, keyName) } } // Check for empty []any (cannot infer type) if arr, isArray := rawValue.([]any); isArray && len(arr) == 0 { return nil, fmt.Errorf("invalid TOML structure: empty array at %q.%q, cannot infer type", sectionName, keyName) } value, err := FromAny(rawValue) if err != nil { return nil, fmt.Errorf("section %q key %q: %w", sectionName, keyName, err) } section[keyName] = value } if len(section) > 0 { doc[sectionName] = section } } return doc, nil } // LoadDocumentFile loads a Document from a TOML file. func LoadDocumentFile(path string) (Document, error) { data, err := os.ReadFile(path) if err != nil { return nil, err } return LoadDocument(data) } // MarshalDocument marshals a Document to TOML bytes. func MarshalDocument(doc Document) ([]byte, error) { var buf bytes.Buffer encoder := newTOMLEncoder(&buf) if err := encoder.Encode(doc.Raw()); err != nil { return nil, err } return buf.Bytes(), nil } // newTOMLEncoder creates a TOML encoder with consistent configuration. func newTOMLEncoder(w io.Writer) *toml.Encoder { encoder := toml.NewEncoder(w) encoder.SetArraysMultiline(false) encoder.SetIndentTables(false) return encoder } // LoadConfig loads TOML bytes into a Config struct. func LoadConfig(data []byte, cfg *Config) error { decoder := toml.NewDecoder(bytes.NewReader(data)) return decoder.Decode(cfg) } // LoadConfigFile loads a TOML file into a Config struct. func LoadConfigFile(path string, cfg *Config) error { data, err := os.ReadFile(path) if err != nil { return err } return LoadConfig(data, cfg) } // ValidateDocumentAs validates that a Document can be decoded into the provided struct. // This is used to ensure that a Document represents a valid Config before writing. func ValidateDocumentAs(doc Document, target any) error { data, err := MarshalDocument(doc) if err != nil { return err } return toml.NewDecoder(bytes.NewReader(data)).Decode(target) } ================================================ FILE: modules/zeta/config/codec_toml_test.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "testing" ) func TestLoadDocument(t *testing.T) { tomlData := ` [core] editor = "vim" sparse = ["dir1", "dir2", "dir3"] timeout = 30 [user] name = "Alice" email = "alice@example.com" [http] sslVerify = true maxRetries = 5 ` doc, err := LoadDocument([]byte(tomlData)) if err != nil { t.Fatalf("LoadDocument() error: %v", err) } // Test string value value, exists, err := doc.Get("core.editor") if err != nil { t.Fatalf("Get(core.editor) error: %v", err) } if !exists { t.Fatalf("Get(core.editor) not found") } if value.Kind() != KindString { t.Errorf("core.editor kind = %v, want %v", value.Kind(), KindString) } if value.ToAny() != "vim" { t.Errorf("core.editor = %v, want vim", value.ToAny()) } // Test string slice value, exists, err = doc.Get("core.sparse") if err != nil { t.Fatalf("Get(core.sparse) error: %v", err) } if !exists { t.Fatalf("Get(core.sparse) not found") } if value.Kind() != KindStringSlice { t.Errorf("core.sparse kind = %v, want %v", value.Kind(), KindStringSlice) } all := value.All() if len(all) != 3 { t.Errorf("core.sparse len = %d, want 3", len(all)) } // Test int64 value value, exists, err = doc.Get("core.timeout") if err != nil { t.Fatalf("Get(core.timeout) error: %v", err) } if !exists { t.Fatalf("Get(core.timeout) not found") } if value.Kind() != KindInt64 { t.Errorf("core.timeout kind = %v, want %v", value.Kind(), KindInt64) } // Test bool value value, exists, err = doc.Get("http.sslVerify") if err != nil { t.Fatalf("Get(http.sslVerify) error: %v", err) } if !exists { t.Fatalf("Get(http.sslVerify) not found") } if value.Kind() != KindBool { t.Errorf("http.sslVerify kind = %v, want %v", value.Kind(), KindBool) } } func TestMarshalDocument(t *testing.T) { doc := NewDocument() _, _ = doc.Set("core.editor", "vim") _, _ = doc.Set("core.sparse", []string{"dir1", "dir2"}) _, _ = doc.Set("user.name", "Bob") _, _ = doc.Set("http.timeout", int64(60)) data, err := MarshalDocument(doc) if err != nil { t.Fatalf("MarshalDocument() error: %v", err) } // Parse it back doc2, err := LoadDocument(data) if err != nil { t.Fatalf("LoadDocument() error: %v", err) } // Verify round-trip value, exists, _ := doc2.Get("core.editor") if !exists || value.ToAny() != "vim" { t.Errorf("Round-trip core.editor failed") } value, exists, _ = doc2.Get("core.sparse") if !exists || value.Kind() != KindStringSlice { t.Errorf("Round-trip core.sparse failed") } value, exists, _ = doc2.Get("http.timeout") if !exists || value.Kind() != KindInt64 { t.Errorf("Round-trip http.timeout failed") } } func TestLoadConfig(t *testing.T) { tomlData := ` [core] editor = "vim" remote = "origin" snapshot = true [user] name = "Charlie" email = "charlie@example.com" [fragment] threshold = "2g" size = "1g" [http] sslVerify = false ` var cfg Config err := LoadConfig([]byte(tomlData), &cfg) if err != nil { t.Fatalf("LoadConfig() error: %v", err) } // Verify parsed config if cfg.Core.Editor != "vim" { t.Errorf("Core.Editor = %v, want vim", cfg.Core.Editor) } if cfg.User.Name != "Charlie" { t.Errorf("User.Name = %v, want Charlie", cfg.User.Name) } if cfg.User.Email != "charlie@example.com" { t.Errorf("User.Email = %v, want charlie@example.com", cfg.User.Email) } if !cfg.Core.Snapshot { t.Errorf("Core.Snapshot = false, want true") } if !cfg.HTTP.SSLVerify.False() { t.Errorf("HTTP.SSLVerify = true, want false") } } func TestValidateDocumentAs(t *testing.T) { // Valid document doc := NewDocument() _, _ = doc.Set("core.editor", "vim") _, _ = doc.Set("user.name", "Alice") var cfg Config err := ValidateDocumentAs(doc, &cfg) if err != nil { t.Errorf("ValidateDocumentAs() valid document error: %v", err) } } func TestLoadDocumentInvalidStructure(t *testing.T) { tests := []struct { name string toml string wantErr bool }{ { name: "valid simple", toml: ` [core] editor = "vim" `, wantErr: false, }, { name: "top-level scalar key invalid for document model", toml: `editor = "vim"`, wantErr: true, }, { name: "nested table", toml: ` [core] [core.nested] key = "value" `, wantErr: true, }, { name: "array of tables not supported", toml: ` [[core.items]] name = "item1" `, wantErr: true, }, { name: "empty array cannot infer type", toml: ` [core] items = [] `, wantErr: true, }, { name: "valid array", toml: ` [core] items = ["a", "b"] `, wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { _, err := LoadDocument([]byte(tt.toml)) if tt.wantErr { if err == nil { t.Errorf("LoadDocument() expected error, got nil") } } else { if err != nil { t.Errorf("LoadDocument() unexpected error: %v", err) } } }) } } ================================================ FILE: modules/zeta/config/compat_test.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "testing" ) // TestCompatConfigDecoding tests that the new implementation decodes Config // structs with the same semantics as the old implementation. func TestCompatConfigDecoding(t *testing.T) { tests := []struct { name string toml string want Config }{ { name: "basic config", toml: ` [core] editor = "vim" remote = "origin" snapshot = true [user] name = "Alice" email = "alice@example.com" `, want: Config{ Core: Core{ Editor: "vim", Remote: "origin", Snapshot: true, }, User: User{ Name: "Alice", Email: "alice@example.com", }, }, }, { name: "with size values", toml: ` [fragment] threshold = "2g" size = "1g" [transport] largeSize = "10m" maxEntries = 8 `, want: Config{ Fragment: Fragment{ ThresholdRaw: 2 * 1024 * 1024 * 1024, SizeRaw: 1 * 1024 * 1024 * 1024, }, Transport: Transport{ LargeSizeRaw: 10 * 1024 * 1024, MaxEntries: 8, }, }, }, { name: "with string array", toml: ` [core] sparse = ["dir1", "dir2", "dir3"] [http] extraHeader = ["X-Custom: value1", "X-Custom: value2"] `, want: Config{ Core: Core{ SparseDirs: []string{"dir1", "dir2", "dir3"}, }, HTTP: HTTP{ ExtraHeader: []string{"X-Custom: value1", "X-Custom: value2"}, }, }, }, { name: "with boolean", toml: ` [http] sslVerify = true [fragment] enable_cdc = false `, want: Config{ HTTP: HTTP{ SSLVerify: True, }, Fragment: Fragment{ EnableCDC: False, }, }, }, { name: "with credential", toml: ` [credential] storage = "file" encryptionKey = "secret-key" storagePath = "/path/to/creds" `, want: Config{ Credential: Credential{ Storage: "file", EncryptionKey: "secret-key", StoragePath: "/path/to/creds", }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var cfg Config err := LoadConfig([]byte(tt.toml), &cfg) if err != nil { t.Fatalf("LoadConfig() error: %v", err) } // Compare Core if cfg.Core.Editor != tt.want.Core.Editor { t.Errorf("Core.Editor = %q, want %q", cfg.Core.Editor, tt.want.Core.Editor) } if cfg.Core.Remote != tt.want.Core.Remote { t.Errorf("Core.Remote = %q, want %q", cfg.Core.Remote, tt.want.Core.Remote) } if cfg.Core.Snapshot != tt.want.Core.Snapshot { t.Errorf("Core.Snapshot = %v, want %v", cfg.Core.Snapshot, tt.want.Core.Snapshot) } if !stringSlicesEqual(cfg.Core.SparseDirs, tt.want.Core.SparseDirs) { t.Errorf("Core.SparseDirs = %v, want %v", cfg.Core.SparseDirs, tt.want.Core.SparseDirs) } // Compare User if cfg.User.Name != tt.want.User.Name { t.Errorf("User.Name = %q, want %q", cfg.User.Name, tt.want.User.Name) } if cfg.User.Email != tt.want.User.Email { t.Errorf("User.Email = %q, want %q", cfg.User.Email, tt.want.User.Email) } // Compare Fragment if cfg.Fragment.ThresholdRaw != tt.want.Fragment.ThresholdRaw { t.Errorf("Fragment.ThresholdRaw = %d, want %d", cfg.Fragment.ThresholdRaw, tt.want.Fragment.ThresholdRaw) } if cfg.Fragment.SizeRaw != tt.want.Fragment.SizeRaw { t.Errorf("Fragment.SizeRaw = %d, want %d", cfg.Fragment.SizeRaw, tt.want.Fragment.SizeRaw) } if cfg.Fragment.EnableCDC.True() != tt.want.Fragment.EnableCDC.True() { t.Errorf("Fragment.EnableCDC = %v, want %v", cfg.Fragment.EnableCDC.True(), tt.want.Fragment.EnableCDC.True()) } // Compare HTTP if !stringSlicesEqual(cfg.HTTP.ExtraHeader, tt.want.HTTP.ExtraHeader) { t.Errorf("HTTP.ExtraHeader = %v, want %v", cfg.HTTP.ExtraHeader, tt.want.HTTP.ExtraHeader) } if cfg.HTTP.SSLVerify.True() != tt.want.HTTP.SSLVerify.True() { t.Errorf("HTTP.SSLVerify = %v, want %v", cfg.HTTP.SSLVerify.True(), tt.want.HTTP.SSLVerify.True()) } // Compare Transport if cfg.Transport.LargeSizeRaw != tt.want.Transport.LargeSizeRaw { t.Errorf("Transport.LargeSizeRaw = %d, want %d", cfg.Transport.LargeSizeRaw, tt.want.Transport.LargeSizeRaw) } if cfg.Transport.MaxEntries != tt.want.Transport.MaxEntries { t.Errorf("Transport.MaxEntries = %d, want %d", cfg.Transport.MaxEntries, tt.want.Transport.MaxEntries) } // Compare Credential if cfg.Credential.Storage != tt.want.Credential.Storage { t.Errorf("Credential.Storage = %q, want %q", cfg.Credential.Storage, tt.want.Credential.Storage) } if cfg.Credential.EncryptionKey != tt.want.Credential.EncryptionKey { t.Errorf("Credential.EncryptionKey = %q, want %q", cfg.Credential.EncryptionKey, tt.want.Credential.EncryptionKey) } if cfg.Credential.StoragePath != tt.want.Credential.StoragePath { t.Errorf("Credential.StoragePath = %q, want %q", cfg.Credential.StoragePath, tt.want.Credential.StoragePath) } }) } } // TestCompatOverwrite tests that Overwrite methods maintain the same semantics. func TestCompatOverwrite(t *testing.T) { t.Run("Core.Overwrite", func(t *testing.T) { base := Core{ Editor: "vim", Remote: "origin", Snapshot: false, } override := Core{ Editor: "nano", Remote: "", // Empty string should not override Snapshot: true, } base.Overwrite(&override) if base.Editor != "nano" { t.Errorf("Editor = %q, want nano", base.Editor) } if base.Remote != "origin" { t.Errorf("Remote = %q, want origin (not overwritten)", base.Remote) } if !base.Snapshot { t.Errorf("Snapshot = false, want true") } }) t.Run("User.Overwrite", func(t *testing.T) { base := User{ Name: "Alice", Email: "alice@example.com", } override := User{ Name: "Bob", Email: "", // Empty should not override } base.Overwrite(&override) if base.Name != "Bob" { t.Errorf("Name = %q, want Bob", base.Name) } if base.Email != "alice@example.com" { t.Errorf("Email = %q, want alice@example.com (not overwritten)", base.Email) } }) t.Run("HTTP.Overwrite merges ExtraHeader", func(t *testing.T) { base := HTTP{ ExtraHeader: []string{"X-Header: value1"}, } override := HTTP{ ExtraHeader: []string{"X-Header: value2"}, } base.Overwrite(&override) if len(base.ExtraHeader) != 2 { t.Errorf("ExtraHeader len = %d, want 2", len(base.ExtraHeader)) } }) t.Run("Config.Overwrite priority", func(t *testing.T) { base := Config{ Core: Core{ Editor: "vim", }, User: User{ Name: "Alice", }, } override := Config{ Core: Core{ Editor: "nano", }, User: User{ Name: "Bob", }, } base.Overwrite(&override) if base.Core.Editor != "nano" { t.Errorf("Core.Editor = %q, want nano", base.Core.Editor) } if base.User.Name != "Bob" { t.Errorf("User.Name = %q, want Bob", base.User.Name) } }) } // TestCompatBooleanMerge tests Boolean.Merge semantics. func TestCompatBooleanMerge(t *testing.T) { tests := []struct { name string base Boolean other Boolean expected int }{ {"UNSET + TRUE = TRUE", Boolean{val: BOOLEAN_UNSET}, Boolean{val: BOOLEAN_TRUE}, BOOLEAN_TRUE}, {"UNSET + FALSE = FALSE", Boolean{val: BOOLEAN_UNSET}, Boolean{val: BOOLEAN_FALSE}, BOOLEAN_FALSE}, {"TRUE + FALSE = FALSE (higher priority)", Boolean{val: BOOLEAN_TRUE}, Boolean{val: BOOLEAN_FALSE}, BOOLEAN_FALSE}, {"FALSE + TRUE = TRUE (higher priority)", Boolean{val: BOOLEAN_FALSE}, Boolean{val: BOOLEAN_TRUE}, BOOLEAN_TRUE}, {"TRUE + UNSET = TRUE", Boolean{val: BOOLEAN_TRUE}, Boolean{val: BOOLEAN_UNSET}, BOOLEAN_TRUE}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { b := tt.base b.Merge(&tt.other) if b.val != tt.expected { t.Errorf("Merge() = %v, want %v", b.val, tt.expected) } }) } } // TestCompatKeyParsing tests that key parsing maintains the same semantics. func TestCompatKeyParsing(t *testing.T) { // Valid keys validKeys := []string{ "core.editor", "http.sslVerify", "user.name", "transport.maxEntries", } for _, key := range validKeys { t.Run("valid: "+key, func(t *testing.T) { _, err := ParseKey(key) if err != nil { t.Errorf("ParseKey(%q) error: %v", key, err) } }) } // Invalid keys invalidKeys := []string{ "core", // Missing dot ".editor", // Missing section "core.", // Missing name "a.b.c", // Nested path "", // Empty } for _, key := range invalidKeys { t.Run("invalid: "+key, func(t *testing.T) { _, err := ParseKey(key) if err == nil { t.Errorf("ParseKey(%q) expected error, got nil", key) } }) } } // Helper function func stringSlicesEqual(a, b []string) bool { if len(a) != len(b) { return false } for i := range a { if a[i] != b[i] { return false } } return true } ================================================ FILE: modules/zeta/config/config.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "errors" "fmt" "github.com/antgroup/hugescm/modules/strengthen" ) const ( FragmentThreshold int64 = 1 * strengthen.GiByte // 1G FragmentSize int64 = 1 * strengthen.GiByte // 1G ) // ErrBadConfigKey indicates an invalid configuration key was provided. type ErrBadConfigKey struct { key string } func (err *ErrBadConfigKey) Error() string { return fmt.Sprintf("bad zeta config key '%s'", err.key) } func IsErrBadConfigKey(err error) bool { var e *ErrBadConfigKey return errors.As(err, &e) } var ( ErrInvalidArgument = errors.New("invalid argument") ) type User struct { Name string `toml:"name,omitempty"` Email string `toml:"email,omitempty"` } func (u *User) Empty() bool { return u == nil || len(u.Email) == 0 || len(u.Name) == 0 } func overwrite(current, override string) string { if override != "" { return override } return current } func (u *User) Overwrite(o *User) { u.Name = overwrite(u.Name, o.Name) u.Email = overwrite(u.Email, o.Email) } type Core struct { SharingRoot string `toml:"sharingRoot,omitempty"` // GLOBAL HooksPath string `toml:"hooksPath,omitempty"` // GLOBAL Remote string `toml:"remote,omitempty"` Snapshot bool `toml:"snapshot,omitempty"` SparseDirs StringArray `toml:"sparse,omitempty"` HashALGO string `toml:"hash-algo,omitempty"` CompressionALGO string `toml:"compression-algo,omitempty"` Editor string `toml:"editor,omitempty"` OptimizeStrategy Strategy `toml:"optimizeStrategy,omitempty"` // zeta config core.optimizeStrategy eager OR ZETA_CORE_OPTIMIZE_STRATEGY="eager" Accelerator Accelerator `toml:"accelerator,omitempty"` // zeta config core.accelerator dragonfly OR ZETA_CORE_ACCELERATOR="dragonfly" ConcurrentTransfers int `toml:"concurrenttransfers,omitzero"` // zeta config core.concurrenttransfers 8 OR ZETA_CORE_CONCURRENT_TRANSFERS=8 } func (c *Core) Overwrite(o *Core) { c.SharingRoot = overwrite(c.SharingRoot, o.SharingRoot) c.HooksPath = overwrite(c.HooksPath, o.HooksPath) c.Remote = overwrite(c.Remote, o.Remote) c.Snapshot = o.Snapshot if len(o.Accelerator) != 0 { c.Accelerator = o.Accelerator } if len(o.OptimizeStrategy) != 0 { c.OptimizeStrategy = o.OptimizeStrategy } if o.ConcurrentTransfers > 0 { c.ConcurrentTransfers = o.ConcurrentTransfers } c.CompressionALGO = overwrite(c.CompressionALGO, o.CompressionALGO) c.Editor = overwrite(c.Editor, o.Editor) // merge sparse dirs if len(o.SparseDirs) != 0 { c.SparseDirs = o.SparseDirs } } // IsExtreme: Extreme cleanup strategy to delete large object snapshots in the repository. Typically used in AI scenarios, it is no longer necessary to save blobs when downloading models. func (c *Core) IsExtreme() bool { return c.OptimizeStrategy == StrategyExtreme } type Fragment struct { ThresholdRaw Size `toml:"threshold,omitempty"` SizeRaw Size `toml:"size,omitempty"` EnableCDC Boolean `toml:"enable_cdc,omitempty"` // Enable CDC (Content-Defined Chunking) for AI model files } func (f *Fragment) Overwrite(o *Fragment) { if o.ThresholdRaw > 0 { f.ThresholdRaw = o.ThresholdRaw } if o.SizeRaw > 0 { f.SizeRaw = o.SizeRaw } f.EnableCDC.Merge(&o.EnableCDC) } func (f Fragment) Threshold() int64 { if f.ThresholdRaw < strengthen.MiByte { return FragmentThreshold } return int64(f.ThresholdRaw) } func (f Fragment) Size() int64 { if f.SizeRaw < strengthen.MiByte { return FragmentSize } return int64(f.SizeRaw) } type HTTP struct { ExtraHeader StringArray `toml:"extraHeader,omitempty"` SSLVerify Boolean `toml:"sslVerify,omitempty"` } func (h *HTTP) Overwrite(o *HTTP) { if len(o.ExtraHeader) > 0 { h.ExtraHeader = append(h.ExtraHeader, o.ExtraHeader...) } h.SSLVerify.Merge(&o.SSLVerify) } type SSH struct { ExtraEnv StringArray `toml:"extraEnv,omitempty"` } func (u *SSH) Overwrite(o *SSH) { if len(o.ExtraEnv) > 0 { u.ExtraEnv = append(u.ExtraEnv, o.ExtraEnv...) } } type Transport struct { MaxEntries int `toml:"maxEntries,omitempty"` LargeSizeRaw Size `toml:"largeSize,omitempty"` ExternalProxy string `toml:"externalProxy,omitempty"` } const ( minLargeSize = 512 << 10 // 512K largeSize = 5 << 20 // 5M ) func (t Transport) LargeSize() int64 { if t.LargeSizeRaw < minLargeSize { return largeSize } return int64(t.LargeSizeRaw) } func (t *Transport) Overwrite(o *Transport) { if o.LargeSizeRaw >= minLargeSize { t.LargeSizeRaw = o.LargeSizeRaw } if o.MaxEntries > 0 { t.MaxEntries = o.MaxEntries } t.ExternalProxy = overwrite(t.ExternalProxy, o.ExternalProxy) } type Diff struct { Algorithm string `toml:"algorithm,omitempty"` } func (d *Diff) Overwrite(o *Diff) { d.Algorithm = overwrite(d.Algorithm, o.Algorithm) } type Merge struct { ConflictStyle string `toml:"conflictStyle,omitempty"` } func (m *Merge) Overwrite(o *Merge) { m.ConflictStyle = overwrite(m.ConflictStyle, o.ConflictStyle) } // Credential configures credential storage behavior. // Different platforms support different storage backends: // // macOS: // - Default: Uses Security.framework via purego (no CGO required) // - "security": Uses /usr/bin/security CLI tool (fallback when security software blocks framework access) // - "file": Uses encrypted file storage // // Windows: // - Default: Uses Windows Credential Manager API // - "file": Uses encrypted file storage // // Linux: // - Default: "none" (credentials not stored unless explicitly configured) // - "secret-service": Uses libsecret/Secret Service API (requires DBUS) // - "file": Uses encrypted file storage type Credential struct { // Storage specifies the credential storage backend. // // Common options: // - "auto" (default): Use the platform's default backend // - "file": Use encrypted file storage (requires encryptionKey) // - "none": Disable credential storage completely // // Platform-specific options: // - macOS: "security" (uses /usr/bin/security CLI) // - Linux: "secret-service" (requires DBUS/Secret Service) // // Can be set via: zeta config credential.storage // Or environment: ZETA_CREDENTIAL_STORAGE= Storage string `toml:"storage,omitempty"` // EncryptionKey specifies the key used for encrypting credentials in file storage. // Required when storage="file". If not set, falls back to "auto" mode. // // Security note: Store this key securely! Consider using environment variable: // ZETA_CREDENTIAL_ENCRYPTION_KEY= // // To generate a secure key: openssl rand -base64 32 EncryptionKey string `toml:"encryptionKey,omitempty"` // StoragePath specifies the path for encrypted credential file storage. // Only used when storage="file". // Default: ~/.config/zeta/credentials // // Can be set via: zeta config credential.storagePath // Or environment: ZETA_CREDENTIAL_STORAGE_PATH= StoragePath string `toml:"storagePath,omitempty"` } // CredentialStorageConstants defines valid storage backend values const ( CredentialStorageAuto = "auto" // Default backend for each platform CredentialStorageSecretService = "secret-service" // Linux: Secret Service API (libsecret) CredentialStorageFile = "file" // All platforms: encrypted file storage CredentialStorageNone = "none" // Disable credential storage CredentialStorageSecurity = "security" // macOS: /usr/bin/security CLI ) func (c *Credential) Overwrite(o *Credential) { c.Storage = overwrite(c.Storage, o.Storage) c.EncryptionKey = overwrite(c.EncryptionKey, o.EncryptionKey) c.StoragePath = overwrite(c.StoragePath, o.StoragePath) } type Config struct { Core Core `toml:"core,omitempty"` User User `toml:"user,omitempty"` Fragment Fragment `toml:"fragment,omitempty"` HTTP HTTP `toml:"http,omitempty"` SSH SSH `toml:"ssh,omitempty"` Transport Transport `toml:"transport,omitempty"` Diff Diff `toml:"diff,omitempty"` Merge Merge `toml:"merge,omitempty"` Credential Credential `toml:"credential,omitempty"` } // Overwrite: use local config overwrite config func (c *Config) Overwrite(other *Config) { c.Core.Overwrite(&other.Core) c.User.Overwrite(&other.User) c.Fragment.Overwrite(&other.Fragment) c.HTTP.Overwrite(&other.HTTP) c.SSH.Overwrite(&other.SSH) c.Transport.Overwrite(&other.Transport) c.Diff.Overwrite(&other.Diff) c.Merge.Overwrite(&other.Merge) c.Credential.Overwrite(&other.Credential) } ================================================ FILE: modules/zeta/config/config_test.toml ================================================ [core] remote = "https://example.com/tom/mono-zeta" # https://git-scm.com/docs/sparse-index sparse-checkout = ["dev/app/client", "dev/modules/basic"] hash-algo = "BLAKE3" compression-algo = "zstd" [user] name = "admin" email = "zeta@example.io" ================================================ FILE: modules/zeta/config/config_test_bad.toml ================================================ [core] compression-algo = "zstd" hash-algo = "BLAKE3" remote = "https://example.com/tom/mono-zeta" sparse-checkout = ["dev/app/client", "dev/modules/basic", 10086] [user] email = "zeta@example.io" name = "admin" ================================================ FILE: modules/zeta/config/decode.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "errors" "os" "path/filepath" "github.com/antgroup/hugescm/modules/strengthen" ) const ( ENV_ZETA_CONFIG_SYSTEM = "ZETA_CONFIG_SYSTEM" ) var ( ErrKeyNotFound = errors.New("key not found") ) func configSystemPath() string { if p, ok := os.LookupEnv(ENV_ZETA_CONFIG_SYSTEM); ok { return p } exe, err := os.Executable() if err != nil { return "" } // zeta prefix --> prefix := filepath.Dir(exe) if filepath.Base(prefix) == "bin" { prefix = filepath.Dir(prefix) } return filepath.Join(prefix, "/etc/zeta.toml") } func LoadSystem() (*Config, error) { systemPath := configSystemPath() if len(systemPath) == 0 { return nil, os.ErrNotExist } var cfg Config if _, err := os.Stat(systemPath); err != nil { return nil, err } if err := LoadConfigFile(systemPath, &cfg); err != nil { return nil, err } return &cfg, nil } func LoadGlobal() (*Config, error) { var cfg Config userPath := strengthen.ExpandPath("~/.zeta.toml") if _, err := os.Stat(userPath); err != nil && os.IsNotExist(err) { return &cfg, nil } if err := LoadConfigFile(userPath, &cfg); err != nil { return nil, err } return &cfg, nil } // LoadBaseline loads config with priority: Global > System. // System config provides defaults, Global config overrides them. func LoadBaseline() (*Config, error) { gc, err := LoadGlobal() if err != nil { return nil, err } cfg, err := LoadSystem() if os.IsNotExist(err) { return gc, nil } if err != nil { return nil, err } // Global config (gc) overrides System config (cfg) cfg.Overwrite(gc) return cfg, nil } func Load(zetaDir string) (*Config, error) { cfg, err := LoadBaseline() if err != nil { return nil, err } if len(zetaDir) == 0 { return cfg, nil } var rc Config if err := LoadConfigFile(filepath.Join(zetaDir, "zeta.toml"), &rc); err != nil { return nil, err } cfg.Overwrite(&rc) return cfg, nil } ================================================ FILE: modules/zeta/config/decode_test.go ================================================ package config import ( "fmt" "os" "path/filepath" "runtime" "testing" ) func TestDecode(t *testing.T) { var cc Config _, filename, _, _ := runtime.Caller(0) file := filepath.Join(filepath.Dir(filename), "config_test.toml") if err := LoadConfigFile(file, &cc); err != nil { fmt.Fprintf(os.Stderr, "decode error: %v\n", err) return } } func TestDecode2(t *testing.T) { _, filename, _, _ := runtime.Caller(0) file := filepath.Join(filepath.Dir(filename), "config_test.toml") doc, err := LoadDocumentFile(file) if err != nil { fmt.Fprintf(os.Stderr, "load error: %v\n", err) return } d := &DisplayOptions{Writer: os.Stderr, Z: false} for k, s := range doc { if s == nil { continue } if err := s.displayTo(d, k); err != nil { return } } } func TestDecodeZ(t *testing.T) { _, filename, _, _ := runtime.Caller(0) p := filepath.Join(filepath.Dir(filename), "config_test.toml") doc, err := LoadDocumentFile(p) if err != nil { fmt.Fprintf(os.Stderr, "load error: %v\n", err) return } d := &DisplayOptions{Writer: os.Stderr, Z: true} for k, s := range doc { if s == nil { continue } if err := s.displayTo(d, k); err != nil { return } } } func TestFilter(t *testing.T) { _, filename, _, _ := runtime.Caller(0) p := filepath.Join(filepath.Dir(filename), "config_test.toml") doc, err := LoadDocumentFile(p) if err != nil { fmt.Fprintf(os.Stderr, "load error: %v\n", err) return } vals, err := doc.GetAll("core.sparse-checkout") if err != nil { fmt.Fprintf(os.Stderr, "filter all: %v\n", err) return } for _, v := range vals { fmt.Fprintf(os.Stderr, "values: %s\n", v) } } func TestLoad(t *testing.T) { _, filename, _, _ := runtime.Caller(0) p := filepath.Join(filepath.Dir(filename), "config_test_bad.toml") var rc Config if err := LoadConfigFile(p, &rc); err != nil { fmt.Fprintf(os.Stderr, "decode error: %v\n", err) return } fmt.Fprintf(os.Stderr, "%v\n", rc) } ================================================ FILE: modules/zeta/config/display.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "errors" "fmt" "io" "os" "path/filepath" "reflect" "strings" "github.com/antgroup/hugescm/modules/strengthen" "github.com/antgroup/hugescm/modules/trace" ) type DisplayOptions struct { io.Writer Z bool } const ( NUL = '\x00' maxDisplayDepth = 20 ) // formatKey converts a reflect.Value key to string. // For string keys, it returns the value directly to avoid fmt.Sprintf overhead. func formatKey(key reflect.Value) string { if key.Kind() == reflect.String { return key.String() } return fmt.Sprintf("%v", key.Interface()) } func (opts *DisplayOptions) Show(a any, keys ...string) error { if len(keys) > maxDisplayDepth { return nil } prefixKey := strings.Join(keys, ".") v := reflect.ValueOf(a) switch v.Kind() { case reflect.Array, reflect.Slice: for i := range v.Len() { if err := opts.Show(v.Index(i).Interface(), keys...); err != nil { return err } } return nil case reflect.Map: for _, key := range v.MapKeys() { mv := v.MapIndex(key) newKeys := append(keys, formatKey(key)) if err := opts.Show(mv.Interface(), newKeys...); err != nil { return err } } return nil case reflect.Struct: // structs are not supported for direct output return nil default: } if opts.Z { _, _ = fmt.Fprintf(opts.Writer, "%s\n%v%c", prefixKey, v, NUL) return nil } _, _ = fmt.Fprintf(opts.Writer, "%s=%v\n", prefixKey, v) return nil } func displayTo(d Display, zfg string) error { doc, err := LoadDocumentFile(zfg) if err != nil { return err } for sectionKey, section := range doc { if section == nil { continue } if err := section.displayTo(d, sectionKey); err != nil { return err } } return nil } func DisplaySystem(opts *DisplayOptions) error { zfg := configSystemPath() trace.DbgPrint("load system config: %s", zfg) if err := displayTo(opts, zfg); err != nil && !os.IsNotExist(err) { return err } return nil } func DisplayGlobal(opts *DisplayOptions) error { zfg := strengthen.ExpandPath("~/.zeta.toml") trace.DbgPrint("load global config: %s", zfg) if err := displayTo(opts, zfg); err != nil && !os.IsNotExist(err) { return err } return nil } func DisplayLocal(opts *DisplayOptions, zetaDir string) error { zfg := filepath.Join(zetaDir, "zeta.toml") trace.DbgPrint("load local config: %s", zfg) return displayTo(opts, zfg) } type GetOptions struct { io.Writer Keys []string ALL bool Z bool Verbose bool } func (opts *GetOptions) show(vals []any) { if opts.Z { for _, v := range vals { _, _ = fmt.Fprintf(opts, "%v%c", v, NUL) } return } for _, v := range vals { _, _ = fmt.Fprintln(opts, v) } } func getFromFile(opts *GetOptions, zfg string) error { doc, err := LoadDocumentFile(zfg) if err != nil { return err } if opts.ALL { for _, k := range opts.Keys { vals, err := doc.GetAll(k) if err != nil { return err } opts.show(vals) } return nil } for _, k := range opts.Keys { val, err := doc.GetFirst(k) if err != nil { return err } opts.show([]any{val}) } return nil } func GetSystem(opts *GetOptions) error { zfg := configSystemPath() trace.DbgPrint("load system config: %s", zfg) return getFromFile(opts, zfg) } func GetGlobal(opts *GetOptions) error { zfg := strengthen.ExpandPath("~/.zeta.toml") trace.DbgPrint("load global config: %s", zfg) return getFromFile(opts, zfg) } func GetLocal(opts *GetOptions, zetaDir string) error { zfg := filepath.Join(zetaDir, "zeta.toml") trace.DbgPrint("load local config: %s", zfg) return getFromFile(opts, zfg) } func Get(opts *GetOptions, zetaDir string, found bool) error { trace.DbgPrint("zeta-dir: %s filter keys: %v", zetaDir, opts.Keys) if len(zetaDir) != 0 { localPath := filepath.Join(zetaDir, "zeta.toml") trace.DbgPrint("load local config: %s", localPath) err := getFromFile(opts, localPath) switch { case err == nil: if !opts.ALL { return nil } found = true case !os.IsNotExist(err) && !errors.Is(err, ErrKeyNotFound): return err } } userPath := strengthen.ExpandPath("~/.zeta.toml") trace.DbgPrint("load global config: %s", userPath) err := getFromFile(opts, userPath) switch { case err == nil: if !opts.ALL { return nil } found = true case !os.IsNotExist(err) && !errors.Is(err, ErrKeyNotFound): return err } systemPath := configSystemPath() trace.DbgPrint("load system config: %s", systemPath) if err = getFromFile(opts, systemPath); err == nil { return nil } if found && (os.IsNotExist(err) || errors.Is(err, ErrKeyNotFound)) { // get all key not found in system scope return nil } return err } ================================================ FILE: modules/zeta/config/document.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "fmt" "strings" ) // Key represents a parsed configuration key with format "section.name". type Key struct { Section string Name string } // ParseKey parses a configuration key string into a Key struct. // The key must be in the format "section.name". // Returns ErrBadConfigKey for invalid formats. func ParseKey(s string) (Key, error) { section, name, ok := strings.Cut(s, ".") if !ok { return Key{}, &ErrBadConfigKey{key: s} } if section == "" || name == "" { return Key{}, &ErrBadConfigKey{key: s} } // Check for nested dots (e.g., "a.b.c") if strings.Contains(name, ".") { return Key{}, &ErrBadConfigKey{key: s} } return Key{Section: section, Name: name}, nil } // String returns the string representation of the key. func (k Key) String() string { return k.Section + "." + k.Name } // Section represents a configuration section with typed values. type Section map[string]Value // Document represents a configuration document with multiple sections. type Document map[string]Section // NewDocument creates a new empty Document. func NewDocument() Document { return make(Document) } // Get retrieves a value by key. // Returns the value, whether it exists, and an error if the key is invalid. func (d Document) Get(key string) (Value, bool, error) { k, err := ParseKey(key) if err != nil { return Value{}, false, err } section, ok := d[k.Section] if !ok { return Value{}, false, nil } value, ok := section[k.Name] return value, ok, nil } // GetFirst retrieves the first value by key. // For scalar values, returns the value itself. // For slice values, returns the first element. // Returns ErrKeyNotFound if the key doesn't exist or the slice is empty. func (d Document) GetFirst(key string) (any, error) { value, exists, err := d.Get(key) if err != nil { return nil, err } if !exists { return nil, ErrKeyNotFound } first, ok := value.First() if !ok { return nil, ErrKeyNotFound } return first, nil } // GetAll retrieves all values by key. // For scalar values, returns a single-element slice. // For slice values, returns all elements. // Returns ErrKeyNotFound if the key doesn't exist. func (d Document) GetAll(key string) ([]any, error) { value, exists, err := d.Get(key) if err != nil { return nil, err } if !exists { return nil, ErrKeyNotFound } all := value.All() if all == nil { return nil, ErrKeyNotFound } return all, nil } // Set sets a value by key. // Creates the section if it doesn't exist. // Returns true if an existing value was overwritten. func (d Document) Set(key string, val any) (bool, error) { k, err := ParseKey(key) if err != nil { return false, err } value, err := FromAny(val) if err != nil { return false, err } section, ok := d[k.Section] if !ok { section = make(Section) d[k.Section] = section } _, exists := section[k.Name] section[k.Name] = value return exists, nil } // Add appends a value to an existing key. // If the key doesn't exist, creates a new single-element slice. // If the key exists with a scalar value, converts to slice and appends. // Returns an error for type mismatch. func (d Document) Add(key string, val any) error { k, err := ParseKey(key) if err != nil { return err } newValue, err := FromAny(val) if err != nil { return err } section, ok := d[k.Section] if !ok { section = make(Section) d[k.Section] = section } existing, exists := section[k.Name] if !exists { // Key doesn't exist, set the new value directly section[k.Name] = newValue return nil } // Append to existing value combined, err := existing.Append(newValue) if err != nil { return fmt.Errorf("cannot add to key %q: %w", key, err) } section[k.Name] = combined return nil } // Delete removes a key from the document. // Returns ErrKeyNotFound if the key doesn't exist. func (d Document) Delete(key string) error { k, err := ParseKey(key) if err != nil { return err } section, ok := d[k.Section] if !ok { return ErrKeyNotFound } if _, ok := section[k.Name]; !ok { return ErrKeyNotFound } delete(section, k.Name) // Remove empty section if len(section) == 0 { delete(d, k.Section) } return nil } // Raw converts the Document to a map[string]map[string]any for encoding. func (d Document) Raw() map[string]map[string]any { result := make(map[string]map[string]any) for sectionName, section := range d { rawSection := make(map[string]any) for keyName, value := range section { rawSection[keyName] = value.ToAny() } if len(rawSection) > 0 { result[sectionName] = rawSection } } return result } // FromRaw creates a Document from a map[string]map[string]any. // Returns an error if any value has an unsupported type. func FromRaw(raw map[string]map[string]any) (Document, error) { doc := make(Document) for sectionName, rawSection := range raw { section := make(Section) for keyName, rawValue := range rawSection { value, err := FromAny(rawValue) if err != nil { return nil, fmt.Errorf("section %q key %q: %w", sectionName, keyName, err) } section[keyName] = value } if len(section) > 0 { doc[sectionName] = section } } return doc, nil } // displayTo displays all values in the section to the Display interface. func (s Section) displayTo(d Display, sectionKey string) error { for subKey, value := range s { if err := d.Show(value.ToAny(), sectionKey, subKey); err != nil { return err } } return nil } ================================================ FILE: modules/zeta/config/document_test.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "errors" "testing" ) func TestParseKey(t *testing.T) { tests := []struct { name string input string wantSection string wantName string wantError bool }{ { name: "valid key", input: "core.editor", wantSection: "core", wantName: "editor", }, { name: "valid key with hyphen", input: "http.ssl-verify", wantSection: "http", wantName: "ssl-verify", }, { name: "missing dot - core", input: "core", wantError: true, }, { name: "missing name - core.", input: "core.", wantError: true, }, { name: "missing section - .editor", input: ".editor", wantError: true, }, { name: "nested path - a.b.c", input: "a.b.c", wantError: true, }, { name: "empty string", input: "", wantError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { key, err := ParseKey(tt.input) if tt.wantError { if err == nil { t.Errorf("ParseKey(%q) expected error, got nil", tt.input) } return } if err != nil { t.Errorf("ParseKey(%q) unexpected error: %v", tt.input, err) return } if key.Section != tt.wantSection { t.Errorf("ParseKey(%q) section = %q, want %q", tt.input, key.Section, tt.wantSection) } if key.Name != tt.wantName { t.Errorf("ParseKey(%q) name = %q, want %q", tt.input, key.Name, tt.wantName) } }) } } func TestDocumentSetGet(t *testing.T) { doc := NewDocument() // Test Set and Get overwritten, err := doc.Set("core.editor", "vim") if err != nil { t.Fatalf("Set() error: %v", err) } if overwritten { t.Errorf("Set() overwritten = true, want false (first set)") } value, exists, err := doc.Get("core.editor") if err != nil { t.Fatalf("Get() error: %v", err) } if !exists { t.Errorf("Get() exists = false, want true") } if value.Kind() != KindString { t.Errorf("Get() kind = %v, want %v", value.Kind(), KindString) } if value.ToAny() != "vim" { t.Errorf("Get() value = %v, want vim", value.ToAny()) } // Test overwrite overwritten, err = doc.Set("core.editor", "nano") if err != nil { t.Fatalf("Set() error: %v", err) } if !overwritten { t.Errorf("Set() overwritten = false, want true (overwrite)") } value, _, _ = doc.Get("core.editor") if value.ToAny() != "nano" { t.Errorf("Get() value = %v, want nano", value.ToAny()) } // Test GetFirst first, err := doc.GetFirst("core.editor") if err != nil { t.Fatalf("GetFirst() error: %v", err) } if first != "nano" { t.Errorf("GetFirst() = %v, want nano", first) } // Test non-existent key _, exists, err = doc.Get("nonexistent.key") if err != nil { t.Fatalf("Get() error: %v", err) } if exists { t.Errorf("Get() exists = true for non-existent key, want false") } _, err = doc.GetFirst("nonexistent.key") if !errors.Is(err, ErrKeyNotFound) { t.Errorf("GetFirst() error = %v, want ErrKeyNotFound", err) } } func TestDocumentAdd(t *testing.T) { doc := NewDocument() // Add to non-existent key -> creates single value err := doc.Add("core.sparse", "dir1") if err != nil { t.Fatalf("Add() error: %v", err) } value, _, _ := doc.Get("core.sparse") if value.Kind() != KindString { t.Errorf("Add() kind = %v, want %v", value.Kind(), KindString) } // Add to existing scalar -> creates slice err = doc.Add("core.sparse", "dir2") if err != nil { t.Fatalf("Add() error: %v", err) } value, _, _ = doc.Get("core.sparse") if value.Kind() != KindStringSlice { t.Errorf("Add() kind = %v, want %v", value.Kind(), KindStringSlice) } all := value.All() if len(all) != 2 { t.Errorf("Add() len = %d, want 2", len(all)) } // Add to existing slice -> appends err = doc.Add("core.sparse", "dir3") if err != nil { t.Fatalf("Add() error: %v", err) } value, _, _ = doc.Get("core.sparse") all = value.All() if len(all) != 3 { t.Errorf("Add() len = %d, want 3", len(all)) } // Type mismatch should error err = doc.Add("core.sparse", 123) if err == nil { t.Errorf("Add() expected error for type mismatch, got nil") } } func TestDocumentDelete(t *testing.T) { doc := NewDocument() // Set a value _, _ = doc.Set("core.editor", "vim") // Delete existing key err := doc.Delete("core.editor") if err != nil { t.Fatalf("Delete() error: %v", err) } // Verify deletion _, exists, _ := doc.Get("core.editor") if exists { t.Errorf("Get() exists = true after delete, want false") } // Delete non-existent key should return ErrKeyNotFound err = doc.Delete("nonexistent.key") if !errors.Is(err, ErrKeyNotFound) { t.Errorf("Delete() error = %v, want ErrKeyNotFound", err) } // Delete invalid key should return ErrBadConfigKey err = doc.Delete("invalid") if err == nil { t.Errorf("Delete() expected error for invalid key, got nil") } } func TestDocumentRawRoundTrip(t *testing.T) { doc := NewDocument() _, _ = doc.Set("core.editor", "vim") _, _ = doc.Set("core.sparse", []string{"dir1", "dir2"}) _, _ = doc.Set("user.name", "Alice") _, _ = doc.Set("user.email", "alice@example.com") _, _ = doc.Set("http.timeout", int64(30)) // Convert to raw raw := doc.Raw() // Verify raw structure if len(raw) != 3 { t.Errorf("Raw() len = %d, want 3", len(raw)) } if raw["core"]["editor"] != "vim" { t.Errorf("Raw() core.editor = %v, want vim", raw["core"]["editor"]) } // Convert back from raw doc2, err := FromRaw(raw) if err != nil { t.Fatalf("FromRaw() error: %v", err) } // Verify round-trip value, exists, _ := doc2.Get("core.editor") if !exists || value.ToAny() != "vim" { t.Errorf("Round-trip core.editor failed") } value, exists, _ = doc2.Get("user.name") if !exists || value.ToAny() != "Alice" { t.Errorf("Round-trip user.name failed") } } func TestDocumentGetAll(t *testing.T) { doc := NewDocument() // Single value _, _ = doc.Set("core.editor", "vim") all, err := doc.GetAll("core.editor") if err != nil { t.Fatalf("GetAll() error: %v", err) } if len(all) != 1 { t.Errorf("GetAll() len = %d, want 1", len(all)) } // Slice value _, _ = doc.Set("core.sparse", []string{"dir1", "dir2", "dir3"}) all, err = doc.GetAll("core.sparse") if err != nil { t.Fatalf("GetAll() error: %v", err) } if len(all) != 3 { t.Errorf("GetAll() len = %d, want 3", len(all)) } // Non-existent key _, err = doc.GetAll("nonexistent.key") if !errors.Is(err, ErrKeyNotFound) { t.Errorf("GetAll() error = %v, want ErrKeyNotFound", err) } } func TestDocumentBadKey(t *testing.T) { doc := NewDocument() // Test Get with bad key _, _, err := doc.Get("a.b.c") if err == nil { t.Errorf("Get(a.b.c) expected error, got nil") } if !IsErrBadConfigKey(err) { t.Errorf("Get(a.b.c) error = %v, want ErrBadConfigKey", err) } // Test Set with bad key _, err = doc.Set("a.b.c", "value") if err == nil { t.Errorf("Set(a.b.c) expected error, got nil") } if !IsErrBadConfigKey(err) { t.Errorf("Set(a.b.c) error = %v, want ErrBadConfigKey", err) } // Test Add with bad key err = doc.Add("a.b.c", "value") if err == nil { t.Errorf("Add(a.b.c) expected error, got nil") } if !IsErrBadConfigKey(err) { t.Errorf("Add(a.b.c) error = %v, want ErrBadConfigKey", err) } // Test Delete with bad key err = doc.Delete("a.b.c") if err == nil { t.Errorf("Delete(a.b.c) expected error, got nil") } if !IsErrBadConfigKey(err) { t.Errorf("Delete(a.b.c) error = %v, want ErrBadConfigKey", err) } } ================================================ FILE: modules/zeta/config/encode.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "bytes" "errors" "fmt" "os" "path/filepath" "time" "github.com/antgroup/hugescm/modules/strengthen" ) func atomicEncode(zf string, doc Document) error { data, err := MarshalDocument(doc) if err != nil { return err } return atomicWrite(zf, data) } // atomicWrite writes data to a file atomically using write-and-rename pattern. func atomicWrite(path string, data []byte) error { dir := filepath.Dir(path) _ = os.MkdirAll(dir, 0755) cachePath := fmt.Sprintf("%s/.zeta-%d.toml", dir, time.Now().UnixNano()) if err := os.WriteFile(cachePath, data, 0644); err != nil { return err } if err := os.Rename(cachePath, path); err != nil { _ = os.Remove(cachePath) return err } return nil } func Encode(zetaDir string, config *Config) error { if config == nil || len(zetaDir) == 0 { return ErrInvalidArgument } zf := filepath.Join(zetaDir, "zeta.toml") return atomicWriteConfig(zf, config) } func EncodeGlobal(config *Config) error { if config == nil { return ErrInvalidArgument } zfg := strengthen.ExpandPath("~/.zeta.toml") return atomicWriteConfig(zfg, config) } // atomicWriteConfig writes a Config struct to a file atomically using go-toml/v2. func atomicWriteConfig(path string, config *Config) error { var buf bytes.Buffer encoder := newTOMLEncoder(&buf) if err := encoder.Encode(config); err != nil { return err } return atomicWrite(path, buf.Bytes()) } type UpdateOptions struct { Values map[string]any Append bool } func updateInternal(zf string, opts *UpdateOptions) error { if opts == nil || opts.Values == nil { return errors.New("invalid argument for update config") } // Load existing document or create new one doc, err := loadDocumentOrNew(zf) if err != nil { return err } // Apply updates for k, v := range opts.Values { if opts.Append { if err := doc.Add(k, v); err != nil { return err } } else { if _, err := doc.Set(k, v); err != nil { return err } } } // Validate before write if err := ValidateDocument(doc); err != nil { return fmt.Errorf("validation failed: %w", err) } return atomicEncode(zf, doc) } // loadDocumentOrNew loads a document from file or returns a new empty document. func loadDocumentOrNew(path string) (Document, error) { doc, err := LoadDocumentFile(path) if err != nil { if os.IsNotExist(err) { return NewDocument(), nil } return nil, err } return doc, nil } func UpdateSystem(opts *UpdateOptions) error { zfg := configSystemPath() return updateInternal(zfg, opts) } func UpdateGlobal(opts *UpdateOptions) error { zfg := strengthen.ExpandPath("~/.zeta.toml") return updateInternal(zfg, opts) } func UpdateLocal(zetaDir string, opts *UpdateOptions) error { zf := filepath.Join(zetaDir, "zeta.toml") return updateInternal(zf, opts) } func unsetInternal(zf string, keys ...string) error { if len(keys) == 0 { return nil } // Load existing document doc, err := LoadDocumentFile(zf) if err != nil { if os.IsNotExist(err) { return nil } return err } // Delete keys for _, k := range keys { if err := doc.Delete(k); err != nil { if errors.Is(err, ErrKeyNotFound) { continue } return err } } // Validate before write if err := ValidateDocument(doc); err != nil { return fmt.Errorf("validation failed: %w", err) } return atomicEncode(zf, doc) } func UnsetSystem(keys ...string) error { zfg := configSystemPath() return unsetInternal(zfg, keys...) } func UnsetGlobal(keys ...string) error { zfg := strengthen.ExpandPath("~/.zeta.toml") return unsetInternal(zfg, keys...) } func UnsetLocal(zetaDir string, keys ...string) error { zf := filepath.Join(zetaDir, "zeta.toml") return unsetInternal(zf, keys...) } ================================================ FILE: modules/zeta/config/encode_test.go ================================================ package config import ( "bytes" "fmt" "os" "path/filepath" "runtime" "testing" "github.com/pelletier/go-toml/v2" ) func TestEncode(t *testing.T) { _, filename, _, _ := runtime.Caller(0) file := filepath.Join(filepath.Dir(filename), "config_test.toml") doc, err := LoadDocumentFile(file) if err != nil { fmt.Fprintf(os.Stderr, "load error: %v\n", err) return } // Add user section _, _ = doc.Set("user.email", "zeta@example.io") _, _ = doc.Set("user.name", "bob") data, err := MarshalDocument(doc) if err != nil { fmt.Fprintf(os.Stderr, "encode error: %v\n", err) return } _, _ = fmt.Fprintf(os.Stdout, "%s", data) } func TestUpdateConfig(t *testing.T) { values := map[string]any{ "core.sharingRoot": "/tmp/sharingRoot", "user.email": "zeta@example.io", "user.name": "bob", } _ = UpdateLocal("/tmp/testconfig/.zeta", &UpdateOptions{Values: values}) values["user.name"] = "Staff" _ = UpdateLocal("/tmp/testconfig/.zeta", &UpdateOptions{Values: values}) } func TestEncodeInt(t *testing.T) { s := &Core{} var buf bytes.Buffer encoder := toml.NewEncoder(&buf) encoder.SetArraysMultiline(false) encoder.SetIndentTables(false) if err := encoder.Encode(s); err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) } } func TestUpdateKey(t *testing.T) { _, filename, _, _ := runtime.Caller(0) file := filepath.Join(filepath.Dir(filename), "config_test.toml") doc, err := LoadDocumentFile(file) if err != nil { fmt.Fprintf(os.Stderr, "load error: %v\n", err) return } if err := doc.Add("core.sparse-checkout", "dev/jack"); err != nil { fmt.Fprintf(os.Stderr, "add error: %v\n", err) return } data, err := MarshalDocument(doc) if err != nil { fmt.Fprintf(os.Stderr, "encode error: %v\n", err) return } fmt.Fprintf(os.Stderr, "%s", data) } func TestUpdateNot(t *testing.T) { _, filename, _, _ := runtime.Caller(0) file := filepath.Join(filepath.Dir(filename), "config_test.toml") doc, err := LoadDocumentFile(file) if err != nil { fmt.Fprintf(os.Stderr, "load error: %v\n", err) return } if err := doc.Add("core.sparse-checkout", int64(10086)); err != nil { fmt.Fprintf(os.Stderr, "add error: %v\n", err) return } data, err := MarshalDocument(doc) if err != nil { fmt.Fprintf(os.Stderr, "encode error: %v\n", err) return } fmt.Fprintf(os.Stderr, "%s", data) } func TestUpdateNot2(t *testing.T) { _, filename, _, _ := runtime.Caller(0) file := filepath.Join(filepath.Dir(filename), "config_test.toml") doc, err := LoadDocumentFile(file) if err != nil { fmt.Fprintf(os.Stderr, "load error: %v\n", err) return } if err := doc.Add("core.namespace", int64(10086)); err != nil { fmt.Fprintf(os.Stderr, "add error: %v\n", err) return } data, err := MarshalDocument(doc) if err != nil { fmt.Fprintf(os.Stderr, "encode error: %v\n", err) return } fmt.Fprintf(os.Stderr, "%s", data) } func TestUpdateValidationFailure(t *testing.T) { // Create a temp file with valid content tmpDir := t.TempDir() tmpFile := filepath.Join(tmpDir, "zeta.toml") originalContent := `[core] editor = "vim" ` if err := os.WriteFile(tmpFile, []byte(originalContent), 0644); err != nil { t.Fatalf("WriteFile() error: %v", err) } // Read original content dataBefore, err := os.ReadFile(tmpFile) if err != nil { t.Fatalf("ReadFile() error: %v", err) } // Try to update with an invalid key (nested path) err = updateInternal(tmpFile, &UpdateOptions{ Values: map[string]any{ "a.b.c": "value", }, }) if err == nil { t.Errorf("updateInternal() expected error for bad key, got nil") } // Read content after failed update dataAfter, err := os.ReadFile(tmpFile) if err != nil { t.Fatalf("ReadFile() error: %v", err) } // Content should be unchanged if !bytes.Equal(dataBefore, dataAfter) { t.Errorf("File content changed after failed update\nBefore: %s\nAfter: %s", dataBefore, dataAfter) } } func TestUnsetValidationFailure(t *testing.T) { // Create a temp file with valid content tmpDir := t.TempDir() tmpFile := filepath.Join(tmpDir, "zeta.toml") originalContent := `[core] editor = "vim" ` if err := os.WriteFile(tmpFile, []byte(originalContent), 0644); err != nil { t.Fatalf("WriteFile() error: %v", err) } // Read original content dataBefore, err := os.ReadFile(tmpFile) if err != nil { t.Fatalf("ReadFile() error: %v", err) } // Try to unset with an invalid key (nested path) err = unsetInternal(tmpFile, "a.b.c") if err == nil { t.Errorf("unsetInternal() expected error for bad key, got nil") } // Read content after failed unset dataAfter, err := os.ReadFile(tmpFile) if err != nil { t.Fatalf("ReadFile() error: %v", err) } // Content should be unchanged if !bytes.Equal(dataBefore, dataAfter) { t.Errorf("File content changed after failed unset\nBefore: %s\nAfter: %s", dataBefore, dataAfter) } } ================================================ FILE: modules/zeta/config/type.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "bytes" "fmt" "strings" "github.com/antgroup/hugescm/modules/strengthen" ) const ( UNSPECIFIED = "" BOOLEAN = "bool" INTEGER = "int" BOOLORINT = "bool-or-int" PATH = "path" DATETIME = "datetime" ) const ( BOOLEAN_UNSET = 0 BOOLEAN_TRUE = 1 BOOLEAN_FALSE = 2 ) type Boolean struct { val int } var ( True = Boolean{val: BOOLEAN_TRUE} False = Boolean{val: BOOLEAN_FALSE} ) func (b *Boolean) UnmarshalTOML(a any) error { var s string switch data := a.(type) { case fmt.Stringer: s = data.String() case string: s = data case bool: if data { b.val = BOOLEAN_TRUE } else { b.val = BOOLEAN_FALSE } return nil case int64: if data != 0 { b.val = BOOLEAN_TRUE } else { b.val = BOOLEAN_FALSE } return nil case int: if data != 0 { b.val = BOOLEAN_TRUE } else { b.val = BOOLEAN_FALSE } return nil default: return fmt.Errorf("invalid boolean value: %T", a) } switch strings.ToLower(s) { case "true", "yes", "on", "1": b.val = BOOLEAN_TRUE case "false", "no", "off", "0": b.val = BOOLEAN_FALSE default: return fmt.Errorf("invalid boolean value: %q", s) } return nil } // UnmarshalText implements encoding.TextUnmarshaler for Boolean. // This is used by go-toml/v2 for decoding boolean values. func (b *Boolean) UnmarshalText(text []byte) error { s := strings.ToLower(string(text)) switch s { case "true", "yes", "on", "1": b.val = BOOLEAN_TRUE case "false", "no", "off", "0": b.val = BOOLEAN_FALSE default: return fmt.Errorf("invalid boolean value: %q", string(text)) } return nil } func (b *Boolean) IsUnset() bool { return b.val == BOOLEAN_UNSET } // Merge merges the other boolean value into b. // If other has a definite value (TRUE or FALSE), it overrides b's value. // This follows the config priority: local > global > system. func (b *Boolean) Merge(other *Boolean) { // If other has a definite value, it should override b (higher priority) if other.val != BOOLEAN_UNSET { b.val = other.val } // If other is UNSET, keep b's current value (don't override with UNSET) } func (b *Boolean) True() bool { return b.val == BOOLEAN_TRUE } func (b *Boolean) False() bool { return b.val == BOOLEAN_FALSE } func (b *Boolean) Set(v bool) bool { if v { b.val = BOOLEAN_TRUE return true } b.val = BOOLEAN_FALSE return false } func (b *Boolean) Unset() { b.val = BOOLEAN_UNSET } // MarshalText implements encoding.TextMarshaler for Boolean. // This is used by TOML encoder to convert Boolean to text representation. func (b Boolean) MarshalText() ([]byte, error) { switch b.val { case BOOLEAN_TRUE: return []byte("true"), nil case BOOLEAN_FALSE: return []byte("false"), nil default: // UNSET - return empty string (will be handled by omitempty) return []byte(""), nil } } type StringArray []string func (a *StringArray) UnmarshalTOML(data any) error { switch v := data.(type) { case string: *a = []string{v} case []any: var vv []string for _, e := range v { if s, ok := e.(string); ok { vv = append(vv, s) continue } return fmt.Errorf("expected string in array, but got %T", e) } *a = vv default: return fmt.Errorf("unexpected type %T", data) } return nil } type Size int64 func (s *Size) UnmarshalText(text []byte) error { if bytes.HasSuffix(text, []byte("b")) || bytes.HasSuffix(text, []byte("B")) { text = text[0 : len(text)-1] } size, err := strengthen.ParseSize(string(text)) *s = Size(size) return err } type Accelerator string const ( Direct Accelerator = "direct" Dragonfly Accelerator = "dragonfly" Aria2 Accelerator = "aria2" // https://github.com/aria2/aria2 ) type Strategy string // Prune strategy const ( StrategyUnspecified Strategy = "unspecified" StrategyHeuristical Strategy = "heuristical" StrategyEager Strategy = "eager" StrategyExtreme Strategy = "extreme" ) type Display interface { Show(a any, keys ...string) error } ================================================ FILE: modules/zeta/config/type_test.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "strings" "testing" "github.com/pelletier/go-toml/v2" ) func TestSizeUnmarshalText(t *testing.T) { tests := []struct { input string expected int64 }{ {"100", 100}, {"1k", 1024}, {"1K", 1024}, {"1m", 1024 * 1024}, {"1M", 1024 * 1024}, {"1g", 1024 * 1024 * 1024}, {"1G", 1024 * 1024 * 1024}, {"10m", 10 * 1024 * 1024}, {"10M", 10 * 1024 * 1024}, {"10mb", 10 * 1024 * 1024}, {"10MB", 10 * 1024 * 1024}, {"512k", 512 * 1024}, {"512kb", 512 * 1024}, } for _, tt := range tests { t.Run(tt.input, func(t *testing.T) { var s Size if err := s.UnmarshalText([]byte(tt.input)); err != nil { t.Fatalf("UnmarshalText(%q) error: %v", tt.input, err) } if int64(s) != tt.expected { t.Errorf("UnmarshalText(%q) = %d, want %d", tt.input, s, tt.expected) } }) } } func TestSizeTOMLDecode(t *testing.T) { type Config struct { Threshold Size `toml:"threshold"` Size Size `toml:"size"` } tests := []struct { name string input string expected Config }{ { name: "basic sizes", input: ` threshold = "10m" size = "100m" `, expected: Config{ Threshold: 10 * 1024 * 1024, Size: 100 * 1024 * 1024, }, }, { name: "with B suffix", input: ` threshold = "10MB" size = "100GB" `, expected: Config{ Threshold: 10 * 1024 * 1024, Size: 100 * 1024 * 1024 * 1024, }, }, { name: "numeric values", input: ` threshold = "1024" size = "2048" `, expected: Config{ Threshold: 1024, Size: 2048, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var c Config if err := toml.NewDecoder(strings.NewReader(tt.input)).Decode(&c); err != nil { t.Fatalf("Decode error: %v", err) } if c.Threshold != tt.expected.Threshold { t.Errorf("Threshold = %d, want %d", c.Threshold, tt.expected.Threshold) } if c.Size != tt.expected.Size { t.Errorf("Size = %d, want %d", c.Size, tt.expected.Size) } }) } } func TestSizeInFragment(t *testing.T) { input := ` [fragment] threshold = "2g" size = "5g" ` var cfg Config if err := toml.NewDecoder(strings.NewReader(input)).Decode(&cfg); err != nil { t.Fatalf("Decode error: %v", err) } // Check raw values if cfg.Fragment.ThresholdRaw != 2*1024*1024*1024 { t.Errorf("ThresholdRaw = %d, want %d", cfg.Fragment.ThresholdRaw, 2*1024*1024*1024) } if cfg.Fragment.SizeRaw != 5*1024*1024*1024 { t.Errorf("SizeRaw = %d, want %d", cfg.Fragment.SizeRaw, 5*1024*1024*1024) } // Check computed values if expected := int64(2 * 1024 * 1024 * 1024); cfg.Fragment.Threshold() != expected { t.Errorf("Threshold() = %d, want %d", cfg.Fragment.Threshold(), expected) } if expected := int64(5 * 1024 * 1024 * 1024); cfg.Fragment.Size() != expected { t.Errorf("Size() = %d, want %d", cfg.Fragment.Size(), expected) } } func TestSizeDefault(t *testing.T) { // When not set, should use defaults input := ` [fragment] ` var cfg Config if err := toml.NewDecoder(strings.NewReader(input)).Decode(&cfg); err != nil { t.Fatalf("Decode error: %v", err) } // Check defaults are used when values are 0 if cfg.Fragment.Threshold() != FragmentThreshold { t.Errorf("Threshold() = %d, want default %d", cfg.Fragment.Threshold(), FragmentThreshold) } if cfg.Fragment.Size() != FragmentSize { t.Errorf("Size() = %d, want default %d", cfg.Fragment.Size(), FragmentSize) } } func TestSizeInTransport(t *testing.T) { input := ` [transport] largeSize = "10m" maxEntries = 8 ` var cfg Config if err := toml.NewDecoder(strings.NewReader(input)).Decode(&cfg); err != nil { t.Fatalf("Decode error: %v", err) } if cfg.Transport.LargeSizeRaw != 10*1024*1024 { t.Errorf("LargeSizeRaw = %d, want %d", cfg.Transport.LargeSizeRaw, 10*1024*1024) } if cfg.Transport.MaxEntries != 8 { t.Errorf("MaxEntries = %d, want 8", cfg.Transport.MaxEntries) } // Check computed value if expected := int64(10 * 1024 * 1024); cfg.Transport.LargeSize() != expected { t.Errorf("LargeSize() = %d, want %d", cfg.Transport.LargeSize(), expected) } } ================================================ FILE: modules/zeta/config/validate.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config // ValidateDocument validates that a Document can be successfully // decoded into a valid Config struct. func ValidateDocument(doc Document) error { var cfg Config return ValidateDocumentAs(doc, &cfg) } ================================================ FILE: modules/zeta/config/value.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "errors" "fmt" ) // Kind represents the type of a Value. type Kind int const ( KindInvalid Kind = iota KindString KindInt64 KindBool KindFloat64 KindStringSlice KindInt64Slice KindBoolSlice KindFloat64Slice ) // String returns the string representation of Kind. func (k Kind) String() string { switch k { case KindInvalid: return "invalid" case KindString: return "string" case KindInt64: return "int64" case KindBool: return "bool" case KindFloat64: return "float64" case KindStringSlice: return "[]string" case KindInt64Slice: return "[]int64" case KindBoolSlice: return "[]bool" case KindFloat64Slice: return "[]float64" default: return "unknown" } } // Value represents a typed configuration value. // It is the core value model for the dynamic editing layer. // Zero value is invalid and should not be used. type Value struct { kind Kind value any } // NewStringValue creates a string Value. func NewStringValue(s string) Value { return Value{kind: KindString, value: s} } // NewInt64Value creates an int64 Value. func NewInt64Value(i int64) Value { return Value{kind: KindInt64, value: i} } // NewBoolValue creates a bool Value. func NewBoolValue(b bool) Value { return Value{kind: KindBool, value: b} } // NewFloat64Value creates a float64 Value. func NewFloat64Value(f float64) Value { return Value{kind: KindFloat64, value: f} } // NewStringSliceValue creates a []string Value. // Makes a copy of the input slice to avoid sharing the underlying array. func NewStringSliceValue(s []string) Value { if s == nil { s = []string{} } // Copy to avoid sharing underlying array copied := make([]string, len(s)) copy(copied, s) return Value{kind: KindStringSlice, value: copied} } // NewInt64SliceValue creates a []int64 Value. // Makes a copy of the input slice to avoid sharing the underlying array. func NewInt64SliceValue(i []int64) Value { if i == nil { i = []int64{} } // Copy to avoid sharing underlying array copied := make([]int64, len(i)) copy(copied, i) return Value{kind: KindInt64Slice, value: copied} } // NewBoolSliceValue creates a []bool Value. // Makes a copy of the input slice to avoid sharing the underlying array. func NewBoolSliceValue(b []bool) Value { if b == nil { b = []bool{} } // Copy to avoid sharing underlying array copied := make([]bool, len(b)) copy(copied, b) return Value{kind: KindBoolSlice, value: copied} } // NewFloat64SliceValue creates a []float64 Value. // Makes a copy of the input slice to avoid sharing the underlying array. func NewFloat64SliceValue(f []float64) Value { if f == nil { f = []float64{} } // Copy to avoid sharing underlying array copied := make([]float64, len(f)) copy(copied, f) return Value{kind: KindFloat64Slice, value: copied} } // Kind returns the kind of the value. func (v Value) Kind() Kind { return v.kind } // IsZero returns true if the value is zero/invalid. func (v Value) IsZero() bool { return v.kind == KindInvalid } // FromAny creates a Value from an any type. // Returns an error if the type is not supported or if the slice contains mixed types. func FromAny(a any) (Value, error) { switch val := a.(type) { case string: return NewStringValue(val), nil case int: return NewInt64Value(int64(val)), nil case int8: return NewInt64Value(int64(val)), nil case int16: return NewInt64Value(int64(val)), nil case int32: return NewInt64Value(int64(val)), nil case int64: return NewInt64Value(val), nil case bool: return NewBoolValue(val), nil case float32: return NewFloat64Value(float64(val)), nil case float64: return NewFloat64Value(val), nil case []string: return NewStringSliceValue(val), nil case []int64: return NewInt64SliceValue(val), nil case []bool: return NewBoolSliceValue(val), nil case []float64: return NewFloat64SliceValue(val), nil case []any: // Convert []any to typed slice if len(val) == 0 { // Empty []any cannot infer type return Value{}, errors.New("empty []any cannot infer type") } // Infer type from first element switch val[0].(type) { case string: slice := make([]string, 0, len(val)) for i, elem := range val { s, ok := elem.(string) if !ok { return Value{}, fmt.Errorf("mixed types in slice: element 0 is string, element %d is %T", i, elem) } slice = append(slice, s) } return NewStringSliceValue(slice), nil case int: slice := make([]int64, 0, len(val)) for i, elem := range val { n, ok := elem.(int) if !ok { return Value{}, fmt.Errorf("mixed types in slice: element 0 is int, element %d is %T", i, elem) } slice = append(slice, int64(n)) } return NewInt64SliceValue(slice), nil case int64: slice := make([]int64, 0, len(val)) for i, elem := range val { n, ok := elem.(int64) if !ok { return Value{}, fmt.Errorf("mixed types in slice: element 0 is int64, element %d is %T", i, elem) } slice = append(slice, n) } return NewInt64SliceValue(slice), nil case bool: slice := make([]bool, 0, len(val)) for i, elem := range val { b, ok := elem.(bool) if !ok { return Value{}, fmt.Errorf("mixed types in slice: element 0 is bool, element %d is %T", i, elem) } slice = append(slice, b) } return NewBoolSliceValue(slice), nil case float64: slice := make([]float64, 0, len(val)) for i, elem := range val { f, ok := elem.(float64) if !ok { return Value{}, fmt.Errorf("mixed types in slice: element 0 is float64, element %d is %T", i, elem) } slice = append(slice, f) } return NewFloat64SliceValue(slice), nil default: return Value{}, fmt.Errorf("unsupported slice element type: %T", val[0]) } default: return Value{}, fmt.Errorf("unsupported type: %T", a) } } // ToAny returns the underlying value as any. func (v Value) ToAny() any { switch v.kind { case KindString: return v.value.(string) case KindInt64: return v.value.(int64) case KindBool: return v.value.(bool) case KindFloat64: return v.value.(float64) case KindStringSlice: return v.value.([]string) case KindInt64Slice: return v.value.([]int64) case KindBoolSlice: return v.value.([]bool) case KindFloat64Slice: return v.value.([]float64) default: return nil } } // First returns the first element for slice values, or the value itself for scalar values. // Returns false if the value is invalid or the slice is empty. func (v Value) First() (any, bool) { switch v.kind { case KindString: return v.value.(string), true case KindInt64: return v.value.(int64), true case KindBool: return v.value.(bool), true case KindFloat64: return v.value.(float64), true case KindStringSlice: slice := v.value.([]string) if len(slice) == 0 { return nil, false } return slice[0], true case KindInt64Slice: slice := v.value.([]int64) if len(slice) == 0 { return nil, false } return slice[0], true case KindBoolSlice: slice := v.value.([]bool) if len(slice) == 0 { return nil, false } return slice[0], true case KindFloat64Slice: slice := v.value.([]float64) if len(slice) == 0 { return nil, false } return slice[0], true default: return nil, false } } // All returns all elements as []any. // For scalar values, returns a single-element slice. // For invalid values, returns nil. func (v Value) All() []any { switch v.kind { case KindString: return []any{v.value.(string)} case KindInt64: return []any{v.value.(int64)} case KindBool: return []any{v.value.(bool)} case KindFloat64: return []any{v.value.(float64)} case KindStringSlice: slice := v.value.([]string) result := make([]any, len(slice)) for i, s := range slice { result[i] = s } return result case KindInt64Slice: slice := v.value.([]int64) result := make([]any, len(slice)) for i, n := range slice { result[i] = n } return result case KindBoolSlice: slice := v.value.([]bool) result := make([]any, len(slice)) for i, b := range slice { result[i] = b } return result case KindFloat64Slice: slice := v.value.([]float64) result := make([]any, len(slice)) for i, f := range slice { result[i] = f } return result default: return nil } } // Append appends another value to this value. // Both values must be of compatible types. // Scalar + Scalar -> typed slice // Slice + Scalar -> typed slice (append) // Slice + Slice -> error (not supported) // Returns error for type mismatch. func (v Value) Append(other Value) (Value, error) { if other.IsZero() { return v, nil } if v.IsZero() { return other, nil } // Both are scalars of the same type -> create slice if v.isScalar() && other.isScalar() { if v.kind != other.kind { return Value{}, fmt.Errorf("type mismatch: cannot append %s to %s", other.kind, v.kind) } switch v.kind { case KindString: return NewStringSliceValue([]string{v.value.(string), other.value.(string)}), nil case KindInt64: return NewInt64SliceValue([]int64{v.value.(int64), other.value.(int64)}), nil case KindBool: return NewBoolSliceValue([]bool{v.value.(bool), other.value.(bool)}), nil case KindFloat64: return NewFloat64SliceValue([]float64{v.value.(float64), other.value.(float64)}), nil } } // v is slice, other is scalar -> append if v.isSlice() && other.isScalar() { elementKind := v.sliceElementKind() if other.kind != elementKind { return Value{}, fmt.Errorf("type mismatch: cannot append %s to %s", other.kind, v.kind) } switch v.kind { case KindStringSlice: oldSlice := v.value.([]string) newSlice := make([]string, len(oldSlice)+1) copy(newSlice, oldSlice) newSlice[len(oldSlice)] = other.value.(string) return Value{kind: KindStringSlice, value: newSlice}, nil case KindInt64Slice: oldSlice := v.value.([]int64) newSlice := make([]int64, len(oldSlice)+1) copy(newSlice, oldSlice) newSlice[len(oldSlice)] = other.value.(int64) return Value{kind: KindInt64Slice, value: newSlice}, nil case KindBoolSlice: oldSlice := v.value.([]bool) newSlice := make([]bool, len(oldSlice)+1) copy(newSlice, oldSlice) newSlice[len(oldSlice)] = other.value.(bool) return Value{kind: KindBoolSlice, value: newSlice}, nil case KindFloat64Slice: oldSlice := v.value.([]float64) newSlice := make([]float64, len(oldSlice)+1) copy(newSlice, oldSlice) newSlice[len(oldSlice)] = other.value.(float64) return Value{kind: KindFloat64Slice, value: newSlice}, nil } } // v is scalar, other is slice -> error (cannot append slice to scalar) if v.isScalar() && other.isSlice() { return Value{}, errors.New("cannot append slice to scalar") } // Both are slices -> error (not supported in current semantics) if v.isSlice() && other.isSlice() { return Value{}, errors.New("cannot append slice to slice") } return Value{}, errors.New("unsupported append operation") } // isScalar returns true if the value is a scalar type. func (v Value) isScalar() bool { switch v.kind { case KindString, KindInt64, KindBool, KindFloat64: return true default: return false } } // isSlice returns true if the value is a slice type. func (v Value) isSlice() bool { switch v.kind { case KindStringSlice, KindInt64Slice, KindBoolSlice, KindFloat64Slice: return true default: return false } } // sliceElementKind returns the Kind of slice elements. func (v Value) sliceElementKind() Kind { switch v.kind { case KindStringSlice: return KindString case KindInt64Slice: return KindInt64 case KindBoolSlice: return KindBool case KindFloat64Slice: return KindFloat64 default: return KindInvalid } } ================================================ FILE: modules/zeta/config/value_test.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package config import ( "testing" ) func TestFromAny(t *testing.T) { tests := []struct { name string input any wantKind Kind wantValue any wantError bool }{ // Scalar types { name: "string", input: "hello", wantKind: KindString, wantValue: "hello", }, { name: "int", input: 42, wantKind: KindInt64, wantValue: int64(42), }, { name: "int64", input: int64(123), wantKind: KindInt64, wantValue: int64(123), }, { name: "bool true", input: true, wantKind: KindBool, wantValue: true, }, { name: "bool false", input: false, wantKind: KindBool, wantValue: false, }, { name: "float64", input: 3.14, wantKind: KindFloat64, wantValue: 3.14, }, // Slice types { name: "[]string", input: []string{"a", "b", "c"}, wantKind: KindStringSlice, wantValue: []string{"a", "b", "c"}, }, { name: "[]int64", input: []int64{1, 2, 3}, wantKind: KindInt64Slice, wantValue: []int64{1, 2, 3}, }, { name: "[]bool", input: []bool{true, false}, wantKind: KindBoolSlice, wantValue: []bool{true, false}, }, { name: "[]float64", input: []float64{1.1, 2.2}, wantKind: KindFloat64Slice, wantValue: []float64{1.1, 2.2}, }, // []any same type { name: "[]any string", input: []any{"a", "b"}, wantKind: KindStringSlice, wantValue: []string{"a", "b"}, }, { name: "[]any int64", input: []any{int64(1), int64(2)}, wantKind: KindInt64Slice, wantValue: []int64{1, 2}, }, // Mixed type error { name: "[]any mixed types", input: []any{"a", 1}, wantError: true, }, // Unsupported type { name: "unsupported type", input: struct{}{}, wantError: true, }, // Empty []any should error { name: "empty []any", input: []any{}, wantError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { v, err := FromAny(tt.input) if tt.wantError { if err == nil { t.Errorf("FromAny(%v) expected error, got nil", tt.input) } return } if err != nil { t.Errorf("FromAny(%v) unexpected error: %v", tt.input, err) return } if v.Kind() != tt.wantKind { t.Errorf("FromAny(%v) kind = %v, want %v", tt.input, v.Kind(), tt.wantKind) } // Compare values got := v.ToAny() if !compareValues(got, tt.wantValue) { t.Errorf("FromAny(%v) = %v, want %v", tt.input, got, tt.wantValue) } }) } } func TestValueAppend(t *testing.T) { tests := []struct { name string v1 Value v2 Value wantKind Kind wantValue any wantError bool }{ // scalar + scalar -> slice { name: "string + string", v1: NewStringValue("a"), v2: NewStringValue("b"), wantKind: KindStringSlice, wantValue: []string{"a", "b"}, }, { name: "int64 + int64", v1: NewInt64Value(1), v2: NewInt64Value(2), wantKind: KindInt64Slice, wantValue: []int64{1, 2}, }, { name: "bool + bool", v1: NewBoolValue(true), v2: NewBoolValue(false), wantKind: KindBoolSlice, wantValue: []bool{true, false}, }, // slice + scalar -> slice { name: "[]string + string", v1: NewStringSliceValue([]string{"a", "b"}), v2: NewStringValue("c"), wantKind: KindStringSlice, wantValue: []string{"a", "b", "c"}, }, { name: "[]int64 + int64", v1: NewInt64SliceValue([]int64{1, 2}), v2: NewInt64Value(3), wantKind: KindInt64Slice, wantValue: []int64{1, 2, 3}, }, // Type mismatch error { name: "string + int64", v1: NewStringValue("a"), v2: NewInt64Value(1), wantError: true, }, { name: "[]string + int64", v1: NewStringSliceValue([]string{"a"}), v2: NewInt64Value(1), wantError: true, }, // slice + slice -> error { name: "[]string + []string", v1: NewStringSliceValue([]string{"a"}), v2: NewStringSliceValue([]string{"b"}), wantError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result, err := tt.v1.Append(tt.v2) if tt.wantError { if err == nil { t.Errorf("Append() expected error, got nil") } return } if err != nil { t.Errorf("Append() unexpected error: %v", err) return } if result.Kind() != tt.wantKind { t.Errorf("Append() kind = %v, want %v", result.Kind(), tt.wantKind) } got := result.ToAny() if !compareValues(got, tt.wantValue) { t.Errorf("Append() = %v, want %v", got, tt.wantValue) } }) } } func TestValueFirstAll(t *testing.T) { tests := []struct { name string value Value wantFirst any wantFirstOk bool wantAll []any }{ { name: "string scalar", value: NewStringValue("hello"), wantFirst: "hello", wantFirstOk: true, wantAll: []any{"hello"}, }, { name: "int64 scalar", value: NewInt64Value(42), wantFirst: int64(42), wantFirstOk: true, wantAll: []any{int64(42)}, }, { name: "[]string slice", value: NewStringSliceValue([]string{"a", "b", "c"}), wantFirst: "a", wantFirstOk: true, wantAll: []any{"a", "b", "c"}, }, { name: "[]int64 slice", value: NewInt64SliceValue([]int64{1, 2, 3}), wantFirst: int64(1), wantFirstOk: true, wantAll: []any{int64(1), int64(2), int64(3)}, }, { name: "empty slice", value: NewStringSliceValue([]string{}), wantFirst: nil, wantFirstOk: false, wantAll: []any{}, }, { name: "invalid value", value: Value{}, wantFirst: nil, wantFirstOk: false, wantAll: nil, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { gotFirst, gotOk := tt.value.First() if gotOk != tt.wantFirstOk { t.Errorf("First() ok = %v, want %v", gotOk, tt.wantFirstOk) } if !compareValues(gotFirst, tt.wantFirst) { t.Errorf("First() = %v, want %v", gotFirst, tt.wantFirst) } gotAll := tt.value.All() if !compareSlices(gotAll, tt.wantAll) { t.Errorf("All() = %v, want %v", gotAll, tt.wantAll) } }) } } func TestValueSliceCopy(t *testing.T) { // Test that slice constructors copy the input slice original := []string{"a", "b", "c"} v := NewStringSliceValue(original) // Modify original original[0] = "modified" // Value should not be affected got := v.ToAny().([]string) if got[0] != "a" { t.Errorf("NewStringSliceValue did not copy: got %v, want 'a'", got[0]) } // Test that Append creates a new slice v1 := NewStringSliceValue([]string{"a", "b"}) v2 := NewStringValue("c") result, err := v1.Append(v2) if err != nil { t.Fatalf("Append() error: %v", err) } // Modify original v1's underlying slice v1Slice := v1.ToAny().([]string) v1Slice[0] = "modified" // Result should not be affected resultSlice := result.ToAny().([]string) if resultSlice[0] != "a" { t.Errorf("Append did not copy: got %v, want 'a'", resultSlice[0]) } } // Helper functions for comparison func compareValues(a, b any) bool { if a == nil && b == nil { return true } if a == nil || b == nil { return false } switch a := a.(type) { case []string: bb, ok := b.([]string) if !ok { return false } if len(a) != len(bb) { return false } for i := range a { if a[i] != bb[i] { return false } } return true case []int64: bb, ok := b.([]int64) if !ok { return false } if len(a) != len(bb) { return false } for i := range a { if a[i] != bb[i] { return false } } return true case []bool: bb, ok := b.([]bool) if !ok { return false } if len(a) != len(bb) { return false } for i := range a { if a[i] != bb[i] { return false } } return true case []float64: bb, ok := b.([]float64) if !ok { return false } if len(a) != len(bb) { return false } for i := range a { if a[i] != bb[i] { return false } } return true default: return a == b } } func compareSlices(a, b []any) bool { if a == nil && b == nil { return true } if a == nil || b == nil { return false } if len(a) != len(b) { return false } for i := range a { if !compareValues(a[i], b[i]) { return false } } return true } ================================================ FILE: modules/zeta/error.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package zeta import ( "errors" "fmt" ) var ( ErrUnsupportedCompressMethod = errors.New("unsupported compress method") ErrMistakeHashText = errors.New("mistake hash text") ErrUnsupportedObject = errors.New("unsupported object type") ErrMismatchedMagic = errors.New("mismatched magic") ErrMismatchedVersion = errors.New("mismatched version") ) type ErrMismatchedObject struct { Want string Got string } func (err *ErrMismatchedObject) Error() string { return fmt.Sprintf("mismatched object want '%s' got '%s'", err.Want, err.Got) } func IsErrMismatchedObject(err error) bool { var e *ErrMismatchedObject return errors.As(err, &e) } type ErrNotExist struct { T string OID string } func (err *ErrNotExist) Error() string { return fmt.Sprintf("%s '%s' not exist", err.T, err.OID) } func NewErrNotExist(t string, oid string) error { return &ErrNotExist{T: t, OID: oid} } func IsErrNotExist(err error) bool { if errors.Is(err, ErrMistakeHashText) { // NOT FOUND return true } var e *ErrNotExist return errors.As(err, &e) } type ErrStatusCode struct { Code int Message string } func (err *ErrStatusCode) Error() string { return err.Message } func IsErrStatusCode(err error) bool { var e *ErrStatusCode return errors.As(err, &e) } func NewErrStatusCode(statusCode int, format string, a ...any) error { return &ErrStatusCode{Code: statusCode, Message: fmt.Sprintf(format, a...)} } type ErrExitCode struct { Code int Message string } func (err *ErrExitCode) Error() string { return err.Message } func IsErrExitCode(err error) bool { var e *ErrExitCode return errors.As(err, &e) } ================================================ FILE: modules/zeta/object/blob.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package object import ( "bytes" "encoding/binary" "errors" "fmt" "io" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" ) type CompressMethod uint16 const ( BLOB_CURRENT_VERSION uint16 = 1 BLOB_CACHE_SIZE_LIMIT = 1024 * 1024 STORE CompressMethod = 0 ZSTD CompressMethod = 1 BROTLI CompressMethod = 2 DEFLATE CompressMethod = 3 XZ CompressMethod = 4 BZ2 CompressMethod = 5 ) var ( BLOB_MAGIC = [4]byte{'Z', 'B', 0x00, 0x01} BLANK_BLOB_BYTES = [16]byte{'Z', 'B', 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} ) var ( ErrMismatchedMagic = errors.New("mismatched magic") ErrMismatchedVersion = errors.New("mismatched version") ) type Blob struct { Contents io.Reader Size int64 closeFn func() error } func (b *Blob) Close() error { if b.closeFn == nil { return nil } return b.closeFn() } func NewBlob(raw io.ReadCloser) (*Blob, error) { var hdr [16]byte if _, err := io.ReadFull(raw, hdr[:]); err != nil { return nil, err } if !bytes.Equal(BLOB_MAGIC[:], hdr[:4]) { return nil, ErrMismatchedMagic } if version := binary.BigEndian.Uint16(hdr[4:6]); version != BLOB_CURRENT_VERSION { return nil, ErrMismatchedVersion } method := CompressMethod(binary.BigEndian.Uint16(hdr[6:8])) uncompressedSize := int64(binary.BigEndian.Uint64(hdr[8:16])) switch method { case STORE: return &Blob{Contents: raw, Size: uncompressedSize, closeFn: func() error { return raw.Close() }}, nil case ZSTD: zr, err := streamio.GetZstdReader(raw) if err != nil { return nil, fmt.Errorf("unable new zstd decoder: %w", err) } return &Blob{Contents: zr, Size: uncompressedSize, closeFn: func() error { streamio.PutZstdReader(zr) return raw.Close() }}, nil case DEFLATE: zr, err := streamio.GetZlibReader(raw) if err != nil { return nil, fmt.Errorf("unable new zlib decoder: %w", err) } return &Blob{Contents: zr.Reader, Size: uncompressedSize, closeFn: func() error { streamio.PutZlibReader(zr) return raw.Close() }}, nil } return nil, fmt.Errorf("unsupported method: '%d'", method) } func HashFrom(r io.Reader) (plumbing.Hash, error) { br, err := NewBlob(io.NopCloser(r)) if err != nil { return plumbing.ZeroHash, err } defer br.Close() // nolint hasher := plumbing.NewHasher() if _, err := io.Copy(hasher, br.Contents); err != nil { return plumbing.ZeroHash, err } return hasher.Sum(), nil } ================================================ FILE: modules/zeta/object/change.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "bytes" "context" "errors" "fmt" "strings" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/merkletrie" ) // Change values represent a detected change between two git trees. For // modifications, From is the original status of the node and To is its // final status. For insertions, From is the zero value and for // deletions To is the zero value. type Change struct { From ChangeEntry To ChangeEntry } var ( empty ChangeEntry ErrMalformedChange = errors.New("malformed change: empty from and to") ) func (c *Change) Name() string { return c.name() } // Action returns the kind of action represented by the change, an // insertion, a deletion or a modification. func (c *Change) Action() (merkletrie.Action, error) { if c.From.Equal(&empty) && c.To.Equal(&empty) { return merkletrie.Action(0), ErrMalformedChange } if c.From.Equal(&empty) { return merkletrie.Insert, nil } if c.To.Equal(&empty) { return merkletrie.Delete, nil } return merkletrie.Modify, nil } // Files returns the files before and after a change. // For insertions from will be nil. For deletions to will be nil. func (c *Change) Files() (from, to *File, err error) { action, err := c.Action() if err != nil { return } if action == merkletrie.Insert || action == merkletrie.Modify { if !c.To.TreeEntry.Mode.IsFile() { return nil, nil, nil } e := &c.To.TreeEntry to = newFile(e.Name, c.To.Name, e.Mode, e.Hash, e.Size, c.To.Tree.b) } if action == merkletrie.Delete || action == merkletrie.Modify { if !c.From.TreeEntry.Mode.IsFile() { return nil, nil, nil } e := &c.From.TreeEntry from = newFile(e.Name, c.From.Name, e.Mode, e.Hash, e.Size, c.From.Tree.b) } return } func (c *Change) String() string { action, err := c.Action() if err != nil { return "malformed change" } return fmt.Sprintf("", action, c.name()) } func (c *Change) name() string { if !c.From.Equal(&empty) { return c.From.Name } return c.To.Name } // ChangeEntry values represent a node that has suffered a change. type ChangeEntry struct { // Full path of the node using "/" as separator. Name string // Parent tree of the node that has changed. Tree *Tree // The entry of the node. TreeEntry TreeEntry } func (e *ChangeEntry) Equal(o *ChangeEntry) bool { return e.Name == o.Name && e.Tree.Equal(o.Tree) && e.TreeEntry.Equal(&o.TreeEntry) } // Changes represents a collection of changes between two git trees. // Implements sort.Interface lexicographically over the path of the // changed files. type Changes []*Change func (c Changes) Len() int { return len(c) } func (c Changes) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c Changes) Less(i, j int) bool { return strings.Compare(c[i].name(), c[j].name()) < 0 } func (c Changes) String() string { var buffer bytes.Buffer buffer.WriteString("[") comma := "" for _, v := range c { buffer.WriteString(comma) buffer.WriteString(v.String()) comma = ", " } buffer.WriteString("]") return buffer.String() } func (c Changes) Stats(ctx context.Context, opts *PatchOptions) (FileStats, error) { return getStatsContext(ctx, opts, c...) } // Patch returns a Patch with all the changes in chunks. This // representation can be used to create several diff outputs. func (c Changes) Patch(ctx context.Context, opts *PatchOptions) ([]*diferenco.Patch, error) { return getPatchContext(ctx, opts, c...) } ================================================ FILE: modules/zeta/object/change_adaptor.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "errors" "fmt" "github.com/antgroup/hugescm/modules/merkletrie" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) // The following functions transform changes types form the merkletrie // package to changes types from this package. func newChange(c merkletrie.Change) (*Change, error) { ret := &Change{} var err error if ret.From, err = newChangeEntry(c.From); err != nil { return nil, fmt.Errorf("from field: %w", err) } if ret.To, err = newChangeEntry(c.To); err != nil { return nil, fmt.Errorf("to field: %w", err) } return ret, nil } func newChangeEntry(p noder.Path) (ChangeEntry, error) { if p == nil { return empty, nil } asTreeNoder, ok := p.Last().(*TreeNoder) if !ok { return ChangeEntry{}, errors.New("cannot transform non-TreeNoders") } return ChangeEntry{ Name: p.String(), Tree: asTreeNoder.parent, TreeEntry: TreeEntry{ Name: asTreeNoder.name, Size: asTreeNoder.size, Mode: asTreeNoder.TrueMode(), Hash: asTreeNoder.HashRaw(), }, }, nil } func newChanges(src merkletrie.Changes) (Changes, error) { ret := make(Changes, len(src)) var err error for i, e := range src { ret[i], err = newChange(e) if err != nil { return nil, fmt.Errorf("change #%d: %w", i, err) } } return ret, nil } ================================================ FILE: modules/zeta/object/commit.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "bytes" "context" "fmt" "io" "strconv" "strings" "time" "github.com/antgroup/hugescm/modules/merkletrie/noder" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" ) var ( COMMIT_MAGIC = [4]byte{'Z', 'C', 0x00, 0x01} ) // DateFormat is the format being used in the original git implementation const DateFormat = "Mon Jan 02 15:04:05 2006 -0700" const ( BlobInlineMaxBytes = 4096 ) type Signature struct { Name string `json:"name"` Email string `json:"email"` When time.Time `json:"when"` } var timeZoneLength = 5 func (s *Signature) decodeTimeAndTimeZone(b []byte) { space := bytes.IndexByte(b, ' ') if space == -1 { space = len(b) } ts, err := strconv.ParseInt(string(b[:space]), 10, 64) if err != nil { return } s.When = time.Unix(ts, 0).In(time.UTC) var tzStart = space + 1 if tzStart >= len(b) || tzStart+timeZoneLength > len(b) { return } timezone := string(b[tzStart : tzStart+timeZoneLength]) tzhours, err1 := strconv.ParseInt(timezone[0:3], 10, 64) tzmins, err2 := strconv.ParseInt(timezone[3:], 10, 64) if err1 != nil || err2 != nil { return } if tzhours < 0 { tzmins *= -1 } tz := time.FixedZone("", int(tzhours*60*60+tzmins*60)) s.When = s.When.In(tz) } // Decode decodes a byte slice into a signature func (s *Signature) Decode(b []byte) { open := bytes.LastIndexByte(b, '<') closeIdx := bytes.LastIndexByte(b, '>') if open == -1 || closeIdx == -1 { return } if closeIdx < open { return } s.Name = string(bytes.Trim(b[:open], " ")) s.Email = string(b[open+1 : closeIdx]) hasTime := closeIdx+2 < len(b) if hasTime { s.decodeTimeAndTimeZone(b[closeIdx+2:]) } } const ( formatTimeZoneOnly = "-0700" ) // String implements the fmt.Stringer interface and formats a Signature as // expected in the Git commit internal object format. For instance: // // Taylor Blau 1494258422 -0600 func (s *Signature) String() string { at := s.When.Unix() zone := s.When.Format(formatTimeZoneOnly) return fmt.Sprintf("%s <%s> %d %s", s.Name, s.Email, at, zone) } // ExtraHeader encapsulates a key-value pairing of header key to header value. // It is stored as a struct{string, string} in memory as opposed to a // map[string]string to maintain ordering in a byte-for-byte encode/decode round // trip. type ExtraHeader struct { // K is the header key, or the first run of bytes up until a ' ' (\x20) // character. K string // V is the header value, or the remaining run of bytes in the line, // stripping off the above "K" field as a prefix. V string } type Commit struct { Hash plumbing.Hash `json:"hash"` // commit oid // Author is the Author this commit, or the original writer of the // contents. // // NOTE: this field is stored as a string to ensure any extra "cruft" // bytes are preserved through migration. Author Signature `json:"author"` // Committer is the individual or entity that added this commit to the // history. // // NOTE: this field is stored as a string to ensure any extra "cruft" // bytes are preserved through migration. Committer Signature `json:"committer"` // ParentIDs are the IDs of all parents for which this commit is a // linear child. Parents []plumbing.Hash `json:"parents"` // Tree is the root Tree associated with this commit. Tree plumbing.Hash `json:"tree"` // ExtraHeaders stores headers not listed above, for instance // "encoding", "gpgsig", or "mergetag" (among others). ExtraHeaders []*ExtraHeader `json:"-"` // Message is the commit message, including any signing information // associated with this commit. Message string `json:"message"` b Backend } func (c *Commit) Encode(w io.Writer) error { _, err := w.Write(COMMIT_MAGIC[:]) if err != nil { return err } if _, err = fmt.Fprintf(w, "tree %s\n", c.Tree.String()); err != nil { return err } for _, parent := range c.Parents { if _, err = fmt.Fprintf(w, "parent %s\n", parent.String()); err != nil { return err } } if _, err = fmt.Fprintf(w, "author %s\ncommitter %s\n", c.Author.String(), c.Committer.String()); err != nil { return err } for _, hdr := range c.ExtraHeaders { if _, err = fmt.Fprintf(w, "%s %s\n", hdr.K, strings.ReplaceAll(hdr.V, "\n", "\n ")); err != nil { return err } } // c.Message is built from messageParts in the Decode() function. // // Since each entry in messageParts _does not_ contain its trailing LF, // append an empty string to capture the final newline. if _, err = fmt.Fprintf(w, "\n%s", c.Message); err != nil { return err } return nil } func (c *Commit) Decode(reader Reader) error { if reader.Type() != CommitObject { return ErrUnsupportedObject } c.Hash = reader.Hash() r := streamio.GetBufioReader(reader) defer streamio.PutBufioReader(r) var message strings.Builder var finishedHeaders bool for { line, readErr := r.ReadString('\n') if readErr != nil && readErr != io.EOF { return readErr } text := strings.TrimSuffix(line, "\n") if len(text) == 0 && !finishedHeaders { finishedHeaders = true continue } if !finishedHeaders { // Check if this is a continuation line (starts with space) // Do this before strings.Cut to avoid unnecessary parsing if len(text) > 0 && text[0] == ' ' && len(c.ExtraHeaders) != 0 { last := c.ExtraHeaders[len(c.ExtraHeaders)-1] last.V += "\n" + text[1:] continue } key, value, ok := strings.Cut(text, " ") switch key { case "tree": if !ok || len(value) == 0 { continue } c.Tree = plumbing.NewHash(value) case "parent": if !ok || len(value) == 0 { continue } c.Parents = append(c.Parents, plumbing.NewHash(value)) case "author": if !ok || len(value) == 0 { continue } c.Author.Decode([]byte(value)) case "committer": if !ok || len(value) == 0 { continue } c.Committer.Decode([]byte(value)) default: // Skip malformed header lines (no space separator) or empty key if !ok || len(key) == 0 { continue } // New header c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{ K: key, V: value, }) } } else { _, _ = message.WriteString(line) } if readErr == io.EOF { break } } c.Message = message.String() return nil } // Less defines a compare function to determine which commit is 'earlier' by: // - First use Committer.When // - If Committer.When are equal then use Author.When // - If Author.When also equal then compare the string value of the hash func (c *Commit) Less(rhs *Commit) bool { return c.Committer.When.Before(rhs.Committer.When) || (c.Committer.When.Equal(rhs.Committer.When) && (c.Author.When.Before(rhs.Author.When) || (c.Author.When.Equal(rhs.Author.When) && bytes.Compare(c.Hash[:], rhs.Hash[:]) < 0))) } func indent(t string) string { var output []string for line := range strings.SplitSeq(t, "\n") { if len(line) != 0 { line = " " + line } output = append(output, line) } return strings.Join(output, "\n") } func (c *Commit) String() string { return fmt.Sprintf( "%s %s\nAuthor: %s\nDate: %s\n\n%s\n", CommitObject, c.Hash, c.Author.String(), c.Author.When.Format(DateFormat), indent(c.Message), ) } func (c *Commit) Subject() string { if i := strings.IndexAny(c.Message, "\r\n"); i != -1 { return c.Message[0:i] } return c.Message } // Root returns the Tree from the commit. func (c *Commit) Root(ctx context.Context) (*Tree, error) { return resolveTree(ctx, c.b, c.Tree) } // File returns the file with the specified "path" in the commit and a // nil error if the file exists. If the file does not exist, it returns // a nil file and the ErrFileNotFound error. func (c *Commit) File(ctx context.Context, path string) (*File, error) { tree, err := c.Root(ctx) if err != nil { return nil, err } return tree.File(ctx, path) } // StatsContext returns the stats of a commit. Error will be return if context // expires. Provided context must be non-nil. func (c *Commit) StatsContext(ctx context.Context, m noder.Matcher, opts *PatchOptions) (FileStats, error) { from, err := c.Root(ctx) if err != nil { return nil, err } to := &Tree{} if len(c.Parents) != 0 { firstParent, err := c.b.Commit(ctx, c.Parents[0]) if err != nil { return nil, err } to, err = firstParent.Root(ctx) if err != nil { return nil, err } } return to.StatsContext(ctx, from, m, opts) } // CommitIter is a generic closable interface for iterating over commits. type CommitIter interface { Next(context.Context) (*Commit, error) ForEach(context.Context, func(*Commit) error) error Close() } // Parents return a CommitIter to the parent Commits. func (c *Commit) MakeParents() CommitIter { return NewCommitIter(c.b, c.Parents) } // NumParents returns the number of parents in a commit. func (c *Commit) NumParents() int { return len(c.Parents) } // GetCommit gets a commit from an object storer and decodes it. func GetCommit(ctx context.Context, b Backend, oid plumbing.Hash) (*Commit, error) { return b.Commit(ctx, oid) } ================================================ FILE: modules/zeta/object/commit_test.go ================================================ package object import ( "bytes" "fmt" "io" "os" "strings" "testing" "time" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" "github.com/emirpasic/gods/trees/binaryheap" ) func TestCommitCompress(t *testing.T) { s := Signature{ Name: "Linus Dev", Email: "linux@dev.io", When: time.Now(), } cc := &Commit{ Author: s, Committer: s, Tree: plumbing.NewHash("04ca0feb68cb19158e0078227a989798600a0701b8d729f2edb09b5dcdbc79ac"), Message: `To list information about all objects in a bucket, you must have the oss:ListObjects permission. The user metadata of objects is not returned for ListObjectsV2 (GetBucketV2) requests. If you have enabled Logging and Real-time log query, the operation field in the access logs generated by calling the ListObjects (GetBucket) operation is GetBucket. You are charged based on the number of PUT requests when you call the ListObjectsV2 (GetBucketV2) operation. For more information, see PUT requests.`, } var b bytes.Buffer _ = cc.Encode(&b) var zb bytes.Buffer zw := streamio.GetZstdWriter(&zb) defer streamio.PutZstdWriter(zw) n, _ := io.Copy(zw, bytes.NewReader(b.Bytes())) _ = zw.Close() fmt.Fprintf(os.Stderr, "%d --> %d | %d\n", b.Len(), zb.Len(), n) zr, err := streamio.GetZstdReader(bytes.NewReader(zb.Bytes())) if err != nil { return } defer streamio.PutZstdReader(zr) var zbb bytes.Buffer K, err := io.Copy(&zbb, zr) if err != nil { return } fmt.Fprintf(os.Stderr, "%d, %d\n", zbb.Len(), K) } func TestCommitDecode(t *testing.T) { s := Signature{ Name: "Linus Dev", Email: "linux@dev.io", When: time.Now(), } cc := &Commit{ Author: s, Committer: s, Tree: plumbing.NewHash("04ca0feb68cb19158e0078227a989798600a0701b8d729f2edb09b5dcdbc79ac"), Message: `To list information about all objects in a bucket, you must have the oss:ListObjects permission. The user metadata of objects is not returned for ListObjectsV2 (GetBucketV2) requests. If you have enabled Logging and Real-time log query, the operation field in the access logs generated by calling the ListObjects (GetBucket) operation is GetBucket. You are charged based on the number of PUT requests when you call the ListObjectsV2 (GetBucketV2) operation. For more information, see PUT requests.`, } var b bytes.Buffer h := plumbing.NewHasher() _ = cc.Encode(io.MultiWriter(&b, h)) oid := h.Sum() var zb bytes.Buffer zw := streamio.GetZstdWriter(&zb) defer streamio.PutZstdWriter(zw) _, _ = io.Copy(zw, bytes.NewReader(b.Bytes())) _ = zw.Close() a, err := Decode(bytes.NewReader(zb.Bytes()), oid, nil) if err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) return } fmt.Fprintf(os.Stderr, "%v\n", a) } func TestSignature(t *testing.T) { s := "ZETA 1706262944 +0800" var signature Signature signature.Decode([]byte(s)) fmt.Fprintf(os.Stderr, "%v\n", signature) } func TestBinaryHeap(t *testing.T) { numbers := []int{1, 2, 3, 5, 6, 128, 8, 4, 7} p := binaryheap.NewWithIntComparator() for _, i := range numbers { p.Push(i) } for { v, ok := p.Pop() if !ok { break } fmt.Fprintf(os.Stderr, "%v\n", v) } } // TestCommitDecodeWithMultipleParents tests decoding a commit with multiple parents func TestCommitDecodeWithMultipleParents(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb parent a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2 parent b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3 parent c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4 author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 test message` // Create a reader that implements the Reader interface r := strings.NewReader(input) reader := &testReader{ Reader: r, hash: plumbing.NewHash("test"), objType: CommitObject, } commit := new(Commit) err := commit.Decode(reader) if err != nil { t.Fatalf("Decode error: %v", err) } if len(commit.Parents) != 3 { t.Errorf("Expected 3 parents, got %d", len(commit.Parents)) } } // TestCommitDecodeWithSpecialCharacters tests decoding a commit with special characters func TestCommitDecodeWithSpecialCharacters(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb author 张三 1337892984 +0800 committer 张三 1337892984 +0800 custom value with spaces & special!@#$%^&*()_+-=[]{}|;':",./<>? test message with 中文 and 日本語` // Create a reader that implements the Reader interface r := strings.NewReader(input) reader := &testReader{ Reader: r, hash: plumbing.NewHash("test"), objType: CommitObject, } commit := new(Commit) err := commit.Decode(reader) if err != nil { t.Fatalf("Decode error: %v", err) } if !strings.Contains(commit.Author.String(), "张三") { t.Error("Expected author to contain 张三") } if len(commit.ExtraHeaders) != 1 { t.Errorf("Expected 1 extra header, got %d", len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "custom" { t.Errorf("Expected key 'custom', got %s", commit.ExtraHeaders[0].K) } if commit.ExtraHeaders[0].V != "value with spaces & special!@#$%^&*()_+-=[]{}|;':\",./<>?" { t.Errorf("Unexpected extra header value") } if !strings.Contains(commit.Message, "中文") { t.Error("Expected message to contain 中文") } if !strings.Contains(commit.Message, "日本語") { t.Error("Expected message to contain 日本語") } } // TestCommitDecodeWithExtraHeaderBeforeStandard tests decoding a commit with extra header before standard headers func TestCommitDecodeWithExtraHeaderBeforeStandard(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb custom extra header before standard author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 test message` // Create a reader that implements the Reader interface r := strings.NewReader(input) reader := &testReader{ Reader: r, hash: plumbing.NewHash("test"), objType: CommitObject, } commit := new(Commit) err := commit.Decode(reader) if err != nil { t.Fatalf("Decode error: %v", err) } if len(commit.ExtraHeaders) != 1 { t.Errorf("Expected 1 extra header, got %d", len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "custom" { t.Errorf("Expected key 'custom', got %s", commit.ExtraHeaders[0].K) } if commit.ExtraHeaders[0].V != "extra header before standard" { t.Errorf("Expected 'extra header before standard', got %s", commit.ExtraHeaders[0].V) } } // TestCommitDecodeWithComplexHeaders tests decoding a commit with complex multi-line headers func TestCommitDecodeWithComplexHeaders(t *testing.T) { input := `tree e8ad84c41c2acde27c77fa212b8865cd3acfe6fb parent b343c8beec664ef6f0e9964d3001c7c7966331ae author Pat Doe 1337892984 -0700 committer Pat Doe 1337892984 -0700 mergetag object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd type commit tag random tagger J. Roe 1337889148 -0600 Random changes` // Create a reader that implements the Reader interface r := strings.NewReader(input) reader := &testReader{ Reader: r, hash: plumbing.NewHash("test"), objType: CommitObject, } commit := new(Commit) err := commit.Decode(reader) if err != nil { t.Fatalf("Decode error: %v", err) } // Verify ExtraHeaders if len(commit.ExtraHeaders) != 1 { t.Errorf("Expected 1 extra header, got %d", len(commit.ExtraHeaders)) } if commit.ExtraHeaders[0].K != "mergetag" { t.Errorf("Expected key 'mergetag', got %s", commit.ExtraHeaders[0].K) } if !strings.Contains(commit.ExtraHeaders[0].V, "object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd") { t.Error("Expected extra header to contain 'object 1e8a52e18cfb381bc9cc1f0b720540364d2a6edd'") } if !strings.Contains(commit.ExtraHeaders[0].V, "type commit") { t.Error("Expected extra header to contain 'type commit'") } if !strings.Contains(commit.ExtraHeaders[0].V, "tag random") { t.Error("Expected extra header to contain 'tag random'") } if !strings.Contains(commit.ExtraHeaders[0].V, "tagger J. Roe 1337889148 -0600") { t.Error("Expected extra header to contain 'tagger J. Roe 1337889148 -0600'") } } // testReader implements the Reader interface for testing purposes type testReader struct { io.Reader hash plumbing.Hash objType ObjectType } func (r *testReader) Hash() plumbing.Hash { return r.hash } func (r *testReader) Type() ObjectType { return r.objType } ================================================ FILE: modules/zeta/object/commit_walker.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "container/list" "context" "errors" "io" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/refs" ) // lookupIter implements CommitIter by looking up commits from a Backend // based on a predefined list of commit hashes. This is useful when you already // know the exact commit hashes you want to traverse and don't need to discover // the commit graph dynamically. type lookupIter struct { b Backend // Backend to fetch commits from series []plumbing.Hash // List of commit hashes to iterate over pos int // Current position in the series } // NewCommitIter creates a new CommitIter that iterates over commits with the // given hashes in the specified order. This is a simple iterator that directly // fetches commits from the backend without any graph traversal logic. // // Parameters: // - b: Backend to fetch commits from // - hashes: Ordered list of commit hashes to iterate over // // Returns: // - CommitIter that yields commits in the order provided func NewCommitIter(b Backend, hashes []plumbing.Hash) CommitIter { return &lookupIter{b: b, series: hashes} } // Next returns the next commit in the series. If all commits have been returned // or a commit cannot be found in the backend (ErrNoSuchObject), it returns io.EOF. // // This method is designed to be called repeatedly until io.EOF is returned, // indicating that there are no more commits to iterate over. // // Parameters: // - ctx: Context for cancellation and timeout // // Returns: // - *Commit: The next commit in the series // - error: io.EOF if no more commits, or an error if the commit cannot be fetched func (iter *lookupIter) Next(ctx context.Context) (*Commit, error) { if iter.pos >= len(iter.series) { return nil, io.EOF } oid := iter.series[iter.pos] cc, err := iter.b.Commit(ctx, oid) if plumbing.IsNoSuchObject(err) { // If the commit doesn't exist in the backend, treat it as EOF // This is important for shallow clone scenarios where some commits // may be missing return nil, io.EOF } if err == nil { iter.pos++ } return cc, err } // ForEach iterates over all commits in the series, calling the provided callback // function for each commit. The iteration stops when the callback returns an error // or when all commits have been processed. // // Special handling for error returns: // - plumbing.ErrStop: Stops iteration without error // - io.EOF: Marks the end of iteration, not an error // - Other errors: Stops iteration and returns the error // // Parameters: // - ctx: Context for cancellation and timeout // - cb: Callback function called for each commit // // Returns: // - error: Any error returned by the callback, or nil if iteration completes func (iter *lookupIter) ForEach(ctx context.Context, cb func(*Commit) error) error { defer iter.Close() for { cc, err := iter.Next(ctx) if err != nil { if errors.Is(err, io.EOF) { return nil } return err } if err := cb(cc); err != nil { if errors.Is(err, plumbing.ErrStop) { return nil } return err } } } // Close marks the iterator as closed by advancing the position to the end // of the series. After calling Close, subsequent calls to Next will return io.EOF. func (iter *lookupIter) Close() { iter.pos = len(iter.series) } // commitPreIterator implements CommitIter with pre-order traversal of the commit graph. // Pre-order means that a commit is visited before its parents. This iterator uses // a depth-first search (DFS) approach with an explicit stack to avoid recursion. // // Deduplication: Each commit is visited at most once using two seen maps: // - seen: Commits already visited by this iterator // - seenExternal: Commits already visited by other iterators (for complex traversals) // // Shallow clone support: Missing commits (ErrNoSuchObject) are handled gracefully, // allowing the traversal to continue with available commits. type commitPreIterator struct { seenExternal map[plumbing.Hash]bool // Commits seen by external iterators seen map[plumbing.Hash]bool // Commits already visited by this iterator stack []CommitIter // Stack for DFS traversal start *Commit // Starting commit to process first } // NewCommitPreorderIter creates a new CommitIter that walks the commit history // in pre-order (depth-first), starting at the given commit and visiting its parents. // // Pre-order traversal characteristics: // - Commits are visited before their parents // - Uses depth-first search with explicit stack // - Each commit is visited exactly once (deduplication) // - Handles missing commits gracefully (shallow clone support) // // Parameters: // - c: Starting commit for the traversal // - seenExternal: Map of commits already seen by other iterators (can be nil) // - ignore: List of commit hashes to skip during traversal // // Returns: // - CommitIter that yields commits in pre-order func NewCommitPreorderIter( c *Commit, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash, ) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &commitPreIterator{ seenExternal: seenExternal, seen: seen, stack: make([]CommitIter, 0), start: c, } } // Next returns the next commit in pre-order. This method implements depth-first // traversal using an explicit stack to avoid recursion. // // Algorithm: // 1. If this is the first call, return the start commit // 2. Pop the top iterator from the stack and get its next commit // 3. If the iterator is exhausted, pop it and continue // 4. If the commit has already been seen, skip it // 5. Mark the commit as seen and push its parents onto the stack // 6. Return the commit // // Parameters: // - ctx: Context for cancellation and timeout // // Returns: // - *Commit: The next commit in pre-order // - error: io.EOF if no more commits, or an error if traversal fails func (w *commitPreIterator) Next(ctx context.Context) (*Commit, error) { var c *Commit for { if w.start != nil { c = w.start w.start = nil } else { current := len(w.stack) - 1 if current < 0 { return nil, io.EOF } var err error c, err = w.stack[current].Next(ctx) if errors.Is(err, io.EOF) { w.stack = w.stack[:current] continue } if err != nil { return nil, err } } if w.seen[c.Hash] || w.seenExternal[c.Hash] { continue } w.seen[c.Hash] = true if c.NumParents() > 0 { w.stack = append(w.stack, filteredParentIter(c, w.seen)) } return c, nil } } // filteredParentIter creates an iterator for a commit's parents, excluding any // commits that have already been seen. This is a key optimization for commit graph // traversal that prevents revisiting the same commit multiple times. // // This function is particularly important for merge commits, which have multiple // parents. By filtering out already-seen parents, we avoid redundant work and // ensure that each commit is visited exactly once. // // Parameters: // - c: The commit whose parents should be iterated // - seen: Map of commit hashes that have already been visited // // Returns: // - CommitIter that yields the commit's unseen parents func filteredParentIter(c *Commit, seen map[plumbing.Hash]bool) CommitIter { var hashes []plumbing.Hash for _, h := range c.Parents { if !seen[h] { hashes = append(hashes, h) } } return NewCommitIter(c.b, hashes) } // ForEach iterates over all commits reachable from the starting commit in pre-order, // calling the provided callback function for each commit. The iteration stops when // the callback returns an error or when all reachable commits have been processed. // // Special handling for error returns: // - plumbing.ErrStop: Stops iteration without error // - io.EOF: Marks the end of iteration, not an error // - Other errors: Stops iteration and returns the error // // Parameters: // - ctx: Context for cancellation and timeout // - cb: Callback function called for each commit // // Returns: // - error: Any error returned by the callback, or nil if iteration completes func (w *commitPreIterator) ForEach(ctx context.Context, cb func(*Commit) error) error { for { c, err := w.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if errors.Is(err, plumbing.ErrStop) { break } if err != nil { return err } } return nil } // Close is a no-op for commitPreIterator as it doesn't hold any external // resources that need to be explicitly cleaned up. func (w *commitPreIterator) Close() {} // commitPostIterator implements CommitIter with post-order traversal of the commit graph. // Post-order means that a commit is visited after all its descendants (parents in git's // terminology). This is useful when you want to see the history in chronological order, // where older commits are visited after newer commits. // // Post-order traversal characteristics: // - Commits are visited after their parents // - Uses depth-first search with explicit stack // - Each commit is visited exactly once (deduplication) // - Particularly useful for chronological history viewing type commitPostIterator struct { stack []*Commit // Stack for DFS traversal seen map[plumbing.Hash]bool // Commits already visited } // NewCommitPostorderIter creates a new CommitIter that walks the commit history // in post-order (depth-first), starting at the given commit. // // Post-order traversal characteristics: // - Commits are visited after their parents // - Useful for chronological history viewing (older commits after newer ones) // - Uses depth-first search with explicit stack // - Each commit is visited exactly once (deduplication) // // Example: // // For a commit graph: C3 <- C2 <- C1 // Pre-order visits: C3, C2, C1 // Post-order visits: C1, C2, C3 // // Parameters: // - c: Starting commit for the traversal // - ignore: List of commit hashes to skip during traversal // // Returns: // - CommitIter that yields commits in post-order func NewCommitPostorderIter(c *Commit, ignore []plumbing.Hash) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &commitPostIterator{ stack: []*Commit{c}, seen: seen, } } func (w *commitPostIterator) Next(ctx context.Context) (*Commit, error) { for { if len(w.stack) == 0 { return nil, io.EOF } c := w.stack[len(w.stack)-1] w.stack = w.stack[:len(w.stack)-1] if w.seen[c.Hash] { continue } w.seen[c.Hash] = true return c, c.MakeParents().ForEach(ctx, func(p *Commit) error { w.stack = append(w.stack, p) return nil }) } } func (w *commitPostIterator) ForEach(ctx context.Context, cb func(*Commit) error) error { for { c, err := w.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if errors.Is(err, plumbing.ErrStop) { break } if err != nil { return err } } return nil } func (w *commitPostIterator) Close() {} // commitAllIterator stands for commit iterator for all refs. type commitAllIterator struct { // currCommit points to the current commit. currCommit *list.Element } // NewCommitAllIter returns a new commit iterator for all refs. // repoStorer is a repo Storer used to get commits and references. // commitIterFunc is a commit iterator function, used to iterate through ref commits in chosen order func NewCommitAllIter(ctx context.Context, rdb refs.Backend, odb Backend, commitIterFunc func(*Commit) CommitIter) (CommitIter, error) { commitsPath := list.New() commitsLookup := make(map[plumbing.Hash]*list.Element) head, err := refs.ReferenceResolve(rdb, plumbing.HEAD) if err == nil { err = addReference(ctx, odb, commitIterFunc, head, commitsPath, commitsLookup) } if err != nil && !errors.Is(err, plumbing.ErrReferenceNotFound) { return nil, err } // add all references along with the HEAD refIter, err := refs.NewReferenceIter(rdb) if err != nil { return nil, err } defer refIter.Close() for { ref, err := refIter.Next() if errors.Is(err, io.EOF) { break } if errors.Is(err, plumbing.ErrReferenceNotFound) { continue } if err != nil { return nil, err } if err = addReference(ctx, odb, commitIterFunc, ref, commitsPath, commitsLookup); err != nil { return nil, err } } return &commitAllIterator{commitsPath.Front()}, nil } func addReference( ctx context.Context, b Backend, commitIterFunc func(*Commit) CommitIter, ref *plumbing.Reference, commitsPath *list.List, commitsLookup map[plumbing.Hash]*list.Element) error { _, exists := commitsLookup[ref.Hash()] if exists { // we already have it - skip the reference. return nil } refCommit, _ := GetCommit(ctx, b, ref.Hash()) if refCommit == nil { // if it's not a commit - skip it. return nil } var ( refCommits []*Commit parent *list.Element ) // collect all ref commits to add commitIter := commitIterFunc(refCommit) for c, e := commitIter.Next(ctx); e == nil; { parent, exists = commitsLookup[c.Hash] if exists { break } refCommits = append(refCommits, c) c, e = commitIter.Next(ctx) } commitIter.Close() if parent == nil { // common parent - not found // add all commits to the path from this ref (maybe it's a HEAD and we don't have anything, yet) for _, c := range refCommits { parent = commitsPath.PushBack(c) commitsLookup[c.Hash] = parent } } else { // add ref's commits to the path in reverse order (from the latest) for i := len(refCommits) - 1; i >= 0; i-- { c := refCommits[i] // insert before found common parent parent = commitsPath.InsertBefore(c, parent) commitsLookup[c.Hash] = parent } } return nil } func (it *commitAllIterator) Next(ctx context.Context) (*Commit, error) { if it.currCommit == nil { return nil, io.EOF } c := it.currCommit.Value.(*Commit) it.currCommit = it.currCommit.Next() return c, nil } func (it *commitAllIterator) ForEach(ctx context.Context, cb func(*Commit) error) error { for { c, err := it.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if errors.Is(err, plumbing.ErrStop) { break } if err != nil { return err } } return nil } func (it *commitAllIterator) Close() { it.currCommit = nil } type commitPostIteratorFirstParent struct { stack []*Commit seen map[plumbing.Hash]bool } // NewCommitPostorderIterFirstParent returns a CommitIter that walks the commit // history like WalkCommitHistory but in post-order. // // This option gives a better overview when viewing the evolution of a particular // topic branch, because merges into a topic branch tend to be only about // adjusting to updated upstream from time to time, and this option allows // you to ignore the individual commits brought in to your history by such // a merge. // // Ignore allows to skip some commits from being iterated. func NewCommitPostorderIterFirstParent(c *Commit, ignore []plumbing.Hash) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &commitPostIteratorFirstParent{ stack: []*Commit{c}, seen: seen, } } func (w *commitPostIteratorFirstParent) Next(ctx context.Context) (*Commit, error) { for { if len(w.stack) == 0 { return nil, io.EOF } c := w.stack[len(w.stack)-1] w.stack = w.stack[:len(w.stack)-1] if w.seen[c.Hash] { continue } w.seen[c.Hash] = true return c, c.MakeParents().ForEach(ctx, func(p *Commit) error { if len(c.Parents) > 0 && p.Hash == c.Parents[0] { w.stack = append(w.stack, p) } return nil }) } } func (w *commitPostIteratorFirstParent) ForEach(ctx context.Context, cb func(*Commit) error) error { for { c, err := w.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if errors.Is(err, plumbing.ErrStop) { break } if err != nil { return err } } return nil } func (w *commitPostIteratorFirstParent) Close() {} ================================================ FILE: modules/zeta/object/commit_walker_atime.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "errors" "context" "io" "github.com/antgroup/hugescm/modules/plumbing" "github.com/emirpasic/gods/trees/binaryheap" ) // commitIteratorByATime implements a commit walker that orders commits by author timestamp. // This is similar to CTime ordering but uses the author timestamp instead of committer timestamp. type commitIteratorByATime struct { // seenExternal contains commits that have been seen in other iterators and should be skipped seenExternal map[plumbing.Hash]bool // seen tracks commits that have already been processed to avoid duplicates seen map[plumbing.Hash]bool // heap is a max-heap ordered by author timestamp (newest first) heap *binaryheap.Heap } // NewCommitIterATime returns a CommitIter that walks the commit history, // starting at the given commit and visiting its parents while preserving Author Time order. // This orders commits by the author's timestamp (when the commit was originally authored), // rather than the committer timestamp (when it was applied). // // The iterator will visit each commit only once. If the callback returns an error, // walking will stop and return the error. Missing commits (in shallow clones) are silently skipped. // // Parameters: // - c: The starting commit // - seenExternal: Commits already seen in other traversals // - ignore: List of commits to skip func NewCommitIterATime( c *Commit, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash, ) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } // Create a max-heap ordered by author timestamp (newest first) heap := binaryheap.NewWith(func(a, b any) int { if a.(*Commit).Author.When.Before(b.(*Commit).Author.When) { return 1 } return -1 }) heap.Push(c) return &commitIteratorByATime{ seenExternal: seenExternal, seen: seen, heap: heap, } } // Next returns the next commit in author timestamp order (newest first). // It pops from the heap, marks the commit as seen, and pushes all unseen parents // to the heap. Missing commits (in shallow clones) are silently skipped. func (w *commitIteratorByATime) Next(ctx context.Context) (*Commit, error) { var c *Commit for { cIn, ok := w.heap.Pop() if !ok { return nil, io.EOF } c = cIn.(*Commit) // Skip commits that have already been seen if w.seen[c.Hash] || w.seenExternal[c.Hash] { continue } w.seen[c.Hash] = true // Add all parent commits to the heap for later processing for _, h := range c.Parents { if w.seen[h] || w.seenExternal[h] { continue } pc, err := c.b.Commit(ctx, h) if plumbing.IsNoSuchObject(err) { // Skip missing commits in shallow clone scenarios continue } if err != nil { return nil, err } w.heap.Push(pc) } return c, nil } } // ForEach iterates through all commits in author timestamp order, calling the callback for each one. // Iteration stops if the callback returns an error or ErrStop. func (w *commitIteratorByATime) ForEach(ctx context.Context, cb func(*Commit) error) error { for { c, err := w.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if errors.Is(err, plumbing.ErrStop) { break } if err != nil { return err } } return nil } // Close is a no-op for the ATime iterator as it doesn't hold any external resources. func (w *commitIteratorByATime) Close() {} ================================================ FILE: modules/zeta/object/commit_walker_bfs.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "errors" "context" "io" "github.com/antgroup/hugescm/modules/plumbing" ) // bfsCommitIterator implements a breadth-first search (BFS) traversal of the commit graph. // It uses a queue to process commits level by level, visiting all commits at depth n // before moving to depth n+1. This is useful when you want to process commits in // chronological order (newest to oldest by generation). type bfsCommitIterator struct { // seenExternal contains commits that have been seen in other iterators and should be skipped seenExternal map[plumbing.Hash]bool // seen tracks commits that have already been processed to avoid duplicates seen map[plumbing.Hash]bool // queue holds the commits to be processed in BFS order (FIFO) queue []*Commit } // NewCommitIterBFS returns a CommitIter that walks the commit history, // starting at the given commit and visiting its parents in pre-order. // The given callback will be called for each visited commit. Each commit will // be visited only once. If the callback returns an error, walking will stop // and will return the error. Other errors might be returned if the history // cannot be traversed (e.g. missing objects). Ignore allows to skip some // commits from being iterated. func NewCommitIterBFS( c *Commit, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash, ) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &bfsCommitIterator{ seenExternal: seenExternal, seen: seen, queue: []*Commit{c}, } } // appendHash adds a commit hash to the BFS queue if it hasn't been seen before. // If the commit is not found in the backend (shallow clone scenario), it's silently skipped. func (w *bfsCommitIterator) appendHash(ctx context.Context, b Backend, h plumbing.Hash) error { if w.seen[h] || w.seenExternal[h] { return nil } c, err := b.Commit(ctx, h) if err != nil { return err } w.queue = append(w.queue, c) return nil } // Next returns the next commit in BFS order. It processes commits by dequeueing // from the front of the queue and enqueuing all unseen parents at the back. // Missing commits (in shallow clones) are silently skipped. func (w *bfsCommitIterator) Next(ctx context.Context) (*Commit, error) { var c *Commit for { if len(w.queue) == 0 { return nil, io.EOF } c = w.queue[0] w.queue = w.queue[1:] if w.seen[c.Hash] || w.seenExternal[c.Hash] { continue } w.seen[c.Hash] = true // Add all parent commits to the queue for later processing for _, h := range c.Parents { err := w.appendHash(ctx, c.b, h) if plumbing.IsNoSuchObject(err) { // Skip missing commits in shallow clone scenarios continue } if err != nil { return nil, err } } return c, nil } } // ForEach iterates through all commits in BFS order, calling the callback for each one. // Iteration stops if the callback returns an error or ErrStop. func (w *bfsCommitIterator) ForEach(ctx context.Context, cb func(*Commit) error) error { for { c, err := w.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if errors.Is(err, plumbing.ErrStop) { break } if err != nil { return err } } return nil } // Close is a no-op for the BFS iterator as it doesn't hold any external resources. func (w *bfsCommitIterator) Close() {} ================================================ FILE: modules/zeta/object/commit_walker_bfs_filtered.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "context" "errors" "io" "github.com/antgroup/hugescm/modules/plumbing" ) // NewFilterCommitIter returns a CommitIter that walks the commit history, // starting at the passed commit and visiting its parents in Breadth-first order. // The commits returned by the CommitIter will validate the passed CommitFilter. // The history won't be traversed beyond a commit if isLimit is true for it. // Each commit will be visited only once. // If the commit history can not be traversed, or the Close() method is called, // the CommitIter won't return more commits. // If no isValid is passed, all ancestors of from commit will be valid. // If no isLimit is limit, all ancestors of all commits will be visited. func NewFilterCommitIter( from *Commit, isValid *CommitFilter, isLimit *CommitFilter, ) CommitIter { var validFilter CommitFilter if isValid == nil { validFilter = func(_ *Commit) bool { return true } } else { validFilter = *isValid } var limitFilter CommitFilter if isLimit == nil { limitFilter = func(_ *Commit) bool { return false } } else { limitFilter = *isLimit } return &filterCommitIter{ isValid: validFilter, isLimit: limitFilter, visited: map[plumbing.Hash]struct{}{}, queue: []*Commit{from}, } } // CommitFilter is a predicate function that determines whether a commit should be // included in iteration results. Returns true if the commit passes the filter. type CommitFilter func(*Commit) bool // filterCommitIter implements CommitIter with BFS traversal and custom filtering. // It supports two types of filters: // - isValid: Determines if a commit should be yielded to the caller // - isLimit: Determines if traversal should stop at a commit (don't visit its parents) // // This is used to implement commands like "git log --merges-only" or "git log --no-merges". type filterCommitIter struct { // isValid determines if a commit should be yielded to the caller isValid CommitFilter // isLimit determines if traversal should stop at a commit (don't visit parents) isLimit CommitFilter // visited tracks commits that have already been processed to avoid duplicates visited map[plumbing.Hash]struct{} // queue holds commits to be processed in BFS order (FIFO) queue []*Commit // lastErr stores the last error encountered during iteration lastErr error } // Next returns the next commit of the CommitIter. // It will return io.EOF if there are no more commits to visit, // or an error if the history could not be traversed. func (w *filterCommitIter) Next(ctx context.Context) (*Commit, error) { var commit *Commit var err error for { commit, err = w.popNewFromQueue() if err != nil { return nil, w.close(err) } w.visited[commit.Hash] = struct{}{} if !w.isLimit(commit) { err = w.addToQueue(ctx, commit.b, commit.Parents...) if err != nil { return nil, w.close(err) } } if w.isValid(commit) { return commit, nil } } } // ForEach runs the passed callback over each Commit returned by the CommitIter // until the callback returns an error or there is no more commits to traverse. func (w *filterCommitIter) ForEach(ctx context.Context, cb func(*Commit) error) error { for { commit, err := w.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } if err := cb(commit); errors.Is(err, plumbing.ErrStop) { break } else if err != nil { return err } } return nil } // Error returns the error that caused that the CommitIter is no longer returning commits func (w *filterCommitIter) Error() error { return w.lastErr } // Close cleans up the iterator's internal state, releasing references to commits // and filters. After calling Close, the iterator cannot be used further. func (w *filterCommitIter) Close() { w.visited = map[plumbing.Hash]struct{}{} w.queue = []*Commit{} w.isLimit = nil w.isValid = nil } // close is an internal helper that closes the iterator and records an error. // This is used when an error occurs during iteration. // // Parameters: // - err: The error to record // // Returns: // - error: The same error passed in func (w *filterCommitIter) close(err error) error { w.Close() w.lastErr = err return err } // popNewFromQueue removes and returns the first unvisited commit from the FIFO queue. // // This method implements the FIFO queue behavior for BFS traversal: // - Returns the first commit in the queue (oldest) // - Skips commits that have already been visited (deduplication) // - Returns io.EOF when the queue is empty // // Returns: // - *Commit: The first unvisited commit // - error: io.EOF if queue is empty, or the last error if one occurred func (w *filterCommitIter) popNewFromQueue() (*Commit, error) { var first *Commit for { if len(w.queue) == 0 { if w.lastErr != nil { return nil, w.lastErr } return nil, io.EOF } first = w.queue[0] w.queue = w.queue[1:] if _, ok := w.visited[first.Hash]; ok { continue } return first, nil } } // addToQueue adds the passed commits to the internal fifo queue if they weren't seen // or returns an error if the passed hashes could not be used to get valid commits // In shallow clone scenarios (where some commits are missing), missing commits are // skipped instead of returning an error, allowing the traversal to continue. func (w *filterCommitIter) addToQueue( ctx context.Context, b Backend, hashes ...plumbing.Hash, ) error { for _, hash := range hashes { if _, ok := w.visited[hash]; ok { continue } commit, err := b.Commit(ctx, hash) if plumbing.IsNoSuchObject(err) { // In shallow clone scenarios, missing commits are skipped // instead of returning an error. This allows the traversal // to continue with available commits. continue } if err != nil { return err } w.queue = append(w.queue, commit) } return nil } ================================================ FILE: modules/zeta/object/commit_walker_ctime.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "errors" "context" "io" "github.com/antgroup/hugescm/modules/plumbing" "github.com/emirpasic/gods/trees/binaryheap" ) // commitIteratorByCTime implements a commit walker that orders commits by committer timestamp. // This is the closest to "git log" default ordering, showing commits from newest to oldest. type commitIteratorByCTime struct { // seenExternal contains commits that have been seen in other iterators and should be skipped seenExternal map[plumbing.Hash]bool // seen tracks commits that have already been processed to avoid duplicates seen map[plumbing.Hash]bool // heap is a max-heap ordered by committer timestamp (newest first) heap *binaryheap.Heap } // NewCommitIterCTime returns a CommitIter that walks the commit history, // starting at the given commit and visiting its parents while preserving Committer Time order. // This appears to be the closest order to `git log` (newest commits first). // // The iterator will visit each commit only once. If the callback returns an error, // walking will stop and return the error. Missing commits (in shallow clones) are silently skipped. // // Parameters: // - c: The starting commit // - seenExternal: Commits already seen in other traversals // - ignore: List of commits to skip func NewCommitIterCTime( c *Commit, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash, ) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } // Create a max-heap ordered by committer timestamp (newest first) heap := binaryheap.NewWith(func(a, b any) int { if a.(*Commit).Committer.When.Before(b.(*Commit).Committer.When) { return 1 } return -1 }) heap.Push(c) return &commitIteratorByCTime{ seenExternal: seenExternal, seen: seen, heap: heap, } } // Next returns the next commit in committer timestamp order (newest first). // It pops from the heap, marks the commit as seen, and pushes all unseen parents // to the heap. Missing commits (in shallow clones) are silently skipped. func (w *commitIteratorByCTime) Next(ctx context.Context) (*Commit, error) { var c *Commit for { cIn, ok := w.heap.Pop() if !ok { return nil, io.EOF } c = cIn.(*Commit) // Skip commits that have already been seen if w.seen[c.Hash] || w.seenExternal[c.Hash] { continue } w.seen[c.Hash] = true // Add all parent commits to the heap for later processing for _, h := range c.Parents { if w.seen[h] || w.seenExternal[h] { continue } pc, err := c.b.Commit(ctx, h) if plumbing.IsNoSuchObject(err) { // Skip missing commits in shallow clone scenarios continue } if err != nil { return nil, err } w.heap.Push(pc) } return c, nil } } // ForEach iterates through all commits in committer timestamp order, calling the callback for each one. // Iteration stops if the callback returns an error or ErrStop. func (w *commitIteratorByCTime) ForEach(ctx context.Context, cb func(*Commit) error) error { for { c, err := w.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if errors.Is(err, plumbing.ErrStop) { break } if err != nil { return err } } return nil } // Close is a no-op for the CTime iterator as it doesn't hold any external resources. func (w *commitIteratorByCTime) Close() {} ================================================ FILE: modules/zeta/object/commit_walker_limit.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "context" "errors" "io" "time" "github.com/antgroup/hugescm/modules/plumbing" ) // commitLimitIter implements a commit iterator that filters commits by time range. // This is similar to "git log --since=... --until=...". type commitLimitIter struct { // sourceIter is the underlying commit iterator providing commits sourceIter CommitIter // limitOptions contains the time range constraints for filtering commits limitOptions LogLimitOptions } // LogLimitOptions defines time-based filtering options for commit iteration. type LogLimitOptions struct { // Only include commits after this timestamp (inclusive) Since *time.Time // Only include commits before this timestamp (inclusive) Until *time.Time } // NewCommitLimitIterFromIter creates a new commit iterator that filters commits // by the specified time range. This is used to implement "git log --since=... --until=...". func NewCommitLimitIterFromIter(commitIter CommitIter, limitOptions LogLimitOptions) CommitIter { iterator := new(commitLimitIter) iterator.sourceIter = commitIter iterator.limitOptions = limitOptions return iterator } // Next returns the next commit that falls within the specified time range. // Commits outside the time range are silently skipped. func (c *commitLimitIter) Next(ctx context.Context) (*Commit, error) { for { commit, err := c.sourceIter.Next(ctx) if err != nil { return nil, err } // Skip commits before the Since time if c.limitOptions.Since != nil && commit.Committer.When.Before(*c.limitOptions.Since) { continue } // Skip commits after the Until time if c.limitOptions.Until != nil && commit.Committer.When.After(*c.limitOptions.Until) { continue } return commit, nil } } // ForEach iterates through all commits within the time range, calling the callback for each one. // Iteration stops if the callback returns an error or ErrStop. func (c *commitLimitIter) ForEach(ctx context.Context, cb func(*Commit) error) error { for { commit, nextErr := c.Next(ctx) if errors.Is(nextErr, io.EOF) { break } if nextErr != nil { return nextErr } err := cb(commit) if errors.Is(err, plumbing.ErrStop) { return nil } else if err != nil { return err } } return nil } // Close closes the underlying source iterator. func (c *commitLimitIter) Close() { c.sourceIter.Close() } ================================================ FILE: modules/zeta/object/commit_walker_path.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "context" "errors" "io" "slices" "github.com/antgroup/hugescm/modules/plumbing" ) // commitPathIter implements a commit iterator that filters commits by file path. // It performs tree diffing between consecutive commits to find commits that modified // specific files matching a path filter. This is similar to "git log -- ". type commitPathIter struct { // pathFilter is a function that returns true for file paths we're interested in pathFilter func(string) bool // sourceIter is the underlying commit iterator providing commits in chronological order sourceIter CommitIter // currentCommit is the commit currently being processed currentCommit *Commit // checkParent if true, verifies that the parent commit is actually in the commit tree // This is used for "git log --all" to filter commits that are not ancestors checkParent bool } // NewCommitPathIterFromIter returns a commit iterator which performs diffTree between // successive trees returned from the commit iterator. The purpose of this is to find // the commits that explain how the files that match the path came to be. // // If checkParent is true, the function double checks if the potential parent (next commit in a path) // is one of the parents in the commit tree (used by "git log --all"). // // Parameters: // - pathFilter: A function that takes a file path and returns true if we want commits that modified it // - commitIter: The source commit iterator to filter // - checkParent: If true, verify parent relationship (for "git log --all") func NewCommitPathIterFromIter(pathFilter func(string) bool, commitIter CommitIter, checkParent bool) CommitIter { iterator := new(commitPathIter) iterator.sourceIter = commitIter iterator.pathFilter = pathFilter iterator.checkParent = checkParent return iterator } // NewCommitFileIterFromIter is kept for backward compatibility. // It creates a path iterator that filters for a single specific file. // Can be replaced with NewCommitPathIterFromIter. func NewCommitFileIterFromIter(fileName string, commitIter CommitIter, checkParent bool) CommitIter { return NewCommitPathIterFromIter( func(path string) bool { return path == fileName }, commitIter, checkParent, ) } func (c *commitPathIter) Next(ctx context.Context) (*Commit, error) { if c.currentCommit == nil { var err error c.currentCommit, err = c.sourceIter.Next(ctx) if err != nil { return nil, err } } commit, commitErr := c.getNextFileCommit(ctx) // Setting current-commit to nil to prevent unwanted states when errors are raised if commitErr != nil { c.currentCommit = nil } return commit, commitErr } func (c *commitPathIter) getNextFileCommit(ctx context.Context) (*Commit, error) { var parentTree, currentTree *Tree for { // Parent-commit can be nil if the current-commit is the initial commit parentCommit, parentCommitErr := c.sourceIter.Next(ctx) if parentCommitErr != nil { // If the parent-commit is beyond the initial commit, keep it nil if !errors.Is(parentCommitErr, io.EOF) { return nil, parentCommitErr } parentCommit = nil } if parentTree == nil { var currTreeErr error currentTree, currTreeErr = c.currentCommit.Root(ctx) if currTreeErr != nil { return nil, currTreeErr } } else { currentTree = parentTree parentTree = nil } if parentCommit != nil { var parentTreeErr error parentTree, parentTreeErr = parentCommit.Root(ctx) if parentTreeErr != nil { return nil, parentTreeErr } } // Find diff between current and parent trees changes, diffErr := DiffTreeContext(ctx, currentTree, parentTree, nil) if diffErr != nil { return nil, diffErr } // Check if any changes match our path filter found := c.hasFileChange(changes, parentCommit) // Save current commit for return, update for next iteration prevCommit := c.currentCommit c.currentCommit = parentCommit if found { return prevCommit, nil } // If no match and no more parent commits, we're done if parentCommit == nil { return nil, io.EOF } } } // hasFileChange checks if any of the changes match the path filter and, if checkParent is true, // verifies the parent relationship. func (c *commitPathIter) hasFileChange(changes Changes, parent *Commit) bool { for _, change := range changes { if !c.pathFilter(change.name()) { continue } // File path matches, now verify parent if needed if c.checkParent { // Check if parent is beyond the initial commit or is an actual parent if parent == nil || isParentHash(parent.Hash, c.currentCommit) { return true } continue } return true } return false } // isParentHash checks if the given hash is one of the commit's parent hashes. func isParentHash(hash plumbing.Hash, commit *Commit) bool { return slices.Contains(commit.Parents, hash) } // ForEach iterates through all commits that modified files matching the path filter, // calling the callback for each one. Iteration stops if the callback returns an error or ErrStop. func (c *commitPathIter) ForEach(ctx context.Context, cb func(*Commit) error) error { for { commit, nextErr := c.Next(ctx) if errors.Is(nextErr, io.EOF) { break } if nextErr != nil { return nextErr } err := cb(commit) if errors.Is(err, plumbing.ErrStop) { return nil } else if err != nil { return err } } return nil } // Close closes the underlying source iterator. func (c *commitPathIter) Close() { c.sourceIter.Close() } ================================================ FILE: modules/zeta/object/commit_walker_test.go ================================================ package object import ( "errors" "context" "io" "testing" "time" "github.com/antgroup/hugescm/modules/plumbing" ) // MockBackend is a test implementation of Backend interface for testing commit walkers type MockBackend struct { commits map[plumbing.Hash]*Commit } func NewMockBackend() *MockBackend { return &MockBackend{ commits: make(map[plumbing.Hash]*Commit), } } func (m *MockBackend) AddCommit(commit *Commit) { commit.b = m // Set the backend on the commit m.commits[commit.Hash] = commit } func (m *MockBackend) Commit(ctx context.Context, hash plumbing.Hash) (*Commit, error) { c, ok := m.commits[hash] if !ok { return nil, plumbing.NoSuchObject(hash) } return c, nil } func (m *MockBackend) Tree(ctx context.Context, hash plumbing.Hash) (*Tree, error) { return nil, plumbing.NoSuchObject(hash) } func (m *MockBackend) Fragments(ctx context.Context, hash plumbing.Hash) (*Fragments, error) { return nil, plumbing.NoSuchObject(hash) } func (m *MockBackend) Tag(ctx context.Context, hash plumbing.Hash) (*Tag, error) { return nil, plumbing.NoSuchObject(hash) } func (m *MockBackend) Blob(ctx context.Context, hash plumbing.Hash) (*Blob, error) { return nil, plumbing.NoSuchObject(hash) } // NewTestCommit creates a test commit with the given parameters func NewTestCommit(hash string, message string, parents ...*Commit) *Commit { c := &Commit{ Hash: plumbing.NewHash(hash), Parents: make([]plumbing.Hash, len(parents)), Message: message, Author: Signature{Name: "Test Author", Email: "test@example.com", When: time.Now()}, Committer: Signature{Name: "Test Author", Email: "test@example.com", When: time.Now()}, } for i, p := range parents { c.Parents[i] = p.Hash } return c } // TestCommitPreorderIter tests basic preorder traversal of commits func TestCommitPreorderIter(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a simple commit graph: C3 <- C2 <- C1 c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c2) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) iter := NewCommitPreorderIter(c3, nil, nil) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } if len(commits) != 3 { t.Errorf("Expected 3 commits, got %d", len(commits)) } if commits[0].Message != "C3" { t.Errorf("Expected C3, got %s", commits[0].Message) } if commits[1].Message != "C2" { t.Errorf("Expected C2, got %s", commits[1].Message) } if commits[2].Message != "C1" { t.Errorf("Expected C1, got %s", commits[2].Message) } } // TestCommitPreorderIterWithMerge tests preorder traversal with merge commits func TestCommitPreorderIterWithMerge(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a merge commit graph: // M (merge) // / \ // C2 C3 // \ / // C1 c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c1) m := NewTestCommit("4444444444444444444444444444444444444444", "M", c2, c3) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) backend.AddCommit(m) iter := NewCommitPreorderIter(m, nil, nil) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } if len(commits) != 4 { t.Errorf("Expected 4 commits, got %d", len(commits)) } // Check if commits contain the expected values foundM := false foundC2 := false foundC3 := false foundC1 := false for _, c := range commits { if c == m { foundM = true } if c == c2 { foundC2 = true } if c == c3 { foundC3 = true } if c == c1 { foundC1 = true } } if !foundM { t.Error("Expected to find m in commits") } if !foundC2 { t.Error("Expected to find c2 in commits") } if !foundC3 { t.Error("Expected to find c3 in commits") } if !foundC1 { t.Error("Expected to find c1 in commits") } } // TestCommitPreorderIterDeduplication tests that commits are not visited twice func TestCommitPreorderIterDeduplication(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a diamond graph: // M // / \ // C2 C3 // \ / // C1 // C1 should be visited only once c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c1) m := NewTestCommit("4444444444444444444444444444444444444444", "M", c2, c3) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) backend.AddCommit(m) iter := NewCommitPreorderIter(m, nil, nil) defer iter.Close() var c1Count int err := iter.ForEach(ctx, func(commit *Commit) error { if commit.Hash == c1.Hash { c1Count++ } return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } if c1Count != 1 { t.Errorf("Expected C1 to be visited exactly once, got %d", c1Count) } } // TestCommitBFSIter tests breadth-first search traversal func TestCommitBFSIter(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a linear graph: C4 <- C3 <- C2 <- C1 c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c2) c4 := NewTestCommit("4444444444444444444444444444444444444444", "C4", c3) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) backend.AddCommit(c4) iter := NewCommitIterBFS(c4, nil, nil) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } if len(commits) != 4 { t.Errorf("Expected 4 commits, got %d", len(commits)) } // BFS visits level by level if commits[0].Message != "C4" { t.Errorf("Expected C4, got %s", commits[0].Message) } if commits[1].Message != "C3" { t.Errorf("Expected C3, got %s", commits[1].Message) } if commits[2].Message != "C2" { t.Errorf("Expected C2, got %s", commits[2].Message) } if commits[3].Message != "C1" { t.Errorf("Expected C1, got %s", commits[3].Message) } } // TestCommitBFSIterWithMerge tests BFS traversal with merge commits func TestCommitBFSIterWithMerge(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a merge commit graph: // M // / \ // C2 C3 // \ / // C1 c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c1) m := NewTestCommit("4444444444444444444444444444444444444444", "M", c2, c3) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) backend.AddCommit(m) iter := NewCommitIterBFS(m, nil, nil) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } if len(commits) != 4 { t.Errorf("Expected 4 commits, got %d", len(commits)) } // Check if commits contain expected values foundM := false foundC2 := false foundC3 := false foundC1 := false for _, c := range commits { if c == m { foundM = true } if c == c2 { foundC2 = true } if c == c3 { foundC3 = true } if c == c1 { foundC1 = true } } if !foundM { t.Error("Expected to find m in commits") } if !foundC2 { t.Error("Expected to find c2 in commits") } if !foundC3 { t.Error("Expected to find c3 in commits") } if !foundC1 { t.Error("Expected to find c1 in commits") } } // TestCommitTopoOrderIter tests topological order traversal func TestCommitTopoOrderIter(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a simple commit graph: C3 <- C2 <- C1 c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c2) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) iter := NewCommitIterCTime(c3, nil, nil) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } if len(commits) != 3 { t.Errorf("Expected 3 commits, got %d", len(commits)) } // Topological order should visit children before parents if commits[0].Message != "C3" { t.Errorf("Expected C3, got %s", commits[0].Message) } if commits[1].Message != "C2" { t.Errorf("Expected C2, got %s", commits[1].Message) } if commits[2].Message != "C1" { t.Errorf("Expected C1, got %s", commits[2].Message) } } // TestFilterCommitIter tests filtering commits during traversal func TestFilterCommitIter(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a linear graph: C4 <- C3 <- C2 <- C1 c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c2) c4 := NewTestCommit("4444444444444444444444444444444444444444", "C4", c3) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) backend.AddCommit(c4) // Filter: only return commits with even message length (C2 and C4 have length 2) var isValid CommitFilter = func(c *Commit) bool { return len(c.Message)%2 == 0 } iter := NewFilterCommitIter(c4, &isValid, nil) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } // Only C2 and C4 should be returned (both have length 2) // C1 and C3 have length 2 as well, but let's check actual values for _, c := range commits { if len(c.Message) != 2 { t.Errorf("Expected message length 2, got %d", len(c.Message)) } } // C2 and C4 are definitely in the list foundC2 := false foundC4 := false for _, c := range commits { if c == c2 { foundC2 = true } if c == c4 { foundC4 = true } } if !foundC2 { t.Error("Expected to find c2 in commits") } if !foundC4 { t.Error("Expected to find c4 in commits") } } // TestFilterCommitIterWithLimit tests limiting commit traversal func TestFilterCommitIterWithLimit(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a linear graph: C4 <- C3 <- C2 <- C1 c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c2) c4 := NewTestCommit("4444444444444444444444444444444444444444", "C4", c3) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) backend.AddCommit(c4) // Limit: stop traversal at C2 (don't visit its parents) var isLimit CommitFilter = func(c *Commit) bool { return c.Hash == c2.Hash } iter := NewFilterCommitIter(c4, nil, &isLimit) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } // BFS order: C4, C3, C2 // C2 is a limit, so C1 should not be visited if len(commits) != 3 { t.Errorf("Expected 3 commits, got %d", len(commits)) } foundC4 := false foundC3 := false foundC2 := false foundC1 := false for _, c := range commits { if c == c4 { foundC4 = true } if c == c3 { foundC3 = true } if c == c2 { foundC2 = true } if c == c1 { foundC1 = true } } if !foundC4 { t.Error("Expected to find c4 in commits") } if !foundC3 { t.Error("Expected to find c3 in commits") } if !foundC2 { t.Error("Expected to find c2 in commits") } if foundC1 { t.Error("C1 should not be visited as it's beyond the limit") } } // TestCommitWalkerShallowClone tests that commit walkers handle missing commits gracefully // This is critical for zeta's default shallow clone behavior func TestCommitWalkerShallowClone(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create commits c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c2) // Simulate shallow clone: only C3 and C2 are available, C1 is missing backend.AddCommit(c2) backend.AddCommit(c3) // Test with FilterCommitIter iter := NewFilterCommitIter(c3, nil, nil) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("Should not error on missing commits in shallow clone: %v", err) } // Should traverse C3 and C2, skipping missing C1 gracefully if len(commits) != 2 { t.Errorf("Expected 2 commits, got %d", len(commits)) } foundC3 := false foundC2 := false for _, c := range commits { if c == c3 { foundC3 = true } if c == c2 { foundC2 = true } } if !foundC3 { t.Error("Expected to find c3 in commits") } if !foundC2 { t.Error("Expected to find c2 in commits") } } // TestCommitWalkerShallowCloneWithMerge tests shallow clone with merge commits func TestCommitWalkerShallowCloneWithMerge(t *testing.T) { ctx := t.Context() backend := NewMockBackend() // Create a merge commit graph: // M // / \ // C2 C3 // \ / // C1 c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c1) m := NewTestCommit("4444444444444444444444444444444444444444", "M", c2, c3) // Simulate shallow clone: only M and C2 are available, C3 and C1 are missing backend.AddCommit(m) backend.AddCommit(c2) // Test with FilterCommitIter iter := NewFilterCommitIter(m, nil, nil) defer iter.Close() var commits []*Commit err := iter.ForEach(ctx, func(commit *Commit) error { commits = append(commits, commit) return nil }) if err != nil { t.Fatalf("Should not error on missing commits in shallow clone: %v", err) } // Should traverse M and C2, skipping missing C3 and C1 gracefully if len(commits) != 2 { t.Errorf("Expected 2 commits, got %d", len(commits)) } foundM := false foundC2 := false for _, c := range commits { if c == m { foundM = true } if c == c2 { foundC2 = true } } if !foundM { t.Error("Expected to find m in commits") } if !foundC2 { t.Error("Expected to find c2 in commits") } } // TestCommitWalkerContextCancellation tests that walkers respect context cancellation func TestCommitWalkerContextCancellation(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) defer cancel() backend := NewMockBackend() // Create a long chain of commits var commits []*Commit for i := range 100 { hash := plumbing.NewHash(string(rune(0x11 + i))) c := NewTestCommit(hash.String(), "C"+string(rune('0'+i))) if len(commits) > 0 { c.Parents = []plumbing.Hash{commits[len(commits)-1].Hash} } commits = append(commits, c) backend.AddCommit(c) } // Start traversal iter := NewCommitPreorderIter(commits[len(commits)-1], nil, nil) defer iter.Close() // Cancel the context immediately cancel() // Try to iterate - should stop quickly or error count := 0 _ = iter.ForEach(ctx, func(commit *Commit) error { count++ return nil }) // Verify that iteration stopped (either immediately or after a few commits) // The exact behavior depends on the implementation if count >= 100 { t.Error("Should not process all commits after cancellation") } } // TestCommitIterForEachStop tests that ErrStop stops traversal func TestCommitIterForEachStop(t *testing.T) { ctx := t.Context() backend := NewMockBackend() c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) c3 := NewTestCommit("3333333333333333333333333333333333333333", "C3", c2) backend.AddCommit(c1) backend.AddCommit(c2) backend.AddCommit(c3) iter := NewCommitPreorderIter(c3, nil, nil) defer iter.Close() count := 0 err := iter.ForEach(ctx, func(commit *Commit) error { count++ // Stop after 2 commits if count == 2 { return plumbing.ErrStop } return nil }) if err != nil { t.Fatalf("ForEach error: %v", err) } if count != 2 { t.Errorf("Expected 2, got %d", count) } } // TestCommitIterNextDirectly tests calling Next() directly func TestCommitIterNextDirectly(t *testing.T) { ctx := t.Context() backend := NewMockBackend() c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") c2 := NewTestCommit("2222222222222222222222222222222222222222", "C2", c1) backend.AddCommit(c1) backend.AddCommit(c2) iter := NewCommitPreorderIter(c2, nil, nil) defer iter.Close() // First commit c, err := iter.Next(ctx) if err != nil { t.Fatalf("Next error: %v", err) } if c.Message != "C2" { t.Errorf("Expected C2, got %s", c.Message) } // Second commit c, err = iter.Next(ctx) if err != nil { t.Fatalf("Next error: %v", err) } if c.Message != "C1" { t.Errorf("Expected C1, got %s", c.Message) } // EOF c, err = iter.Next(ctx) if !errors.Is(err, io.EOF) { t.Errorf("Expected io.EOF, got %v", err) } if c != nil { t.Error("Expected nil commit") } } // TestCommitIterClose tests that Close() properly cleans up resources func TestCommitIterClose(t *testing.T) { ctx := t.Context() backend := NewMockBackend() c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") backend.AddCommit(c1) iter := NewCommitPreorderIter(c1, nil, nil) // Get a commit c, err := iter.Next(ctx) if err != nil { t.Fatalf("Next error: %v", err) } if c == nil { t.Fatal("Expected non-nil commit") } // Close the iterator iter.Close() // Try to get another commit after close c, err = iter.Next(ctx) if err == nil { t.Error("Expected error after close") } if c != nil { t.Error("Expected nil commit after close") } } // TestCommitWalkerErrorPropagation tests that errors are properly propagated func TestCommitWalkerErrorPropagation(t *testing.T) { ctx := t.Context() backend := NewMockBackend() c1 := NewTestCommit("1111111111111111111111111111111111111111", "C1") backend.AddCommit(c1) iter := NewCommitPreorderIter(c1, nil, nil) defer iter.Close() // Return an error from the callback expectedErr := io.EOF err := iter.ForEach(ctx, func(commit *Commit) error { return expectedErr }) if !errors.Is(err, expectedErr) { t.Errorf("Expected %v, got %v", expectedErr, err) } } ================================================ FILE: modules/zeta/object/commit_walker_topo_order.go ================================================ package object import ( "errors" "context" "io" "github.com/antgroup/hugescm/modules/plumbing" "github.com/emirpasic/gods/trees/binaryheap" ) // commitStacker is an interface for commit collection data structures used by // the topological order iterator. It provides basic stack/heap operations. type commitStacker interface { Push(c *Commit) Pop() (*Commit, bool) Peek() (*Commit, bool) Size() int } // commitStack implements a LIFO stack for commits. type commitStack struct { stack []*Commit } func (cs *commitStack) Push(c *Commit) { cs.stack = append(cs.stack, c) } // Pop removes and returns the most recently added commit from the stack. // Returns false if the stack is empty. func (cs *commitStack) Pop() (*Commit, bool) { if len(cs.stack) == 0 { return nil, false } c := cs.stack[len(cs.stack)-1] cs.stack = cs.stack[:len(cs.stack)-1] return c, true } // Peek returns the most recently added commit from the stack without removing it. // Returns false if the stack is empty. func (cs *commitStack) Peek() (*Commit, bool) { if len(cs.stack) == 0 { return nil, false } return cs.stack[len(cs.stack)-1], true } // Size returns the number of commits currently in the stack. func (cs *commitStack) Size() int { return len(cs.stack) } // commitHeap implements commitStacker using a binary heap (priority queue). // The heap is ordered by commit timestamp to ensure commits are visited // in chronological order. type commitHeap struct { *binaryheap.Heap } // Push adds a new commit to the heap. func (h *commitHeap) Push(c *Commit) { h.Heap.Push(c) } // Pop removes and returns the top element from the heap. // Returns false if the heap is empty. func (h *commitHeap) Pop() (*Commit, bool) { c, ok := h.Heap.Pop() if !ok { return nil, false } return c.(*Commit), true } // Peek returns the top element from the heap without removing it. // Returns false if the heap is empty. func (h *commitHeap) Peek() (*Commit, bool) { c, ok := h.Heap.Peek() if !ok { return nil, false } return c.(*Commit), true } // Size returns the number of elements in the heap. func (h *commitHeap) Size() int { return h.Heap.Size() } // composeIgnores combines the explicit ignore list with the seenExternal set // to create a unified map of commits to skip during traversal. func composeIgnores(ignore []plumbing.Hash, seenExternal map[plumbing.Hash]bool) map[plumbing.Hash]bool { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } for h := range seenExternal { seen[h] = true } return seen } // commitTopoOrderIterator implements topological sorting of commits in the commit graph. // It ensures that a commit is only visited after all commits that point to it have been // visited (i.e., parent commits are visited before their children). // This is the standard "git log --topo-order" behavior. type commitTopoOrderIterator struct { // explorerStack is a heap ordered by commit time, used to discover commits explorerStack commitStacker // visitStack is a LIFO stack that holds commits ready to be visited visitStack commitStacker // inCounts tracks how many unvisited children each commit has // A commit with inCount == 0 is ready to visit inCounts map[plumbing.Hash]int // seen tracks commits that should be skipped (ignore list or seenExternal) seen map[plumbing.Hash]bool } // NewCommitIterTopoOrder creates a new iterator that walks commits in topological order. // This means commits are output such that they appear in reverse chronological order, // but with a constraint that a commit appears before any of its descendants. // This is similar to "git log --topo-order". func NewCommitIterTopoOrder(c *Commit, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash) *commitTopoOrderIterator { // Create a heap ordered by commit timestamp (newest first) heap := &commitHeap{ Heap: binaryheap.NewWith(func(a, b any) int { return b.(*Commit).Committer.When.Compare(a.(*Commit).Committer.When) }), } stack := &commitStack{ stack: make([]*Commit, 0, 8), } seen := composeIgnores(ignore, seenExternal) if !seen[c.Hash] { heap.Push(c) stack.Push(c) } return &commitTopoOrderIterator{ explorerStack: heap, visitStack: stack, inCounts: make(map[plumbing.Hash]int), seen: seen, } } // Next returns the next commit in topological order. // // Algorithm: // 1. Pop from visitStack until we find a commit with inCount == 0 // 2. Load the commit's parents (nil if missing in shallow clone) // 3. EXPLORE phase: Pop from explorerStack, increment inCounts for all parents // This counts how many unvisited children each parent has // 4. Decrement inCounts for the current commit's parents // If a parent's inCount reaches 0, it's ready to visit, so push to visitStack // // This ensures a commit is only visited after all commits pointing to it have been visited. func (w *commitTopoOrderIterator) Next(ctx context.Context) (*Commit, error) { var next *Commit // Step 1: Find a commit ready to visit (inCount == 0) for { var ok bool next, ok = w.visitStack.Pop() if !ok { return nil, io.EOF } if w.inCounts[next.Hash] == 0 { break } } // Step 2: Load parent commits (nil if missing in shallow clone) parents := make([]*Commit, 0, len(next.Parents)) for _, h := range next.Parents { pc, err := next.b.Commit(ctx, h) if plumbing.IsNoSuchObject(err) { parents = append(parents, nil) // Missing commit in shallow clone continue } if err != nil { return nil, err } parents = append(parents, pc) } // Step 3: EXPLORE phase - discover commits and count references // Pop commits from explorerStack until we're at the same level as next for { toExplore, ok := w.explorerStack.Peek() if !ok { break } if toExplore.Hash != next.Hash && w.explorerStack.Size() == 1 { break } w.explorerStack.Pop() // For each parent, increment inCount (counting how many children reference it) for _, h := range toExplore.Parents { if w.seen[h] { continue } w.inCounts[h]++ if w.inCounts[h] == 1 { // First time seeing this commit, add to explorerStack pc, err := toExplore.b.Commit(ctx, h) if plumbing.IsNoSuchObject(err) { // Skip missing commits in shallow clone continue } if err != nil { return nil, err } w.explorerStack.Push(pc) } } } // Step 4: Decrement inCounts for current commit's parents // If inCount reaches 0, the parent is ready to visit for i, h := range next.Parents { if w.seen[h] { continue } w.inCounts[h]-- if w.inCounts[h] == 0 { if pc := parents[i]; pc != nil { w.visitStack.Push(pc) } } } delete(w.inCounts, next.Hash) return next, nil } // ForEach iterates through all commits in topological order, calling the callback for each one. // Iteration stops if the callback returns an error or ErrStop. func (w *commitTopoOrderIterator) ForEach(ctx context.Context, cb func(*Commit) error) error { for { c, err := w.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if errors.Is(err, plumbing.ErrStop) { break } if err != nil { return err } } return nil } // Close is a no-op for the topological order iterator. func (w *commitTopoOrderIterator) Close() {} ================================================ FILE: modules/zeta/object/difftree.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "bytes" "context" "github.com/antgroup/hugescm/modules/merkletrie" "github.com/antgroup/hugescm/modules/merkletrie/noder" ) // DiffTree compares the content and mode of the blobs found via two // tree objects. // DiffTree does not perform rename detection, use DiffTreeWithOptions // instead to detect renames. func DiffTree(a, b *Tree, m noder.Matcher) (Changes, error) { return DiffTreeContext(context.Background(), a, b, m) } // DiffTreeContext compares the content and mode of the blobs found via two // tree objects. Provided context must be non-nil. // An error will be returned if context expires. func DiffTreeContext(ctx context.Context, a, b *Tree, m noder.Matcher) (Changes, error) { return DiffTreeWithOptions(ctx, a, b, nil, m) } // DiffTreeOptions are the configurable options when performing a diff tree. type DiffTreeOptions struct { // DetectRenames is whether the diff tree will use rename detection. DetectRenames bool // RenameScore is the threshold to of similarity between files to consider // that a pair of delete and insert are a rename. The number must be // exactly between 0 and 100. RenameScore uint // RenameLimit is the maximum amount of files that can be compared when // detecting renames. The number of comparisons that have to be performed // is equal to the number of deleted files * the number of added files. // That means, that if 100 files were deleted and 50 files were added, 5000 // file comparisons may be needed. So, if the rename limit is 50, the number // of both deleted and added needs to be equal or less than 50. // A value of 0 means no limit. RenameLimit uint // OnlyExactRenames performs only detection of exact renames and will not perform // any detection of renames based on file similarity. OnlyExactRenames bool } // DefaultDiffTreeOptions are the default and recommended options for the // diff tree. var DefaultDiffTreeOptions = &DiffTreeOptions{ DetectRenames: true, RenameScore: 60, RenameLimit: 0, OnlyExactRenames: false, } // DiffTreeWithOptions compares the content and mode of the blobs found // via two tree objects with the given options. The provided context // must be non-nil. // If no options are passed, no rename detection will be performed. The // recommended options are DefaultDiffTreeOptions. // An error will be returned if the context expires. // This function will be deprecated and removed in v6 so the default // behavior of DiffTree is to detect renames. func DiffTreeWithOptions( ctx context.Context, a, b *Tree, opts *DiffTreeOptions, m noder.Matcher, ) (Changes, error) { from := NewTreeRootNode(a, m, false) to := NewTreeRootNode(b, m, false) hashEqual := func(a, b noder.Hasher) bool { return bytes.Equal(a.Hash(), b.Hash()) } merkletrieChanges, err := merkletrie.DiffTreeContext(ctx, from, to, hashEqual) if err != nil { return nil, err } changes, err := newChanges(merkletrieChanges) if err != nil { return nil, err } if opts == nil { opts = new(DiffTreeOptions) } if opts.DetectRenames { return DetectRenames(ctx, changes, opts) } return changes, nil } ================================================ FILE: modules/zeta/object/file.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "errors" "bytes" "context" "io" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" "github.com/antgroup/hugescm/modules/streamio" ) type File struct { // Name is the path of the file. It might be relative to a tree, // depending of the function that generates it. Name string // path Path string // Mode is the file mode. Mode filemode.FileMode // Hash of the blob. Hash plumbing.Hash // Size of the (uncompressed) blob. Size int64 b Backend } func newFile(name string, p string, m filemode.FileMode, hash plumbing.Hash, size int64, b Backend) *File { return &File{Name: name, Path: p, Mode: m, Hash: hash, Size: size, b: b} } type readCloser struct { io.Reader io.Closer } func (f *File) IsFragments() bool { if f == nil { return false } return f.Mode.IsFragments() } func (f *File) asFile() *diferenco.File { if f == nil { return nil } return &diferenco.File{Name: f.Path, Hash: f.Hash.String(), Mode: uint32(f.Mode.Origin())} } // OriginReader return ReadCloser func (f *File) OriginReader(ctx context.Context) (io.ReadCloser, int64, error) { if f.b == nil { return nil, 0, io.ErrUnexpectedEOF } br, err := f.b.Blob(ctx, f.Hash) if err != nil { return nil, 0, err } return &readCloser{Reader: br.Contents, Closer: br}, br.Size, nil } const ( sniffLen = 8000 ) func (f *File) Reader(ctx context.Context) (io.ReadCloser, bool, error) { if f.b == nil { return nil, false, io.ErrUnexpectedEOF } br, err := f.b.Blob(ctx, f.Hash) if err != nil { return nil, false, err } sniffBytes, err := streamio.ReadMax(br.Contents, sniffLen) if err != nil { _ = br.Close() return nil, false, err } bin := bytes.IndexByte(sniffBytes, 0) != -1 return &readCloser{Reader: io.MultiReader(bytes.NewReader(sniffBytes), br.Contents), Closer: br}, bin, nil } func (f *File) UnifiedText(ctx context.Context, codecvt bool) (content string, err error) { if f == nil { // NO CONTENT DELETE OR NEWFILE return "", nil } r, _, err := f.OriginReader(ctx) if err != nil { return "", err } defer r.Close() // nolint content, _, err = diferenco.ReadUnifiedText(r, f.Size, codecvt) return content, err } // FileIter provides an iterator for the files in a tree. type FileIter struct { b Backend w *TreeWalker } // NewFileIter takes a Backend and a Tree and returns a // *FileIter that iterates over all files contained in the tree, recursively. func NewFileIter(b Backend, t *Tree) *FileIter { return &FileIter{b: b, w: NewTreeWalker(t, true, nil)} } // Next moves the iterator to the next file and returns a pointer to it. If // there are no more files, it returns io.EOF. func (iter *FileIter) Next(ctx context.Context) (*File, error) { for { name, entry, err := iter.w.Next(ctx) if err != nil { return nil, err } if entry.Mode == filemode.Dir || entry.Mode == filemode.Submodule || entry.IsFragments() { continue } return newFile(name, "", entry.Mode, entry.Hash, entry.Size, iter.b), nil } } // ForEach call the cb function for each file contained in this iter until // an error happens or the end of the iter is reached. If plumbing.ErrStop is sent // the iteration is stop but no error is returned. The iterator is closed. func (iter *FileIter) ForEach(ctx context.Context, cb func(*File) error) error { defer iter.Close() for { f, err := iter.Next(ctx) if err != nil { if errors.Is(err, io.EOF) { return nil } return err } if err := cb(f); err != nil { if errors.Is(err, plumbing.ErrStop) { return nil } return err } } } func (iter *FileIter) Close() { iter.w.Close() } ================================================ FILE: modules/zeta/object/fragments.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package object import ( "errors" "io" "sort" "github.com/antgroup/hugescm/modules/binary" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" ) var ( FRAGMENTS_MAGIC = [4]byte{'Z', 'F', 0x00, 0x01} ) type Fragment struct { Index uint32 `json:"index"` Size uint64 `json:"size"` Hash plumbing.Hash `json:"hash"` } type FragmentsOrder []*Fragment // Len implements sort.Interface.Len() and return the length of the underlying // slice. func (s FragmentsOrder) Len() int { return len(s) } // Swap implements sort.Interface.Swap() and swaps the two elements at i and j. func (s FragmentsOrder) Swap(i, j int) { s[i], s[j] = s[j], s[i] } // Less implements sort.Interface.Less() and returns whether the element at "i" // is compared as "less" than the element at "j". In other words, it returns if // the element at "i" should be sorted ahead of that at "j". // // It performs this comparison in lexicographic byte-order according to the // rules above (see FragmentsOrder). func (s FragmentsOrder) Less(i, j int) bool { return s[i].Index < s[j].Index } type Fragments struct { Hash plumbing.Hash // NOT Encode Size uint64 Origin plumbing.Hash // origin file hash checksum Entries []*Fragment b Backend } func (f *Fragments) Encode(w io.Writer) error { sort.Sort(FragmentsOrder(f.Entries)) // sort _, err := w.Write(FRAGMENTS_MAGIC[:]) if err != nil { return err } if err := binary.WriteUint64(w, f.Size); err != nil { return err } if _, err = w.Write(f.Origin[:]); err != nil { return err } for _, entry := range f.Entries { if err := binary.WriteUint32(w, entry.Index); err != nil { return err } if err := binary.WriteUint64(w, entry.Size); err != nil { return err } if _, err = w.Write(entry.Hash[:]); err != nil { return err } } return nil } func (f *Fragments) Decode(reader Reader) error { if reader.Type() != FragmentsObject { return ErrUnsupportedObject } f.Hash = reader.Hash() r := streamio.GetBufioReader(reader) defer streamio.PutBufioReader(r) f.Entries = nil var err error if f.Size, err = binary.ReadUint64(r); err != nil { return err } if _, err = io.ReadFull(r, f.Origin[:]); err != nil { return err } for { entry := new(Fragment) if entry.Index, err = binary.ReadUint32(r); err != nil { if errors.Is(err, io.EOF) { break } return err } if entry.Size, err = binary.ReadUint64(r); err != nil { return err } if _, err = io.ReadFull(r, entry.Hash[:]); err != nil { return err } f.Entries = append(f.Entries, entry) } return nil } ================================================ FILE: modules/zeta/object/merge_base.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "context" "errors" "sort" "github.com/antgroup/hugescm/modules/plumbing" ) // errIsReachable is thrown when first commit is an ancestor of the second var errIsReachable = errors.New("first is reachable from second") // MergeBase mimics the behavior of `git merge-base actual other`, returning the // best common ancestor between the actual and the passed one. // The best common ancestors can not be reached from other common ancestors. func (c *Commit) MergeBase(ctx context.Context, other *Commit) ([]*Commit, error) { // use sortedByCommitDateDesc strategy sorted := sortByCommitDateDesc(c, other) newer := sorted[0] older := sorted[1] newerHistory, err := ancestorsIndex(ctx, older, newer) if errors.Is(err, errIsReachable) { return []*Commit{older}, nil } if err != nil { return nil, err } var res []*Commit inNewerHistory := isInIndexCommitFilter(newerHistory) resIter := NewFilterCommitIter(older, &inNewerHistory, &inNewerHistory) _ = resIter.ForEach(ctx, func(commit *Commit) error { res = append(res, commit) return nil }) return Independents(ctx, res) } // IsAncestor returns true if the actual commit is ancestor of the passed one. // It returns an error if the history is not traversable // It mimics the behavior of `zeta merge --is-ancestor actual other` func (c *Commit) IsAncestor(ctx context.Context, other *Commit) (bool, error) { found := false iter := NewCommitPreorderIter(other, nil, nil) err := iter.ForEach(ctx, func(comm *Commit) error { if comm.Hash != c.Hash { return nil } found = true return plumbing.ErrStop }) return found, err } // ancestorsIndex returns a map with the ancestors of the starting commit if the // excluded one is not one of them. It returns errIsReachable if the excluded commit // is ancestor of the starting, or another error if the history is not traversable. func ancestorsIndex(ctx context.Context, excluded, starting *Commit) (map[plumbing.Hash]bool, error) { if excluded.Hash.String() == starting.Hash.String() { return nil, errIsReachable } startingHistory := make(map[plumbing.Hash]bool) startingIter := NewCommitIterBFS(starting, nil, nil) err := startingIter.ForEach(ctx, func(commit *Commit) error { if commit.Hash == excluded.Hash { return errIsReachable } startingHistory[commit.Hash] = true return nil }) if err != nil { return nil, err } return startingHistory, nil } // Independents returns a subset of the passed commits, that are not reachable the others // It mimics the behavior of `git merge-base --independent commit...`. func Independents(ctx context.Context, commits []*Commit) ([]*Commit, error) { // use sortedByCommitDateDesc strategy candidates := sortByCommitDateDesc(commits...) candidates = removeDuplicated(candidates) seen := map[plumbing.Hash]struct{}{} var isLimit CommitFilter = func(commit *Commit) bool { _, ok := seen[commit.Hash] return ok } if len(candidates) < 2 { return candidates, nil } pos := 0 for { from := candidates[pos] others := remove(candidates, from) fromHistoryIter := NewFilterCommitIter(from, nil, &isLimit) err := fromHistoryIter.ForEach(ctx, func(fromAncestor *Commit) error { for _, other := range others { if fromAncestor.Hash == other.Hash { candidates = remove(candidates, other) others = remove(others, other) } } if len(candidates) == 1 { return plumbing.ErrStop } seen[fromAncestor.Hash] = struct{}{} return nil }) if err != nil { return nil, err } nextPos := indexOf(candidates, from) + 1 if nextPos >= len(candidates) { break } pos = nextPos } return candidates, nil } // sortByCommitDateDesc returns the passed commits, sorted by `committer.When desc` // // Following this strategy, it is tried to reduce the time needed when walking // the history from one commit to reach the others. It is assumed that ancestors // use to be committed before its descendant; // That way `Independents(A^, A)` will be processed as being `Independents(A, A^)`; // so starting by `A` it will be reached `A^` way sooner than walking from `A^` // to the initial commit, and then from `A` to `A^`. func sortByCommitDateDesc(commits ...*Commit) []*Commit { sorted := make([]*Commit, len(commits)) copy(sorted, commits) sort.Slice(sorted, func(i, j int) bool { return sorted[i].Committer.When.After(sorted[j].Committer.When) }) return sorted } // indexOf returns the first position where target was found in the passed commits func indexOf(commits []*Commit, target *Commit) int { for i, commit := range commits { if target.Hash == commit.Hash { return i } } return -1 } // remove returns the passed commits excluding the commit toDelete func remove(commits []*Commit, toDelete *Commit) []*Commit { res := make([]*Commit, len(commits)) j := 0 for _, commit := range commits { if commit.Hash == toDelete.Hash { continue } res[j] = commit j++ } return res[:j] } // removeDuplicated removes duplicated commits from the passed slice of commits func removeDuplicated(commits []*Commit) []*Commit { seen := make(map[plumbing.Hash]bool, len(commits)) res := make([]*Commit, len(commits)) j := 0 for _, commit := range commits { if _, ok := seen[commit.Hash]; ok { continue } seen[commit.Hash] = true res[j] = commit j++ } return res[:j] } // isInIndexCommitFilter returns a commitFilter that returns true // if the commit is in the passed index. func isInIndexCommitFilter(index map[plumbing.Hash]bool) CommitFilter { return func(c *Commit) bool { return index[c.Hash] } } ================================================ FILE: modules/zeta/object/object.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package object import ( "bytes" "encoding/base64" "encoding/binary" "encoding/json" "errors" "fmt" "io" "strings" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" "github.com/antgroup/hugescm/modules/strengthen" ) var ( ErrUnsupportedObject = errors.New("unsupported object type") ) type ObjectType int8 const ( InvalidObject ObjectType = 0 CommitObject ObjectType = 1 TreeObject ObjectType = 2 BlobObject ObjectType = 3 TagObject ObjectType = 4 // 5 reserved for future expansion OFSDeltaObject ObjectType = 6 REFDeltaObject ObjectType = 7 FragmentsObject ObjectType = 8 // File Fragmentation Extension AnyObject ObjectType = -127 ) func (t ObjectType) String() string { switch t { case CommitObject: return "commit" case TreeObject: return "tree" case BlobObject: return "blob" case TagObject: return "tag" case FragmentsObject: return "fragments" case AnyObject: return "any" case OFSDeltaObject: return "ofs-delta" case REFDeltaObject: return "ref-delta" default: return "unknown" } } // ObjectTypeFromString converts from a given string to an ObjectType // enumeration instance. func ObjectTypeFromString(s string) ObjectType { switch strings.ToLower(s) { case "blob": return BlobObject case "tree": return TreeObject case "commit": return CommitObject case "tag": return TagObject case "fragments": return FragmentsObject case "any": return AnyObject case "ofs-delta": return OFSDeltaObject case "ref-delta": return REFDeltaObject default: return InvalidObject } } func (t ObjectType) MarshalJSON() ([]byte, error) { return strengthen.BufferCat("\"", t.String(), "\""), nil } func (t *ObjectType) UnmarshalJSON(b []byte) error { var s string if err := json.Unmarshal(b, &s); err != nil { return err } *t = ObjectTypeFromString(s) return nil } type Reader interface { io.Reader Hash() plumbing.Hash Type() ObjectType } type reader struct { io.Reader hash plumbing.Hash objectType ObjectType } func (r *reader) Hash() plumbing.Hash { return r.hash } func (r *reader) Type() ObjectType { return r.objectType } const ( // ZSTD_MAGIC: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frames ZSTD_MAGIC = 0xFD2FB528 ) func isZstdMagic(magic [4]byte) bool { return binary.LittleEndian.Uint32(magic[:]) == ZSTD_MAGIC } func Decode(r io.Reader, oid plumbing.Hash, b Backend) (any, error) { var magic [4]byte var err error if _, err = io.ReadFull(r, magic[:]); err != nil { return nil, err } if isZstdMagic(magic) { zr, err := streamio.GetZstdReader(io.MultiReader(bytes.NewReader(magic[:]), r)) if err != nil { return nil, err } defer streamio.PutZstdReader(zr) r = zr if _, err = io.ReadFull(r, magic[:]); err != nil { return nil, err } } if bytes.Equal(magic[:], COMMIT_MAGIC[:]) { c := &Commit{b: b} err = c.Decode(&reader{Reader: r, hash: oid, objectType: CommitObject}) return c, err } if bytes.Equal(magic[:], TREE_MAGIC[:]) { t := &Tree{b: b} err = t.Decode(&reader{Reader: r, hash: oid, objectType: TreeObject}) return t, err } if bytes.Equal(magic[:], FRAGMENTS_MAGIC[:]) { f := &Fragments{b: b} err = f.Decode(&reader{Reader: r, hash: oid, objectType: FragmentsObject}) return f, err } if bytes.Equal(magic[:], TAG_MAGIC[:]) { t := &Tag{} err = t.Decode(&reader{Reader: r, hash: oid, objectType: TagObject}) return t, err } return nil, ErrUnsupportedObject } func Base64Decode(input string, oid plumbing.Hash, b Backend) (any, error) { rawBytes, err := base64.StdEncoding.DecodeString(input) if err != nil { return nil, err } return Decode(bytes.NewReader(rawBytes), oid, b) } func Base64DecodeAs[T Commit | Tree | Fragments | Tag](input string, oid plumbing.Hash, b Backend) (*T, error) { rawBytes, err := base64.StdEncoding.DecodeString(input) if err != nil { return nil, err } a, err := Decode(bytes.NewReader(rawBytes), oid, b) if err != nil { return nil, err } if v, ok := a.(*T); ok { return v, nil } return nil, ErrUnsupportedObject } func HashObject(r io.Reader) (plumbing.Hash, ObjectType, error) { var magic [4]byte var err error if _, err = io.ReadFull(r, magic[:]); err != nil { return plumbing.ZeroHash, InvalidObject, err } if isZstdMagic(magic) { zr, err := streamio.GetZstdReader(io.MultiReader(bytes.NewReader(magic[:]), r)) if err != nil { return plumbing.ZeroHash, InvalidObject, err } defer streamio.PutZstdReader(zr) r = zr if _, err = io.ReadFull(r, magic[:]); err != nil { return plumbing.ZeroHash, InvalidObject, err } } var t ObjectType switch { case bytes.Equal(magic[:], TREE_MAGIC[:]): t = TreeObject case bytes.Equal(magic[:], COMMIT_MAGIC[:]): t = CommitObject case bytes.Equal(magic[:], FRAGMENTS_MAGIC[:]): t = FragmentsObject case bytes.Equal(magic[:], TAG_MAGIC[:]): t = TagObject default: return plumbing.ZeroHash, InvalidObject, fmt.Errorf("unsupported magic '%08x'", magic[:]) } hasher := plumbing.NewHasher() if _, err := io.Copy(hasher, io.MultiReader(bytes.NewReader(magic[:]), r)); err != nil { return plumbing.ZeroHash, InvalidObject, err } return hasher.Sum(), t, nil } type Encoder interface { Encode(io.Writer) error } func Base64Encode(e Encoder) (string, error) { var b bytes.Buffer if err := e.Encode(&b); err != nil { return "", err } return base64.StdEncoding.EncodeToString(b.Bytes()), nil } func Hash(e Encoder) plumbing.Hash { h := plumbing.NewHasher() if err := e.Encode(h); err != nil { return plumbing.ZeroHash } return h.Sum() } func NewSnapshotCommit(cc *Commit, b Backend) *Commit { return &Commit{ Hash: cc.Hash, Author: cc.Author, Committer: cc.Committer, Parents: cc.Parents, Tree: cc.Tree, ExtraHeaders: cc.ExtraHeaders, Message: cc.Message, b: b, } } func NewSnapshotTree(t *Tree, b Backend) *Tree { entries := make([]*TreeEntry, 0, len(t.Entries)) for _, e := range t.Entries { entries = append(entries, e.Clone()) } return &Tree{ Hash: t.Hash, Entries: entries, b: b, } } func NewEmptyTree(b Backend) *Tree { return &Tree{ Hash: plumbing.EmptyTree, b: b, } } ================================================ FILE: modules/zeta/object/patch.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "context" "errors" "fmt" "io" "path" "strconv" "strings" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/plumbing" ) type PatchOptions struct { Algorithm diferenco.Algorithm Textconv bool Match func(string) bool } func sizeOverflow(f *File) bool { return f != nil && f.Size > diferenco.MAX_DIFF_SIZE } func PathRenameCombine(from, to string) string { fromPaths := strings.Split(from, "/") toPaths := strings.Split(to, "/") n := min(len(fromPaths), len(toPaths)) i := 0 for i < n && fromPaths[i] == toPaths[i] { i++ } if i == 0 { return fmt.Sprintf("%s => %s", from, to) } return fmt.Sprintf("%s/{%s => %s}", path.Join(fromPaths[0:i]...), path.Join(fromPaths[i:]...), path.Join(toPaths[i:]...)) } func fileStatName(from, to *File) string { if from == nil { // New File is created. return to.Path } if to == nil { // File is deleted. return from.Path } if from.Path != to.Path { // File is renamed. return PathRenameCombine(from.Path, to.Path) } return from.Path } func fileStatWithContext(ctx context.Context, opts *PatchOptions, c *Change) (*FileStat, error) { from, to, err := c.Files() if err != nil { return nil, err } if from == nil && to == nil { return nil, ErrMalformedChange } s := &FileStat{ Name: fileStatName(from, to), } if from.IsFragments() || to.IsFragments() { return s, nil } // --- check size limit if sizeOverflow(from) || sizeOverflow(to) { return s, nil } fromContent, err := from.UnifiedText(ctx, opts.Textconv) if plumbing.IsNoSuchObject(err) || errors.Is(err, diferenco.ErrBinaryData) { return s, nil } if err != nil { return nil, err } toContent, err := to.UnifiedText(ctx, opts.Textconv) if plumbing.IsNoSuchObject(err) || errors.Is(err, diferenco.ErrBinaryData) { return s, nil } if err != nil { return nil, err } stat, err := diferenco.Stat(ctx, &diferenco.Options{S1: fromContent, S2: toContent, A: opts.Algorithm}) if err != nil { return nil, err } s.Addition = stat.Addition s.Deletion = stat.Deletion return s, nil } func getStatsContext(ctx context.Context, opts *PatchOptions, changes ...*Change) ([]FileStat, error) { if opts.Match == nil { opts.Match = func(s string) bool { return true } } stats := make([]FileStat, 0, 100) for _, c := range changes { if !opts.Match(c.name()) { continue } s, err := fileStatWithContext(ctx, opts, c) if err != nil { return nil, err } stats = append(stats, *s) } return stats, nil } func filePatchWithContext(ctx context.Context, opts *PatchOptions, c *Change) (*diferenco.Patch, error) { from, to, err := c.Files() if err != nil { return nil, err } if from == nil && to == nil { return nil, ErrMalformedChange } if from.IsFragments() || to.IsFragments() { return &diferenco.Patch{From: from.asFile(), To: to.asFile(), IsFragments: true}, nil } // --- check size limit if sizeOverflow(from) || sizeOverflow(to) { return &diferenco.Patch{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil } fromContent, err := from.UnifiedText(ctx, opts.Textconv) if plumbing.IsNoSuchObject(err) || errors.Is(err, diferenco.ErrBinaryData) { return &diferenco.Patch{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil } if err != nil { return nil, err } toContent, err := to.UnifiedText(ctx, opts.Textconv) if plumbing.IsNoSuchObject(err) || errors.Is(err, diferenco.ErrBinaryData) { return &diferenco.Patch{From: from.asFile(), To: to.asFile(), IsBinary: true}, nil } if err != nil { return nil, err } return diferenco.Unified(ctx, &diferenco.Options{From: from.asFile(), To: to.asFile(), S1: fromContent, S2: toContent, A: opts.Algorithm}) } func getPatchContext(ctx context.Context, opts *PatchOptions, changes ...*Change) ([]*diferenco.Patch, error) { if opts.Match == nil { opts.Match = func(s string) bool { return true } } patch := make([]*diferenco.Patch, 0, len(changes)) for _, c := range changes { if !opts.Match(c.name()) { continue } p, err := filePatchWithContext(ctx, opts, c) if err != nil { return nil, err } patch = append(patch, p) } return patch, nil } // FileStat stores the status of changes in content of a file. type FileStat struct { Name string `json:"name"` Addition int `json:"addition"` Deletion int `json:"deletion"` } func (fs FileStat) String() string { var b strings.Builder StatsWriteTo(&b, []FileStat{fs}, false) return b.String() } // FileStats is a collection of FileStat. type FileStats []FileStat func (fileStats FileStats) String() string { var b strings.Builder StatsWriteTo(&b, fileStats, false) return b.String() } // StatsWriteTo prints the stats of changes in content of files. // Original implementation: https://github.com/git/git/blob/1a87c842ece327d03d08096395969aca5e0a6996/diff.c#L2615 // Parts of the output: // |<+++/---> // example: " main.go | 10 +++++++--- " func StatsWriteTo(w io.Writer, fileStats []FileStat, isColorSupported bool) { maxGraphWidth := uint(53) maxNameLen := 0 maxChangeLen := 0 scaleLinear := func(it, width, maxVal uint) uint { if it == 0 || maxVal == 0 { return 0 } return 1 + (it * (width - 1) / maxVal) } for _, fs := range fileStats { if len(fs.Name) > maxNameLen { maxNameLen = len(fs.Name) } changes := strconv.Itoa(fs.Addition + fs.Deletion) if len(changes) > maxChangeLen { maxChangeLen = len(changes) } } for _, fs := range fileStats { add := uint(fs.Addition) del := uint(fs.Deletion) np := maxNameLen - len(fs.Name) cp := maxChangeLen - len(strconv.Itoa(fs.Addition+fs.Deletion)) total := add + del if total > maxGraphWidth { add = scaleLinear(add, maxGraphWidth, total) del = scaleLinear(del, maxGraphWidth, total) } adds := strings.Repeat("+", int(add)) dels := strings.Repeat("-", int(del)) namePad := strings.Repeat(" ", np) changePad := strings.Repeat(" ", cp) if isColorSupported { _, _ = fmt.Fprintf(w, " %s%s | %s%d \x1b[32m%s\x1b[31m%s\x1b[0m\n", fs.Name, namePad, changePad, total, adds, dels) continue } _, _ = fmt.Fprintf(w, " %s%s | %s%d %s%s\n", fs.Name, namePad, changePad, total, adds, dels) } } ================================================ FILE: modules/zeta/object/patch_test.go ================================================ package object import ( "fmt" "os" "testing" ) func TestPathRenameCombine(t *testing.T) { pp := []struct { A, B string }{ { "a.txt", "b.txt", }, { "pkg/command/command_merge_file.go", "pkg/command/merge.go", }, { "pkg/command/command_merge_file.go", "pkg/merge.go", }, } for _, i := range pp { d := PathRenameCombine(i.A, i.B) fmt.Fprintf(os.Stderr, "%s => %s|%s\n", i.A, i.B, d) } } ================================================ FILE: modules/zeta/object/rename.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "context" "errors" "io" "sort" "strings" "github.com/antgroup/hugescm/modules/merkletrie" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" ) // DetectRenames detects the renames in the given changes on two trees with // the given options. It will return the given changes grouping additions and // deletions into modifications when possible. // If options is nil, the default diff tree options will be used. func DetectRenames( ctx context.Context, changes Changes, opts *DiffTreeOptions, ) (Changes, error) { if opts == nil { opts = DefaultDiffTreeOptions } detector := &renameDetector{ renameScore: int(opts.RenameScore), renameLimit: int(opts.RenameLimit), onlyExact: opts.OnlyExactRenames, } for _, c := range changes { action, err := c.Action() if err != nil { return nil, err } switch action { case merkletrie.Insert: detector.added = append(detector.added, c) case merkletrie.Delete: detector.deleted = append(detector.deleted, c) default: detector.modified = append(detector.modified, c) } } return detector.detect(ctx) } // renameDetector will detect and resolve renames in a set of changes. // see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java type renameDetector struct { added []*Change deleted []*Change modified []*Change renameScore int renameLimit int onlyExact bool } // detectExactRenames detects matches files that were deleted with files that // were added where the hash is the same on both. If there are multiple targets // the one with the most similar path will be chosen as the rename and the // rest as either deletions or additions. func (d *renameDetector) detectExactRenames() { added := groupChangesByHash(d.added) deletes := groupChangesByHash(d.deleted) var uniqueAdds []*Change var nonUniqueAdds [][]*Change var addedLeft []*Change for _, cs := range added { if len(cs) == 1 { uniqueAdds = append(uniqueAdds, cs[0]) } else { nonUniqueAdds = append(nonUniqueAdds, cs) } } for _, c := range uniqueAdds { hash := changeHash(c) deleted := deletes[hash] if len(deleted) == 1 { if sameMode(c, deleted[0]) { d.modified = append(d.modified, &Change{From: deleted[0].From, To: c.To}) delete(deletes, hash) } else { addedLeft = append(addedLeft, c) } } else if len(deleted) > 1 { bestMatch := bestNameMatch(c, deleted) if bestMatch != nil && sameMode(c, bestMatch) { d.modified = append(d.modified, &Change{From: bestMatch.From, To: c.To}) delete(deletes, hash) var newDeletes = make([]*Change, 0, len(deleted)-1) for _, d := range deleted { if d != bestMatch { newDeletes = append(newDeletes, d) } } deletes[hash] = newDeletes } } else { addedLeft = append(addedLeft, c) } } for _, added := range nonUniqueAdds { hash := changeHash(added[0]) deleted := deletes[hash] if len(deleted) == 1 { deleted := deleted[0] bestMatch := bestNameMatch(deleted, added) if bestMatch != nil && sameMode(deleted, bestMatch) { d.modified = append(d.modified, &Change{From: deleted.From, To: bestMatch.To}) delete(deletes, hash) for _, c := range added { if c != bestMatch { addedLeft = append(addedLeft, c) } } } else { addedLeft = append(addedLeft, added...) } } else if len(deleted) > 1 { maxSize := len(deleted) * len(added) if d.renameLimit > 0 && d.renameLimit < maxSize { maxSize = d.renameLimit } matrix := make(similarityMatrix, 0, maxSize) for delIdx, del := range deleted { deletedName := changeName(del) for addIdx, add := range added { addedName := changeName(add) score := nameSimilarityScore(addedName, deletedName) matrix = append(matrix, similarityPair{added: addIdx, deleted: delIdx, score: score}) if len(matrix) >= maxSize { break } } if len(matrix) >= maxSize { break } } sort.Stable(matrix) usedAdds := make(map[*Change]struct{}) usedDeletes := make(map[*Change]struct{}) for i := len(matrix) - 1; i >= 0; i-- { del := deleted[matrix[i].deleted] add := added[matrix[i].added] if add == nil || del == nil { // it was already matched continue } usedAdds[add] = struct{}{} usedDeletes[del] = struct{}{} d.modified = append(d.modified, &Change{From: del.From, To: add.To}) added[matrix[i].added] = nil deleted[matrix[i].deleted] = nil } for _, c := range added { if _, ok := usedAdds[c]; !ok && c != nil { addedLeft = append(addedLeft, c) } } var newDeletes = make([]*Change, 0, len(deleted)-len(usedDeletes)) for _, c := range deleted { if _, ok := usedDeletes[c]; !ok && c != nil { newDeletes = append(newDeletes, c) } } deletes[hash] = newDeletes } else { addedLeft = append(addedLeft, added...) } } d.added = addedLeft d.deleted = nil for _, dels := range deletes { d.deleted = append(d.deleted, dels...) } } // detectContentRenames detects renames based on the similarity of the content // in the files by building a matrix of pairs between sources and destinations // and matching by the highest score. // see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java func (d *renameDetector) detectContentRenames(ctx context.Context) error { cnt := max(len(d.added), len(d.deleted)) if d.renameLimit > 0 && cnt > d.renameLimit { return nil } srcs, dsts := d.deleted, d.added matrix, err := buildSimilarityMatrix(ctx, srcs, dsts, d.renameScore) if err != nil { return err } renames := make([]*Change, 0, min(len(matrix), len(dsts))) // Match rename pairs on a first come, first serve basis until // we have looked at everything that is above the minimum score. for i := len(matrix) - 1; i >= 0; i-- { pair := matrix[i] src := srcs[pair.deleted] dst := dsts[pair.added] if dst == nil || src == nil { // It was already matched before continue } renames = append(renames, &Change{From: src.From, To: dst.To}) // Claim destination and source as matched dsts[pair.added] = nil srcs[pair.deleted] = nil } d.modified = append(d.modified, renames...) d.added = compactChanges(dsts) d.deleted = compactChanges(srcs) return nil } func (d *renameDetector) detect(ctx context.Context) (Changes, error) { if len(d.added) > 0 && len(d.deleted) > 0 { d.detectExactRenames() if !d.onlyExact { if err := d.detectContentRenames(ctx); err != nil { return nil, err } } } result := make(Changes, 0, len(d.added)+len(d.deleted)+len(d.modified)) result = append(result, d.added...) result = append(result, d.deleted...) result = append(result, d.modified...) sort.Stable(result) return result, nil } func bestNameMatch(change *Change, changes []*Change) *Change { var best *Change var bestScore int cname := changeName(change) for _, c := range changes { score := nameSimilarityScore(cname, changeName(c)) if score > bestScore { bestScore = score best = c } } return best } func nameSimilarityScore(a, b string) int { aDirLen := strings.LastIndexByte(a, '/') + 1 bDirLen := strings.LastIndexByte(b, '/') + 1 dirMin := min(aDirLen, bDirLen) dirMax := max(aDirLen, bDirLen) var dirScoreLtr, dirScoreRtl int if dirMax == 0 { dirScoreLtr = 100 dirScoreRtl = 100 } else { var dirSim int for ; dirSim < dirMin; dirSim++ { if a[dirSim] != b[dirSim] { break } } dirScoreLtr = dirSim * 100 / dirMax if dirScoreLtr == 100 { dirScoreRtl = 100 } else { for dirSim = range dirMin { if a[aDirLen-1-dirSim] != b[bDirLen-1-dirSim] { break } } dirScoreRtl = dirSim * 100 / dirMax } } fileMin := min(len(a)-aDirLen, len(b)-bDirLen) fileMax := max(len(a)-aDirLen, len(b)-bDirLen) fileSim := 0 for ; fileSim < fileMin; fileSim++ { if a[len(a)-1-fileSim] != b[len(b)-1-fileSim] { break } } fileScore := fileSim * 100 / fileMax return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100 } func changeName(c *Change) string { if !c.To.Equal(&empty) { return c.To.Name } return c.From.Name } func changeHash(c *Change) plumbing.Hash { if !c.To.Equal(&empty) { return c.To.TreeEntry.Hash } return c.From.TreeEntry.Hash } func changeMode(c *Change) filemode.FileMode { if !c.To.Equal(&empty) { return c.To.TreeEntry.Mode } return c.From.TreeEntry.Mode } func sameMode(a, b *Change) bool { return changeMode(a) == changeMode(b) } func groupChangesByHash(changes []*Change) map[plumbing.Hash][]*Change { var result = make(map[plumbing.Hash][]*Change) for _, c := range changes { hash := changeHash(c) result[hash] = append(result[hash], c) } return result } type similarityMatrix []similarityPair func (m similarityMatrix) Len() int { return len(m) } func (m similarityMatrix) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m similarityMatrix) Less(i, j int) bool { if m[i].score == m[j].score { if m[i].added == m[j].added { return m[i].deleted < m[j].deleted } return m[i].added < m[j].added } return m[i].score < m[j].score } type similarityPair struct { // index of the added file added int // index of the deleted file deleted int // similarity score score int } const maxMatrixSize = 10000 func buildSimilarityMatrix(ctx context.Context, srcs, dsts []*Change, renameScore int) (similarityMatrix, error) { // Allocate for the worst-case scenario where every pair has a score // that we need to consider. We might not need that many. matrixSize := min(len(srcs)*len(dsts), maxMatrixSize) matrix := make(similarityMatrix, 0, matrixSize) srcSizes := make([]int64, len(srcs)) dstSizes := make([]int64, len(dsts)) dstTooLarge := make(map[int]bool) // Consider each pair of files, if the score is above the minimum // threshold we need to record that scoring in the matrix so we can // later find the best matches. outerLoop: for srcIdx, src := range srcs { if changeMode(src) != filemode.Regular { continue } // Declare the from file and the similarity index here to be able to // reuse it inside the inner loop. The reason to not initialize them // here is so we can skip the initialization in case they happen to // not be needed later. They will be initialized inside the inner // loop if and only if they're needed and reused in subsequent passes. var from *File var s *similarityIndex var err error for dstIdx, dst := range dsts { if changeMode(dst) != filemode.Regular { continue } if dstTooLarge[dstIdx] { continue } var to *File srcSize := srcSizes[srcIdx] if srcSize == 0 { from, _, err = src.Files() if err != nil { return nil, err } srcSize = from.Size + 1 srcSizes[srcIdx] = srcSize } dstSize := dstSizes[dstIdx] if dstSize == 0 { _, to, err = dst.Files() if err != nil { return nil, err } dstSize = to.Size + 1 dstSizes[dstIdx] = dstSize } minVal, maxVal := srcSize, dstSize if dstSize < srcSize { minVal = dstSize maxVal = srcSize } if int(minVal*100/maxVal) < renameScore { // File sizes are too different to be a match continue } if s == nil { s, err = fileSimilarityIndex(ctx, from) if err != nil { if errors.Is(err, errIndexFull) { continue outerLoop } return nil, err } } if to == nil { _, to, err = dst.Files() if err != nil { return nil, err } } di, err := fileSimilarityIndex(ctx, to) if err != nil { if errors.Is(err, errIndexFull) { dstTooLarge[dstIdx] = true } return nil, err } contentScore := s.score(di, 10000) // The name score returns a value between 0 and 100, so we need to // convert it to the same range as the content score. nameScore := nameSimilarityScore(src.From.Name, dst.To.Name) * 100 score := (contentScore*99 + nameScore*1) / 10000 if score < renameScore { continue } matrix = append(matrix, similarityPair{added: dstIdx, deleted: srcIdx, score: score}) } } sort.Stable(matrix) return matrix, nil } func compactChanges(changes []*Change) []*Change { var result []*Change for _, c := range changes { if c != nil { result = append(result, c) } } return result } const ( keyShift = 32 maxCountValue = (1 << keyShift) - 1 ) var errIndexFull = errors.New("index is full") // similarityIndex is an index structure of lines/blocks in one file. // This structure can be used to compute an approximation of the similarity // between two files. // To save space in memory, this index uses a space efficient encoding which // will not exceed 1MiB per instance. The index starts out at a smaller size // (closer to 2KiB), but may grow as more distinct blocks within the scanned // file are discovered. // see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java type similarityIndex struct { hashed uint64 // number of non-zero entries in hashes numHashes int growAt int hashes []keyCountPair hashBits int } func fileSimilarityIndex(ctx context.Context, f *File) (*similarityIndex, error) { idx := newSimilarityIndex() if err := idx.hash(ctx, f); err != nil { return nil, err } sort.Stable(keyCountPairs(idx.hashes)) return idx, nil } func newSimilarityIndex() *similarityIndex { return &similarityIndex{ hashBits: 8, hashes: make([]keyCountPair, 1<<8), growAt: shouldGrowAt(8), } } func (i *similarityIndex) hash(ctx context.Context, f *File) error { r, bin, err := f.Reader(ctx) if err != nil { return err } defer r.Close() // nolint return i.hashContent(r, f.Size, bin) } func (i *similarityIndex) hashContent(r io.Reader, size int64, isBin bool) error { var buf = make([]byte, 4096) var ptr, cnt int remaining := size for 0 < remaining { hash := 5381 var blockHashedCnt uint64 // Hash one line or block, whatever happens first n := int64(0) for { if ptr == cnt { ptr = 0 var err error cnt, err = io.ReadFull(r, buf) if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) { return err } if cnt == 0 { return io.EOF } } n++ c := buf[ptr] & 0xff ptr++ // Ignore CR in CRLF sequence if it's text if !isBin && c == '\r' && ptr < cnt && buf[ptr] == '\n' { continue } blockHashedCnt++ if c == '\n' { break } hash = (hash << 5) + hash + int(c) if n >= 64 || n >= remaining { break } } i.hashed += blockHashedCnt if err := i.add(hash, blockHashedCnt); err != nil { return err } remaining -= n } return nil } // score computes the similarity score between this index and another one. // A region of a file is defined as a line in a text file or a fixed-size // block in a binary file. To prepare an index, each region in the file is // hashed; the values and counts of hashes are retained in a sorted table. // Define the similarity fraction F as the count of matching regions between // the two files divided between the maximum count of regions in either file. // The similarity score is F multiplied by the maxScore constant, yielding a // range [0, maxScore]. It is defined as maxScore for the degenerate case of // two empty files. // The similarity score is symmetrical; i.e. a.score(b) == b.score(a). func (i *similarityIndex) score(other *similarityIndex, maxScore int) int { var maxHashed = i.hashed if maxHashed < other.hashed { maxHashed = other.hashed } if maxHashed == 0 { return maxScore } return int(i.common(other) * uint64(maxScore) / maxHashed) } func (i *similarityIndex) common(dst *similarityIndex) uint64 { srcIdx, dstIdx := 0, 0 if i.numHashes == 0 || dst.numHashes == 0 { return 0 } var common uint64 srcKey, dstKey := i.hashes[srcIdx].key(), dst.hashes[dstIdx].key() for { if srcKey == dstKey { srcCnt, dstCnt := i.hashes[srcIdx].count(), dst.hashes[dstIdx].count() if srcCnt < dstCnt { common += srcCnt } else { common += dstCnt } srcIdx++ if srcIdx == len(i.hashes) { break } srcKey = i.hashes[srcIdx].key() dstIdx++ if dstIdx == len(dst.hashes) { break } dstKey = dst.hashes[dstIdx].key() } else if srcKey < dstKey { // Region of src that is not in dst srcIdx++ if srcIdx == len(i.hashes) { break } srcKey = i.hashes[srcIdx].key() } else { // Region of dst that is not in src dstIdx++ if dstIdx == len(dst.hashes) { break } dstKey = dst.hashes[dstIdx].key() } } return common } func (i *similarityIndex) add(key int, cnt uint64) error { key = int(uint32(key) * 0x9e370001 >> 1) j := i.slot(key) for { v := i.hashes[j] if v == 0 { // It's an empty slot, so we can store it here. if i.growAt <= i.numHashes { if err := i.grow(); err != nil { return err } j = i.slot(key) continue } var err error i.hashes[j], err = newKeyCountPair(key, cnt) if err != nil { return err } i.numHashes++ return nil } else if v.key() == key { // It's the same key, so increment the counter. var err error i.hashes[j], err = newKeyCountPair(key, v.count()+cnt) return err } else if j+1 >= len(i.hashes) { j = 0 } else { j++ } } } type keyCountPair uint64 func newKeyCountPair(key int, cnt uint64) (keyCountPair, error) { if cnt > maxCountValue { return 0, errIndexFull } return keyCountPair((uint64(key) << keyShift) | cnt), nil } func (p keyCountPair) key() int { return int(p >> keyShift) } func (p keyCountPair) count() uint64 { return uint64(p) & maxCountValue } func (i *similarityIndex) slot(key int) int { // We use 31 - hashBits because the upper bit was already forced // to be 0 and we want the remaining high bits to be used as the // table slot. return int(uint32(key) >> uint(31-i.hashBits)) } func shouldGrowAt(hashBits int) int { return (1 << uint(hashBits)) * (hashBits - 3) / hashBits } func (i *similarityIndex) grow() error { if i.hashBits == 30 { return errIndexFull } old := i.hashes i.hashBits++ i.growAt = shouldGrowAt(i.hashBits) // TODO(erizocosmico): find a way to check if it will OOM and return // errIndexFull instead. i.hashes = make([]keyCountPair, 1<= len(i.hashes) { j = 0 } } i.hashes[j] = v } } return nil } type keyCountPairs []keyCountPair func (p keyCountPairs) Len() int { return len(p) } func (p keyCountPairs) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p keyCountPairs) Less(i, j int) bool { return p[i] < p[j] } ================================================ FILE: modules/zeta/object/storage.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package object import ( "context" "github.com/antgroup/hugescm/modules/plumbing" ) type Backend interface { Commit(ctx context.Context, oid plumbing.Hash) (*Commit, error) Tree(ctx context.Context, oid plumbing.Hash) (*Tree, error) Fragments(ctx context.Context, oid plumbing.Hash) (*Fragments, error) Tag(ctx context.Context, oid plumbing.Hash) (*Tag, error) Blob(ctx context.Context, oid plumbing.Hash) (*Blob, error) } ================================================ FILE: modules/zeta/object/tag.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "fmt" "io" "strings" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/streamio" ) var ( TAG_MAGIC = [4]byte{'Z', 'G', 0x00, 0x01} ) type Tag struct { Hash plumbing.Hash `json:"hash"` Object plumbing.Hash `json:"object"` ObjectType ObjectType `json:"type"` Name string `json:"name"` Tagger Signature `json:"tagger"` Content string `json:"content"` } // https://git-scm.com/docs/signature-format // https://github.blog/changelog/2022-08-23-ssh-commit-verification-now-supported/ func (t *Tag) Extract() (message string, signature string) { if i := strings.Index(t.Content, "-----BEGIN"); i > 0 { return t.Content[:i], t.Content[i:] } return t.Content, "" } func (t *Tag) Message() string { m, _ := t.Extract() return m } // Decode implements Object.Decode and decodes the uncompressed tag being // read. It returns the number of uncompressed bytes being consumed off of the // stream, which should be strictly equal to the size given. // // If any error was encountered along the way it will be returned, and the // receiving *Tag is considered invalid. func (t *Tag) Decode(reader Reader) error { if reader.Type() != TagObject { return ErrUnsupportedObject } br := streamio.GetBufioReader(reader) defer streamio.PutBufioReader(br) t.Hash = reader.Hash() var ( finishedHeaders bool ) var message strings.Builder for { line, readErr := br.ReadString('\n') if readErr != nil && readErr != io.EOF { return readErr } if finishedHeaders { message.WriteString(line) } else { text := strings.TrimSuffix(line, "\n") if len(text) == 0 { finishedHeaders = true continue } field, value, ok := strings.Cut(text, " ") if !ok { return fmt.Errorf("zeta: invalid tag header: %s", text) } switch field { case "object": if !plumbing.ValidateHashHex(value) { return fmt.Errorf("zeta: unable to decode BLAKE3: %s", value) } t.Object = plumbing.NewHash(value) case "type": t.ObjectType = ObjectTypeFromString(value) case "tag": t.Name = value case "tagger": t.Tagger.Decode([]byte(value)) default: return fmt.Errorf("zeta: unknown tag header: %s", field) } } if readErr == io.EOF { break } } t.Content = message.String() return nil } // Encode encodes the Tag's contents to the given io.Writer, "w". If there was // any error copying the Tag's contents, that error will be returned. // // Otherwise, the number of bytes written will be returned. func (t *Tag) Encode(w io.Writer) error { _, err := w.Write(TAG_MAGIC[:]) if err != nil { return err } headers := []string{ fmt.Sprintf("object %s", t.Object), fmt.Sprintf("type %s", t.ObjectType), fmt.Sprintf("tag %s", t.Name), fmt.Sprintf("tagger %s", t.Tagger.String()), } _, err = fmt.Fprintf(w, "%s\n\n%s", strings.Join(headers, "\n"), t.Content) return err } // Equal returns whether the receiving and given Tags are equal, or in other // words, whether they are represented by the same SHA-1 when saved to the // object database. func (t *Tag) Equal(other *Tag) bool { if (t == nil) != (other == nil) { return false } if t != nil { return t.Object == other.Object && t.ObjectType == other.ObjectType && t.Name == other.Name && t.Tagger == other.Tagger && t.Content == other.Content } return true } func (t *Tag) Copy() *Tag { newTag := *t return &newTag } ================================================ FILE: modules/zeta/object/tree.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "bytes" "context" "errors" "fmt" "io" "path" "path/filepath" "sort" "strconv" "strings" "github.com/antgroup/hugescm/modules/merkletrie/noder" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" "github.com/antgroup/hugescm/modules/streamio" ) const ( maxTreeDepth = 1024 startingStackSize = 8 ) // New errors defined by this package. var ( TREE_MAGIC = [4]byte{'Z', 'T', 0x00, 0x01} ErrMaxTreeDepth = errors.New("maximum tree depth exceeded") ) type ErrDirectoryNotFound struct { dir string } func (e *ErrDirectoryNotFound) Error() string { return fmt.Sprintf("dir '%s' not found", e.dir) } func IsErrDirectoryNotFound(err error) bool { var e *ErrDirectoryNotFound return errors.As(err, &e) } type ErrEntryNotFound struct { entry string } func (e *ErrEntryNotFound) Error() string { return fmt.Sprintf("entry '%s' not found", e.entry) } func IsErrEntryNotFound(err error) bool { var e *ErrEntryNotFound return errors.As(err, &e) } // TreeEntry represents a file type TreeEntry struct { Name string `json:"name"` Size int64 `json:"size"` Mode filemode.FileMode `json:"mode"` Hash plumbing.Hash `json:"hash"` Payload []byte `json:"-"` } func (e *TreeEntry) Clone() *TreeEntry { return &TreeEntry{ Name: e.Name, Size: e.Size, Mode: e.Mode, Hash: e.Hash, Payload: bytes.Clone(e.Payload), } } // Equal returns whether the receiving and given TreeEntry instances are // identical in name, filemode, and OID. func (e *TreeEntry) Equal(other *TreeEntry) bool { if (e == nil) != (other == nil) { return false } if e != nil { return e.Name == other.Name && bytes.Equal(e.Hash[:], other.Hash[:]) && e.Mode == other.Mode } return true } const ( sIFMT = filemode.FileMode(0170000) sIFREG = filemode.FileMode(0100000) sIFDIR = filemode.FileMode(0040000) sIFLNK = filemode.FileMode(0120000) sIFGITLINK = filemode.FileMode(0160000) sIFRAGMENT = filemode.Fragments ) func (e *TreeEntry) Type() ObjectType { if e.Mode&sIFRAGMENT != 0 { return FragmentsObject } switch e.Mode & sIFMT { case sIFREG: return BlobObject case sIFDIR: return TreeObject case sIFLNK: return BlobObject case sIFGITLINK: return CommitObject default: } return 0 } // IsLink returns true if the given TreeEntry is a blob which represents a // symbolic link (i.e., with a filemode of 0120000. func (e *TreeEntry) IsLink() bool { return e.Mode&sIFMT == sIFLNK } func (e *TreeEntry) IsDir() bool { return e.Mode&sIFMT == sIFDIR } func (e *TreeEntry) IsRegular() bool { return e.Mode&sIFMT == sIFREG } func (e *TreeEntry) IsFragments() bool { return e.Mode&filemode.Fragments != 0 } func (e *TreeEntry) OriginMode() filemode.FileMode { return e.Mode &^ filemode.Fragments } // check if entry renamed func (e *TreeEntry) Renamed(other *TreeEntry) bool { return e.Mode == other.Mode && e.Hash == other.Hash } func (e *TreeEntry) Chmod(other *TreeEntry) bool { return e.Mode != other.Mode && e.Hash == other.Hash && e.Name == other.Name } // entry with same name func (e *TreeEntry) Modified(other *TreeEntry) bool { return e.Name == other.Name } // SubtreeOrder is an implementation of sort.Interface that sorts a set of // `*TreeEntry`'s according to "subtree" order. This ordering is required to // write trees in a correct, readable format to the Git object database. // // The format is as follows: entries are sorted lexicographically in byte-order, // with subtrees (entries of Type() == object.TreeObjectType) being sorted as // if their `Name` fields ended in a "/". // // See: https://github.com/git/git/blob/v2.13.0/fsck.c#L492-L525 for more // details. type SubtreeOrder []*TreeEntry // Len implements sort.Interface.Len() and return the length of the underlying // slice. func (s SubtreeOrder) Len() int { return len(s) } // Swap implements sort.Interface.Swap() and swaps the two elements at i and j. func (s SubtreeOrder) Swap(i, j int) { s[i], s[j] = s[j], s[i] } // Less implements sort.Interface.Less() and returns whether the element at "i" // is compared as "less" than the element at "j". In other words, it returns if // the element at "i" should be sorted ahead of that at "j". // // It performs this comparison in lexicographic byte-order according to the // rules above (see SubtreeOrder). func (s SubtreeOrder) Less(i, j int) bool { return s.Name(i) < s.Name(j) } // Name returns the name for a given entry indexed at "i", which is a C-style // string ('\0' terminated unless it's a subtree), optionally terminated with // '/' if it's a subtree. // // This is done because '/' sorts ahead of '\0', and is compatible with the // tree order in upstream Git. func (s SubtreeOrder) Name(i int) string { if i < 0 || i >= len(s) { return "" } entry := s[i] if entry.Type() == TreeObject { return entry.Name + "/" } return entry.Name + "\x00" } func (t *Tree) Append(others *TreeEntry) { for i, e := range t.Entries { if e.Name == others.Name { t.Entries[i] = others return } } t.Entries = append(t.Entries, others) } // Merge performs a merge operation against the given set of `*TreeEntry`'s by // either replacing existing tree entries of the same name, or appending new // entries in sub-tree order. // // It returns a copy of the tree, and performs the merge in O(n*log(n)) time. func (t *Tree) Merge(others ...*TreeEntry) *Tree { unseen := make(map[string]*TreeEntry) // Build a cache of name to *TreeEntry. for _, other := range others { unseen[other.Name] = other } // Map the existing entries ("t.Entries") into a new set by either // copying an existing entry, or replacing it with a new one. entries := make([]*TreeEntry, 0, len(t.Entries)) for _, entry := range t.Entries { if other, ok := unseen[entry.Name]; ok { entries = append(entries, other) delete(unseen, entry.Name) } else { entries = append(entries, &TreeEntry{ Name: entry.Name, Size: entry.Size, Mode: entry.Mode, Hash: entry.Hash, Payload: bytes.Clone(entry.Payload), }) } } // For all the items we haven't replaced into the new set, append them // to the entries. for _, remaining := range unseen { entries = append(entries, remaining) } // Call sort afterwords, as a tradeoff between speed and spacial // complexity. As a future point of optimization, adding new elements // (see: above) could be done as a linear pass of the "entries" set. // // In order to do that, we must have a constant-time lookup of both // entries in the existing and new sets. This requires building a // map[string]*TreeEntry for the given "others" as well as "t.Entries". // // Trees can be potentially large, so trade this spacial complexity for // an O(n*log(n)) sort. sort.Sort(SubtreeOrder(entries)) return &Tree{Entries: entries} } // Equal returns whether the receiving and given trees are equal, or in other // words, whether they are represented by the same BLAKE3 when saved to the // object database. func (t *Tree) Equal(other *Tree) bool { if (t == nil) != (other == nil) { return false } if t != nil { if len(t.Entries) != len(other.Entries) { return false } for i := range t.Entries { e1 := t.Entries[i] e2 := other.Entries[i] if !e1.Equal(e2) { return false } } } return true } // Tree is basically like a directory - it references a bunch of other trees // and/or blobs (i.e. files and sub-directories) type Tree struct { Hash plumbing.Hash `json:"hash"` Entries []*TreeEntry `json:"entries"` m map[string]*TreeEntry t map[string]*Tree // tree path cache b Backend } func NewTree(entries []*TreeEntry) *Tree { return &Tree{Entries: entries} } // Tree returns the tree identified by the `path` argument. // The path is interpreted as relative to the tree receiver. func (t *Tree) Tree(ctx context.Context, path string) (*Tree, error) { if len(path) == 0 { return t, nil } e, err := t.FindEntry(ctx, path) if err != nil { return nil, &ErrDirectoryNotFound{dir: path} } return resolveTree(ctx, t.b, e.Hash) } func (t *Tree) Entry(name string) (*TreeEntry, error) { return t.entry(name) } // FindEntry search a TreeEntry in this tree or any subtree. func (t *Tree) FindEntry(ctx context.Context, relativePath string) (*TreeEntry, error) { if t.t == nil { t.t = make(map[string]*Tree) } relativePath = filepath.ToSlash(relativePath) // fix on windows pathParts := strings.Split(relativePath, "/") startingTree := t pathCurrent := "" // search for the longest path in the tree path cache for i := len(pathParts) - 1; i >= 1; i-- { path := path.Join(pathParts[:i]...) tree, ok := t.t[path] if ok { startingTree = tree pathParts = pathParts[i:] pathCurrent = path break } } var tree *Tree var err error for tree = startingTree; len(pathParts) > 1; pathParts = pathParts[1:] { if tree, err = tree.dir(ctx, pathParts[0]); err != nil { return nil, err } pathCurrent = path.Join(pathCurrent, pathParts[0]) t.t[pathCurrent] = tree } return tree.entry(pathParts[0]) } func (t *Tree) dir(ctx context.Context, baseName string) (*Tree, error) { entry, err := t.entry(baseName) if err != nil { return nil, &ErrDirectoryNotFound{dir: baseName} } if t.b == nil { return nil, &ErrDirectoryNotFound{dir: baseName} } tree, err := t.b.Tree(ctx, entry.Hash) if err != nil { return nil, err } tree.b = t.b return tree, nil } func (t *Tree) entry(baseName string) (*TreeEntry, error) { if t.m == nil { t.buildMap() } entry, ok := t.m[baseName] if !ok { return nil, &ErrEntryNotFound{entry: baseName} } return entry, nil } // Files returns a FileIter allowing to iterate over the Tree func (t *Tree) Files() *FileIter { return NewFileIter(t.b, t) } func (t *Tree) buildMap() { t.m = make(map[string]*TreeEntry) for i := range t.Entries { t.m[t.Entries[i].Name] = t.Entries[i] } } func (t *Tree) SpacePadding() int { var hasFragments bool for _, e := range t.Entries { if e.Type() == FragmentsObject { hasFragments = true } } if hasFragments { return 5 } return 0 } func (t *Tree) SizePadding() int { var v int64 var hasFragments bool for _, e := range t.Entries { v = max(v, e.Size) if e.Type() == FragmentsObject { hasFragments = true } } sizeMax := len(strconv.FormatInt(v, 10)) if hasFragments { // blob/fragments 4/9 d5 return max(5, sizeMax) } return sizeMax } func (t *Tree) Encode(w io.Writer) error { _, err := w.Write(TREE_MAGIC[:]) if err != nil { return err } for _, entry := range t.Entries { size := entry.Size if len(entry.Payload) > 0 { if size > BlobInlineMaxBytes { return fmt.Errorf("tree entry '%s' inline blob '%s' too large", t.Hash, entry.Hash) } size = -entry.Size } if _, err = fmt.Fprintf(w, "%o %d %s", entry.Mode, size, entry.Name); err != nil { return err } if _, err = w.Write([]byte{0x00}); err != nil { return err } if _, err = w.Write(entry.Hash[:]); err != nil { return err } if len(entry.Payload) > 0 { if _, err = w.Write(entry.Payload); err != nil { return err } } } return nil } func (t *Tree) Decode(reader Reader) error { if reader.Type() != TreeObject { return ErrUnsupportedObject } t.Hash = reader.Hash() r := streamio.GetBufioReader(reader) defer streamio.PutBufioReader(r) t.Entries = nil for { str, err := r.ReadString(' ') if err != nil { if errors.Is(err, io.EOF) { break } return err } str = str[:len(str)-1] // strip last byte (' ') mode, err := filemode.New(str) if err != nil { return err } if str, err = r.ReadString(' '); err != nil { if errors.Is(err, io.EOF) { break } return err } size, err := strconv.ParseInt(str[:len(str)-1], 10, 64) if err != nil { return err } name, err := r.ReadString(0) if err != nil && !errors.Is(err, io.EOF) { return err } var hash plumbing.Hash if _, err = io.ReadFull(r, hash[:]); err != nil { return err } var payload []byte if size < 0 { size = -size if size > BlobInlineMaxBytes { return fmt.Errorf("tree entry '%s' inline blob '%s' too large", t.Hash, hash) } payload = make([]byte, size) if _, err := io.ReadFull(r, payload); err != nil { return err } } baseName := name[:len(name)-1] t.Entries = append(t.Entries, &TreeEntry{ Name: baseName, Size: size, Mode: mode, Hash: hash, Payload: payload, }) } return nil } // resolveTree gets a tree from an object storer and decodes it. func resolveTree(ctx context.Context, b Backend, h plumbing.Hash) (*Tree, error) { if b == nil { return nil, plumbing.NoSuchObject(h) } t, err := b.Tree(ctx, h) if err != nil { return nil, err } return t, nil } // File returns the hash of the file identified by the `path` argument. // The path is interpreted as relative to the tree receiver. func (t *Tree) File(ctx context.Context, path string) (*File, error) { e, err := t.FindEntry(ctx, path) if err != nil { return nil, &ErrEntryNotFound{entry: path} } return newFile(e.Name, path, e.Mode, e.Hash, e.Size, t.b), nil } // Diff returns a list of changes between this tree and the provided one func (t *Tree) Diff(to *Tree, m noder.Matcher) (Changes, error) { return t.DiffContext(context.Background(), to, m) } // DiffContext returns a list of changes between this tree and the provided one // Error will be returned if context expires. Provided context must be non nil. // // NOTE: Since version 5.1.0 the renames are correctly handled, the settings // used are the recommended options DefaultDiffTreeOptions. func (t *Tree) DiffContext(ctx context.Context, to *Tree, m noder.Matcher) (Changes, error) { return DiffTreeWithOptions(ctx, t, to, DefaultDiffTreeOptions, m) } // StatsContext: stats func (t *Tree) StatsContext(ctx context.Context, to *Tree, m noder.Matcher, opts *PatchOptions) (FileStats, error) { changes, err := t.DiffContext(ctx, to, m) if err != nil { return nil, err } return changes.Stats(ctx, opts) } // treeEntryIter facilitates iterating through the TreeEntry objects in a Tree. type treeEntryIter struct { t *Tree pos int } func (iter *treeEntryIter) Next() (*TreeEntry, error) { if iter.pos >= len(iter.t.Entries) { return &TreeEntry{}, io.EOF } iter.pos++ return iter.t.Entries[iter.pos-1], nil } // TreeWalker provides a means of walking through all of the entries in a Tree. type TreeWalker struct { stack []*treeEntryIter base string recursive bool seen map[plumbing.Hash]bool b Backend t *Tree } // NewTreeWalker returns a new TreeWalker for the given tree. // // It is the caller's responsibility to call Close() when finished with the // tree walker. func NewTreeWalker(t *Tree, recursive bool, seen map[plumbing.Hash]bool) *TreeWalker { stack := make([]*treeEntryIter, 0, startingStackSize) stack = append(stack, &treeEntryIter{t, 0}) return &TreeWalker{ stack: stack, recursive: recursive, seen: seen, b: t.b, t: t, } } // Next returns the next object from the tree. Objects are returned in order // and subtrees are included. After the last object has been returned further // calls to Next() will return io.EOF. // // In the current implementation any objects which cannot be found in the // underlying repository will be skipped automatically. It is possible that this // may change in future versions. func (w *TreeWalker) Next(ctx context.Context) (name string, entry *TreeEntry, err error) { var obj *Tree for { current := len(w.stack) - 1 if current < 0 { // Nothing left on the stack so we're finished err = io.EOF return } if current > maxTreeDepth { // We're probably following bad data or some self-referencing tree err = ErrMaxTreeDepth return } entry, err = w.stack[current].Next() if errors.Is(err, io.EOF) { // Finished with the current tree, move back up to the parent w.stack = w.stack[:current] w.base, _ = path.Split(w.base) w.base = strings.TrimSuffix(w.base, "/") continue } if err != nil { return } if w.seen[entry.Hash] { continue } if entry.Mode == filemode.Dir { obj, err = resolveTree(ctx, w.b, entry.Hash) } if plumbing.IsNoSuchObject(err) { continue } name = simpleJoin(w.base, entry.Name) if err != nil { err = io.EOF return } break } if !w.recursive { return } if obj != nil { w.stack = append(w.stack, &treeEntryIter{obj, 0}) w.base = simpleJoin(w.base, entry.Name) } return } // Tree returns the tree that the tree walker most recently operated on. func (w *TreeWalker) Tree() *Tree { current := len(w.stack) - 1 if w.stack[current].pos == 0 { current-- } if current < 0 { return nil } return w.stack[current].t } // Close releases any resources used by the TreeWalker. func (w *TreeWalker) Close() { w.stack = nil } func simpleJoin(parent, child string) string { if len(parent) > 0 { return parent + "/" + child } return child } ================================================ FILE: modules/zeta/object/tree_test.go ================================================ package object import ( "fmt" "os" "testing" "github.com/antgroup/hugescm/modules/plumbing/filemode" ) func ShowType(mode filemode.FileMode) ObjectType { switch mode & sIFMT { case sIFREG: return BlobObject case sIFDIR: return TreeObject case sIFLNK: return BlobObject case sIFGITLINK: return CommitObject default: } return 0 } func TestFragments(t *testing.T) { ee := []*TreeEntry{ { Mode: filemode.Dir, }, { Mode: filemode.Executable, }, { Mode: filemode.Executable | filemode.Fragments, }, { Mode: filemode.Regular | filemode.Fragments, }, } for _, e := range ee { fmt.Fprintf(os.Stderr, "%s %s\n", e.Type(), ShowType(e.Mode)) } } func TestNotFragments(t *testing.T) { e := &TreeEntry{ Mode: filemode.Executable, } fmt.Fprintf(os.Stderr, "%s\n", e.Type()) } ================================================ FILE: modules/zeta/object/treenode.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package object import ( "errors" "context" "io" "runtime" "strings" "github.com/antgroup/hugescm/modules/merkletrie/noder" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/plumbing/filemode" ) // A treenoder is a helper type that wraps git trees into merkletrie // noders. // // As a merkletrie noder doesn't understand the concept of modes (e.g. // file permissions), the treenoder includes the mode of the git tree in // the hash, so changes in the modes will be detected as modifications // to the file contents by the merkletrie difftree algorithm. This is // consistent with how the "git diff-tree" command works. type TreeNoder struct { parent *Tree // the root node is its own parent name string // empty string for the root node mode filemode.FileMode hash plumbing.Hash size int64 fragments plumbing.Hash children []noder.Noder // memoized m noder.Matcher conflictDetection bool } // NewTreeRootNode returns the root node of a Tree func NewTreeRootNode(t *Tree, m noder.Matcher, conflictDetection bool) noder.Noder { if t == nil { return &TreeNoder{} } return &TreeNoder{ parent: t, name: "", mode: filemode.Dir, hash: t.Hash, m: m, conflictDetection: conflictDetection, } } func (t *TreeNoder) Skip() bool { return false } func (t *TreeNoder) isRoot() bool { return t.name == "" } func (t *TreeNoder) String() string { return "treeNoder <" + t.name + ">" } func (t *TreeNoder) Mode() filemode.FileMode { return t.mode } func (t *TreeNoder) TrueMode() filemode.FileMode { if !t.fragments.IsZero() { return t.mode | filemode.Fragments } return t.mode } func (t *TreeNoder) HashRaw() plumbing.Hash { if !t.fragments.IsZero() { return t.fragments } return t.hash } func (t *TreeNoder) IsFragments() bool { return !t.fragments.IsZero() } func (t *TreeNoder) Hash() []byte { if t.mode == filemode.Deprecated { return append(t.hash[:], filemode.Regular.Bytes()...) } return append(t.hash[:], t.mode.Bytes()...) } func (t *TreeNoder) Name() string { return t.name } func (t *TreeNoder) Size() int64 { return t.size } func (t *TreeNoder) IsDir() bool { return t.mode == filemode.Dir } // Children will return the children of a treenoder as treenoders, // building them from the children of the wrapped git tree. func (t *TreeNoder) Children(ctx context.Context) ([]noder.Noder, error) { if t.mode != filemode.Dir { return noder.NoChildren, nil } // children are memoized for efficiency if t.children != nil { return t.children, nil } // the parent of the returned children will be ourself as a tree if // we are a not the root treenoder. The root is special as it // is is own parent. parent := t.parent if !t.isRoot() { var err error if parent, err = t.parent.Tree(ctx, t.name); err != nil { return nil, err } } var err error t.children, err = transformChildren(ctx, parent, t.m, t.conflictDetection) return t.children, err } var ( caseInsensitive = func() bool { return runtime.GOOS == "windows" || runtime.GOOS == "darwin" }() ) func canonicalName(name string) string { if caseInsensitive { return strings.ToLower(name) } return name } const ( dot = "." dotDot = ".." ) // Returns the children of a tree as treenoders. // Efficiency is key here. func transformChildren(ctx context.Context, t *Tree, m noder.Matcher, conflictDetection bool) ([]noder.Noder, error) { var err error var e *TreeEntry // there will be more tree entries than children in the tree, // due to submodules and empty directories, but I think it is still // worth it to pre-allocate the whole array now, even if sometimes // is bigger than needed. noDuplicateEntries := make(map[string]bool) ret := make([]noder.Noder, 0, len(t.Entries)) walker := NewTreeWalker(t, false, nil) // don't recurse // don't defer walker.Close() for efficiency reasons. for { _, e, err = walker.Next(ctx) if errors.Is(err, io.EOF) { break } if err != nil { walker.Close() return nil, err } if e.Name == dot || e.Name == dotDot { // BAD entry continue } var n *TreeNoder switch typ := e.Type(); typ { case TreeObject: var ok bool var sub noder.Matcher if m != nil && m.Len() > 0 { if sub, ok = m.Match(e.Name); !ok { continue } } n = &TreeNoder{ parent: t, name: e.Name, mode: e.Mode, hash: e.Hash, size: e.Size, m: sub, conflictDetection: conflictDetection, } case FragmentsObject: n = &TreeNoder{ parent: t, name: e.Name, mode: e.Mode, hash: e.Hash, size: e.Size, conflictDetection: conflictDetection, } if ff, err := t.b.Fragments(ctx, e.Hash); err == nil { n.mode = e.OriginMode() n.hash = ff.Origin n.fragments = e.Hash n.size = int64(ff.Size) } default: n = &TreeNoder{ parent: t, name: e.Name, mode: e.Mode, hash: e.Hash, size: e.Size, conflictDetection: conflictDetection, } } if conflictDetection { cname := canonicalName(e.Name) if noDuplicateEntries[cname] { continue } noDuplicateEntries[cname] = true } ret = append(ret, n) } walker.Close() return ret, nil } // len(t.tree.Entries) != the number of elements walked by treewalker // for some reason because of empty directories, submodules, etc, so we // have to walk here. func (t *TreeNoder) NumChildren(ctx context.Context) (int, error) { children, err := t.Children(ctx) if err != nil { return 0, err } return len(children), nil } ================================================ FILE: modules/zeta/reflog/reflog.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package reflog import ( "bufio" "errors" "fmt" "io" "os" "path/filepath" "sort" "strings" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/object" ) const ( REFLOG_DIR = "logs" REFLOG_DIR_MOD = 0777 REFLOG_FILE_MODE = 0666 ) type Entry struct { O, N plumbing.Hash Committer object.Signature Message string } type Entries []*Entry type Reflog struct { name plumbing.ReferenceName Entries Entries } func (o *Reflog) Empty() bool { return o == nil || len(o.Entries) == 0 } func (o *Reflog) Clear() { o.Entries = o.Entries[:0] } func (o *Reflog) Drop(index int, rewritePreviousEntry bool) error { count := len(o.Entries) if index < 0 || index >= count { return fmt.Errorf("no reflog entry at index %d", index) } newEntries := make([]*Entry, 0, count-1) for i, e := range o.Entries { if i != index { newEntries = append(newEntries, e) } } switch { case !rewritePreviousEntry || index == 0 || count == 1: case index == count-1: newEntries[len(newEntries)-1].O = plumbing.ZeroHash default: newEntries[index-1].O = newEntries[index].N } o.Entries = newEntries return nil } // Push New Entry func (o *Reflog) Push(oid plumbing.Hash, committer *object.Signature, message string) { e := &Entry{ N: oid, Committer: *committer, Message: message, } newEntries := make([]*Entry, 0, len(o.Entries)+1) if len(o.Entries) > 0 { e.O = o.Entries[0].N } newEntries = append(newEntries, e) newEntries = append(newEntries, o.Entries...) o.Entries = newEntries } type DB struct { root string } func NewDB(root string) *DB { return &DB{root: root} } var ( ErrUnparsableReflogLine = errors.New("unparsable reflog line") ) func newEntry(line string) (*Entry, error) { pos := strings.IndexByte(line, ' ') if pos == -1 { return nil, ErrUnparsableReflogLine } o := line[0:pos] line = line[pos+1:] if pos = strings.IndexByte(line, ' '); pos == -1 { return nil, ErrUnparsableReflogLine } n := line[0:pos] line = line[pos+1:] var message string signature := line if pos = strings.IndexByte(line, '\t'); pos != -1 { message = line[pos+1:] signature = line[:pos] } e := &Entry{ O: plumbing.NewHash(o), N: plumbing.NewHash(n), Message: message, } e.Committer.Decode([]byte(signature)) return e, nil } func (d *DB) parse(r io.Reader) ([]*Entry, error) { br := bufio.NewScanner(r) entries := make([]*Entry, 0, 20) for br.Scan() { line := strings.TrimSpace(br.Text()) e, err := newEntry(line) if err != nil { continue } entries = append(entries, e) } sort.SliceStable(entries, func(i, j int) bool { return true }) return entries, nil } func (d *DB) serialize(w io.Writer, entries []*Entry) error { for i := len(entries) - 1; i >= 0; i-- { e := entries[i] if len(e.Message) == 0 { if _, err := fmt.Fprintf(w, "%s %s %s\n", e.O, e.N, &e.Committer); err != nil { return err } continue } if _, err := fmt.Fprintf(w, "%s %s %s\t%s\n", e.O, e.N, &e.Committer, strings.ReplaceAll(e.Message, "\n", " ")); err != nil { return err } } return nil } func (d *DB) Exists(refname plumbing.ReferenceName) bool { logPath := filepath.Join(d.root, REFLOG_DIR, string(refname)) if _, err := os.Stat(logPath); err == nil { return true } return false } func (d *DB) Read(refname plumbing.ReferenceName) (*Reflog, error) { if !plumbing.ValidateReferenceName([]byte(refname)) { return nil, plumbing.ErrBadReferenceName{Name: refname.String()} } logPath := filepath.Join(d.root, REFLOG_DIR, string(refname)) fd, err := os.Open(logPath) if err != nil { if !os.IsNotExist(err) { return nil, err } if err := os.MkdirAll(filepath.Dir(logPath), REFLOG_DIR_MOD); err != nil { return nil, err } if fd, err = os.OpenFile(logPath, os.O_CREATE, REFLOG_FILE_MODE); err != nil { return nil, err } _ = fd.Close() return &Reflog{name: refname, Entries: make([]*Entry, 0)}, nil } defer fd.Close() // nolint reflog := &Reflog{ name: refname, } if reflog.Entries, err = d.parse(fd); err != nil { return nil, err } return reflog, nil } func (d *DB) Write(o *Reflog) error { logPath := filepath.Join(d.root, REFLOG_DIR, string(o.name)) return d.lockPath(o.name, logPath, func() error { var tempReflog string defer func() { if len(tempReflog) != 0 { _ = os.Remove(tempReflog) } }() fd, err := os.CreateTemp(filepath.Dir(logPath), "temp_reflog") if err != nil { return err } _ = fd.Chmod(0644) tempReflog = fd.Name() w := bufio.NewWriter(fd) if err := d.serialize(w, o.Entries); err != nil { _ = fd.Close() return err } if err := w.Flush(); err != nil { _ = fd.Close() return err } _ = fd.Close() if err := os.Rename(tempReflog, logPath); err != nil { return err } return nil }) } func (d *DB) Rename(oldName, newName plumbing.ReferenceName) error { if !plumbing.ValidateReferenceName([]byte(oldName)) { return plumbing.ErrBadReferenceName{Name: string(oldName)} } if !plumbing.ValidateReferenceName([]byte(newName)) { return plumbing.ErrBadReferenceName{Name: string(newName)} } logPathA := filepath.Join(d.root, REFLOG_DIR, string(oldName)) logPathB := filepath.Join(d.root, REFLOG_DIR, string(newName)) err := d.lockTowPath(oldName, newName, logPathA, logPathB, func() error { return os.Rename(logPathA, logPathB) }) if err == nil || !os.IsExist(err) { return err } logTempPath := filepath.Join(d.root, REFLOG_DIR, "temp_reflog") tempName := plumbing.ReferenceName("temp_reflog") if err = d.lockTowPath(oldName, tempName, logPathA, logTempPath, func() error { return os.Rename(logPathA, logTempPath) }); err != nil { return err } _ = d.prune() return d.lockTowPath(tempName, newName, logTempPath, logPathB, func() error { return os.Rename(logTempPath, logPathA) }) } func (d *DB) Delete(name plumbing.ReferenceName) error { if !plumbing.ValidateReferenceName([]byte(name)) { return plumbing.ErrBadReferenceName{Name: string(name)} } logPath := filepath.Join(d.root, REFLOG_DIR, string(name)) err := d.lockPath(name, logPath, func() error { if err := os.Remove(logPath); err != nil && os.IsNotExist(err) { return err } return nil }) _ = d.prune() return err } func (d *DB) lockPath(refname plumbing.ReferenceName, p string, fn func() error) error { lockName := p + ".lock" fd, err := openNotExists(lockName) if err != nil { if os.IsExist(err) { return plumbing.NewErrResourceLocked("reflog", refname) } return err } err = fn() _ = fd.Close() _ = os.Remove(lockName) return err } func (d *DB) lockTowPath(refnameA, refnameB plumbing.ReferenceName, a, b string, fn func() error) error { lockNameA := a + ".lock" lockNameB := b + ".lock" fd1, err := openNotExists(lockNameA) if err != nil { if os.IsExist(err) { return plumbing.NewErrResourceLocked("reflog", refnameA) } return err } fd2, err := openNotExists(lockNameB) if err != nil { _ = fd1.Close() _ = os.Remove(lockNameA) if os.IsExist(err) { return plumbing.NewErrResourceLocked("reflog", refnameB) } return err } err = fn() _ = fd1.Close() _ = os.Remove(lockNameA) _ = fd2.Close() _ = os.Remove(lockNameB) return err } func openNotExists(name string) (*os.File, error) { _ = os.MkdirAll(filepath.Dir(name), 0755) return os.OpenFile(name, os.O_CREATE|os.O_EXCL|os.O_RDWR|os.O_TRUNC, 0644) } var ( pruneKeeps = map[string]bool{ "heads": true, "tags": true, "remotes": true, } ) func (d *DB) prune() error { logsPath := filepath.Join(d.root, REFLOG_DIR) entries, err := os.ReadDir(logsPath) if err != nil { return err } for _, e := range entries { if !e.IsDir() { continue } absPath := filepath.Join(logsPath, e.Name()) if err := pruneDirsDFS(absPath, pruneKeeps[e.Name()]); err != nil { return err } } return nil } func pruneDirsDFS(dir string, keep bool) error { empty := true entries, err := os.ReadDir(dir) if err != nil { return err } for _, e := range entries { if !e.IsDir() { empty = false continue } absPath := filepath.Join(dir, e.Name()) if err := pruneDirsDFS(absPath, false); err != nil { return err } } if !empty || keep { return nil } return os.Remove(dir) } ================================================ FILE: modules/zeta/reflog/reflog_test.go ================================================ package reflog import ( "fmt" "os" "strings" "testing" "time" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/object" ) func TestReflogRead(t *testing.T) { m := `0000000000000000000000000000000000000000000000000000000000000000 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a LBW 1706772738 +0800 WIP on master: 8438002 form-string.md: correct the example 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 LBW 1706772760 +0800 WIP on master: 8438002 form-string.md: correct the example 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 c0869060ede3e208c464cac81fd78e6f31cecb572a3450b9a7dce4784c6dab5f LBW 1706773202 +0800 WIP on master: d343999 ZZZZ ` d := &DB{} entries, err := d.parse(strings.NewReader(m)) if err != nil { fmt.Fprintf(os.Stderr, "parse error: %s\n", err) return } for _, e := range entries { fmt.Fprintf(os.Stderr, "%s\n", e.Message) } _ = d.serialize(os.Stderr, entries) } func TestReflogWrite(t *testing.T) { m := `0000000000000000000000000000000000000000000000000000000000000000 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a LBW 1706772738 +0800 WIP on master: 8438002 form-string.md: correct the example 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 LBW 1706772760 +0800 WIP on master: 8438002 form-string.md: correct the example 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 c0869060ede3e208c464cac81fd78e6f31cecb572a3450b9a7dce4784c6dab5f LBW 1706773202 +0800 WIP on master: d343999 ZZZZ ` d := &DB{root: "/tmp"} entries, err := d.parse(strings.NewReader(m)) if err != nil { fmt.Fprintf(os.Stderr, "parse error: %s\n", err) return } for _, e := range entries { fmt.Fprintf(os.Stderr, "%s\n", e.Message) } o := &Reflog{name: "stash", Entries: entries} if err := d.Write(o); err != nil { fmt.Fprintf(os.Stderr, "write reflog: %v\n", err) } } func TestReflogDrop(t *testing.T) { m := `0000000000000000000000000000000000000000000000000000000000000000 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a LBW 1706772738 +0800 WIP on master: 8438002 form-string.md: correct the example 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 LBW 1706772760 +0800 WIP on master: 8438002 form-string.md: correct the example 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 c0869060ede3e208c464cac81fd78e6f31cecb572a3450b9a7dce4784c6dab5f LBW 1706773202 +0800 WIP on master: d343999 ZZZZ ` d := &DB{} entries, err := d.parse(strings.NewReader(m)) if err != nil { fmt.Fprintf(os.Stderr, "parse error: %s\n", err) return } log := &Reflog{ name: "refs/stash", Entries: entries, } _ = log.Drop(0, true) _ = d.serialize(os.Stderr, log.Entries) } func TestReflogDrop1(t *testing.T) { m := `0000000000000000000000000000000000000000000000000000000000000000 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a LBW 1706772738 +0800 WIP on master: 8438002 form-string.md: correct the example 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 LBW 1706772760 +0800 WIP on master: 8438002 form-string.md: correct the example 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 c0869060ede3e208c464cac81fd78e6f31cecb572a3450b9a7dce4784c6dab5f LBW 1706773202 +0800 WIP on master: d343999 ZZZZ ` d := &DB{} entries, err := d.parse(strings.NewReader(m)) if err != nil { fmt.Fprintf(os.Stderr, "parse error: %s\n", err) return } log := &Reflog{ name: "refs/stash", Entries: entries, } _ = log.Drop(1, true) _ = d.serialize(os.Stderr, log.Entries) } func TestReflogDrop2(t *testing.T) { m := `0000000000000000000000000000000000000000000000000000000000000000 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a LBW 1706772738 +0800 WIP on master: 8438002 form-string.md: correct the example 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 LBW 1706772760 +0800 WIP on master: 8438002 form-string.md: correct the example 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 c0869060ede3e208c464cac81fd78e6f31cecb572a3450b9a7dce4784c6dab5f LBW 1706773202 +0800 WIP on master: d343999 ZZZZ ` d := &DB{} entries, err := d.parse(strings.NewReader(m)) if err != nil { fmt.Fprintf(os.Stderr, "parse error: %s\n", err) return } log := &Reflog{ name: "refs/stash", Entries: entries, } _ = log.Drop(2, true) _ = d.serialize(os.Stderr, log.Entries) } func TestReflogDrop3(t *testing.T) { m := `0000000000000000000000000000000000000000000000000000000000000000 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a LBW 1706772738 +0800 WIP on master: 8438002 form-string.md: correct the example 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 LBW 1706772760 +0800 WIP on master: 8438002 form-string.md: correct the example 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 c0869060ede3e208c464cac81fd78e6f31cecb572a3450b9a7dce4784c6dab5f LBW 1706773202 +0800 WIP on master: d343999 ZZZZ ` d := &DB{} entries, err := d.parse(strings.NewReader(m)) if err != nil { fmt.Fprintf(os.Stderr, "parse error: %s\n", err) return } log := &Reflog{ name: "refs/stash", Entries: entries, } _ = log.Drop(3, true) _ = d.serialize(os.Stderr, log.Entries) } func TestReflogPush(t *testing.T) { m := `0000000000000000000000000000000000000000000000000000000000000000 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a LBW 1706772738 +0800 WIP on master: 8438002 form-string.md: correct the example 7d93f7dad4160ce2a30e7083e1fbe189b68142bcefd029fdc376f892eedb250a 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 LBW 1706772760 +0800 WIP on master: 8438002 form-string.md: correct the example 46ec16b743c9020366a11f9cb3ea61f1ec04ca6d588132eff4c5028a2a49a815 c0869060ede3e208c464cac81fd78e6f31cecb572a3450b9a7dce4784c6dab5f LBW 1706773202 +0800 WIP on master: d343999 ZZZZ ` d := &DB{} entries, err := d.parse(strings.NewReader(m)) if err != nil { fmt.Fprintf(os.Stderr, "parse error: %s\n", err) return } log := &Reflog{ name: "refs/stash", Entries: entries, } log.Push(plumbing.NewHash("bd9ddb6547b224fd6bb39b7f7fddf833b37f4ddb9ea94be8628c3f7aae465e64"), &object.Signature{ Name: "LBW", Email: "dev@zeta.io", When: time.Now(), }, "PushE") _ = d.serialize(os.Stderr, log.Entries) } ================================================ FILE: modules/zeta/refs/backend.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package refs import ( "errors" "io" "github.com/antgroup/hugescm/modules/plumbing" ) type Backend interface { // Find the current reference HEAD() (*plumbing.Reference, error) // view all references References() (*DB, error) // Look up a reference using the full reference name. Reference(name plumbing.ReferenceName) (*plumbing.Reference, error) // ReferencePrefixMatch match reference prefix // prefix: refs/logs // refs/logs ✅ // refs/logs/211 ✅ // refs/logs.l ❌ ReferencePrefixMatch(prefix plumbing.ReferenceName) (*plumbing.Reference, error) // Update reference Update(r, old *plumbing.Reference) error // remove reference ReferenceRemove(r *plumbing.Reference) error // packed references Packed() error } func ReferencesDB(repoPath string) (*DB, error) { return NewBackend(repoPath).References() } const MaxResolveRecursion = 1024 // ErrMaxResolveRecursion is returned by ResolveReference is MaxResolveRecursion // is exceeded var ErrMaxResolveRecursion = errors.New("max. recursion level reached") func ReferenceResolve(b Backend, name plumbing.ReferenceName) (ref *plumbing.Reference, err error) { for range MaxResolveRecursion { if ref, err = b.Reference(name); err != nil { return nil, err } if ref.Type() != plumbing.SymbolicReference { return ref, nil } name = ref.Target() } return nil, ErrMaxResolveRecursion } // ReferenceIter is a generic closable interface for iterating over references. type ReferenceIter interface { Next() (*plumbing.Reference, error) ForEach(func(*plumbing.Reference) error) error Close() } type ReferenceSliceIter struct { series []*plumbing.Reference pos int } // NewReferenceSliceIter returns a reference iterator for the given slice of // objects. func NewReferenceSliceIter(series []*plumbing.Reference) ReferenceIter { return &ReferenceSliceIter{ series: series, } } // Next returns the next reference from the iterator. If the iterator has // reached the end it will return io.EOF as an error. func (iter *ReferenceSliceIter) Next() (*plumbing.Reference, error) { if iter.pos >= len(iter.series) { return nil, io.EOF } obj := iter.series[iter.pos] iter.pos++ return obj, nil } // ForEach call the cb function for each reference contained on this iter until // an error happens or the end of the iter is reached. If ErrStop is sent // the iteration is stop but no error is returned. The iterator is closed. func (iter *ReferenceSliceIter) ForEach(cb func(*plumbing.Reference) error) error { return forEachReferenceIter(iter, cb) } type bareReferenceIterator interface { Next() (*plumbing.Reference, error) Close() } func forEachReferenceIter(iter bareReferenceIterator, cb func(*plumbing.Reference) error) error { defer iter.Close() for { obj, err := iter.Next() if err != nil { if errors.Is(err, io.EOF) { return nil } return err } if err := cb(obj); err != nil { if errors.Is(err, plumbing.ErrStop) { return nil } return err } } } // Close releases any resources used by the iterator. func (iter *ReferenceSliceIter) Close() { iter.pos = len(iter.series) } func NewReferenceIter(b Backend) (ReferenceIter, error) { d, err := b.References() if err != nil { return nil, err } return NewReferenceSliceIter(d.References()), nil } ================================================ FILE: modules/zeta/refs/error.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package refs import "errors" var ( // ErrNotFound is returned by New when the path is not found. ErrNotFound = errors.New("path not found") // ErrIdxNotFound is returned by Idxfile when the idx file is not found ErrIdxNotFound = errors.New("idx file not found") // ErrPackfileNotFound is returned by Packfile when the packfile is not found ErrPackfileNotFound = errors.New("packfile not found") // ErrConfigNotFound is returned by Config when the config is not found ErrConfigNotFound = errors.New("config file not found") // ErrPackedRefsDuplicatedRef is returned when a duplicated reference is // found in the packed-ref file. This is usually the case for corrupted git // repositories. ErrPackedRefsDuplicatedRef = errors.New("duplicated ref found in packed-ref file") // ErrPackedRefsBadFormat is returned when the packed-ref file corrupt. ErrPackedRefsBadFormat = errors.New("malformed packed-ref") // ErrSymRefTargetNotFound is returned when a symbolic reference is // targeting a non-existing object. This usually means the repository // is corrupt. ErrSymRefTargetNotFound = errors.New("symbolic reference target not found") // ErrIsDir is returned when a reference file is attempting to be read, // but the path specified is a directory. ErrIsDir = errors.New("reference path is a directory") ) ================================================ FILE: modules/zeta/refs/filesystem.go ================================================ // Copyright 2018 Sourced Technologies, S.L. // SPDX-License-Identifier: Apache-2.0 package refs import ( "bufio" "errors" "fmt" "io" "io/fs" "os" "path/filepath" "slices" "strings" "github.com/antgroup/hugescm/modules/plumbing" ) const ( suffix = ".zeta" packedRefsPath = "packed-refs" configPath = "config" indexPath = "index" shallowPath = "shallow" modulePath = "modules" objectsPath = "objects" packPath = "pack" refsPath = "refs" branchesPath = "branches" hooksPath = "hooks" infoPath = "info" remotesPath = "remotes" logsPath = "logs" worktreesPath = "worktrees" tmpPackedRefsPrefix = "._packed-refs" // packPrefix = "pack-" // packExt = ".pack" // idxExt = ".idx" ) var ( ErrReferenceHasChanged = errors.New("reference has changed concurrently") ) type fsBackend struct { repoPath string } func NewBackend(repoPath string) Backend { return &fsBackend{repoPath: repoPath} } func (b *fsBackend) HEAD() (*plumbing.Reference, error) { return b.readRefFromHEAD() } func (b *fsBackend) References() (*DB, error) { db := &DB{cache: make(map[plumbing.ReferenceName]*plumbing.Reference), references: make([]*plumbing.Reference, 0, 100)} var err error if err = b.addRefsFromRefDir(db); err != nil { return nil, err } if err := b.addRefsFromPackedRefs(db); err != nil { return nil, err } if db.head, err = b.readRefFromHEAD(); err != nil { return nil, err } return db, nil } func (b *fsBackend) addRefsFromRefDir(db *DB) error { return b.walkReferencesTree(refsPath, db) } func (b *fsBackend) addRefsFromPackedRefs(db *DB) error { fd, err := os.Open(filepath.Join(b.repoPath, packedRefsPath)) if os.IsNotExist(err) { return nil } if err != nil { return err } defer fd.Close() // nolint s := bufio.NewScanner(fd) for s.Scan() { ref, err := b.processLine(s.Text()) if err != nil { return err } if ref == nil { continue } if _, ok := db.cache[ref.Name()]; !ok { db.references = append(db.references, ref) db.cache[ref.Name()] = ref } } return s.Err() } func (b *fsBackend) readRefFromHEAD() (*plumbing.Reference, error) { ref, err := b.readReferenceFile("HEAD") if os.IsNotExist(err) { return nil, nil } if err != nil { return nil, err } return ref, nil } func (b *fsBackend) walkReferencesTree(prefix string, db *DB) error { files, err := os.ReadDir(filepath.Join(b.repoPath, prefix)) if err != nil { if os.IsNotExist(err) { return nil } return err } for _, f := range files { newPrefix := prefix + "/" + f.Name() // always use unix '/' if f.IsDir() { if err = b.walkReferencesTree(newPrefix, db); err != nil { return err } continue } ref, err := b.readReferenceFile(newPrefix) if os.IsNotExist(err) { continue } if err != nil { return err } if ref != nil { if _, ok := db.cache[ref.Name()]; !ok { db.references = append(db.references, ref) db.cache[ref.Name()] = ref } } } return nil } func (b *fsBackend) readReferenceFile(refname string) (ref *plumbing.Reference, err error) { p := filepath.Join(b.repoPath, refname) si, err := os.Stat(p) if err != nil { return nil, err } if si.IsDir() { return nil, ErrIsDir } fd, err := os.Open(p) if err != nil { return nil, err } defer fd.Close() // nolint return b.readReferenceFrom(fd, refname) } func (b *fsBackend) readReferenceMatchPrefix(prefix string) (ref *plumbing.Reference, err error) { refPath := filepath.Join(b.repoPath, prefix) si, err := os.Stat(refPath) if err != nil { return nil, err } if !si.IsDir() { fd, err := os.Open(refPath) if err != nil { return nil, err } defer fd.Close() // nolint return b.readReferenceFrom(fd, prefix) } var refname string err = filepath.WalkDir(refPath, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } refname, err = filepath.Rel(b.repoPath, path) return err }) if err != nil { return nil, err } if len(refname) == 0 { return nil, nil } fd, err := os.Open(filepath.Join(b.repoPath, refname)) if err != nil { return nil, err } defer fd.Close() // nolint return b.readReferenceFrom(fd, refname) } func (b *fsBackend) readReferenceFrom(rd io.Reader, name string) (ref *plumbing.Reference, err error) { data, err := io.ReadAll(rd) if err != nil { return nil, err } line := strings.TrimSpace(string(data)) return plumbing.NewReferenceFromStrings(name, line), nil } func (b *fsBackend) processLine(line string) (*plumbing.Reference, error) { if len(line) == 0 { return nil, nil } switch line[0] { case '#': // comment - ignore return nil, nil case '^': // annotated tag commit of the previous line - ignore return nil, nil default: target, name, ok := strings.Cut(line, " ") // hash then ref if !ok { return nil, ErrPackedRefsBadFormat } return plumbing.NewReferenceFromStrings(name, target), nil } } func (b *fsBackend) matchReferenceName(line string, want string) (*plumbing.Reference, error) { if len(line) == 0 { return nil, nil } switch line[0] { case '#': // comment - ignore return nil, nil case '^': // annotated tag commit of the previous line - ignore return nil, nil default: target, name, ok := strings.Cut(line, " ") // hash then ref if !ok { return nil, ErrPackedRefsBadFormat } if want != name { return nil, nil } return plumbing.NewReferenceFromStrings(name, target), nil } } func (b *fsBackend) packedRef(name plumbing.ReferenceName) (*plumbing.Reference, error) { fd, err := os.Open(filepath.Join(b.repoPath, packedRefsPath)) if os.IsNotExist(err) { return nil, plumbing.ErrReferenceNotFound } if err != nil { return nil, err } defer fd.Close() // nolint s := bufio.NewScanner(fd) for s.Scan() { ref, err := b.matchReferenceName(s.Text(), string(name)) if err != nil { return nil, err } if ref != nil { return ref, nil } } return nil, plumbing.ErrReferenceNotFound } func prefixMatch(name, prefix string) bool { prefixLen := len(prefix) return len(name) >= prefixLen && name[0:prefixLen] == prefix && (len(name) == prefixLen || name[prefixLen] == '/') } func (b *fsBackend) matchReferenceNamePrefix(line string, prefix string) (*plumbing.Reference, error) { if len(line) == 0 { return nil, nil } switch line[0] { case '#': // comment - ignore return nil, nil case '^': // annotated tag commit of the previous line - ignore return nil, nil default: target, name, ok := strings.Cut(line, " ") // hash then ref if !ok { return nil, ErrPackedRefsBadFormat } if !prefixMatch(name, prefix) { return nil, nil } return plumbing.NewReferenceFromStrings(name, target), nil } } func (b *fsBackend) matchPackedRefPrefix(prefix plumbing.ReferenceName) (*plumbing.Reference, error) { fd, err := os.Open(filepath.Join(b.repoPath, packedRefsPath)) if os.IsNotExist(err) { return nil, plumbing.ErrReferenceNotFound } if err != nil { return nil, err } defer fd.Close() // nolint s := bufio.NewScanner(fd) for s.Scan() { ref, err := b.matchReferenceNamePrefix(s.Text(), string(prefix)) if err != nil { return nil, err } if ref != nil { return ref, nil } } return nil, plumbing.ErrReferenceNotFound } func (b *fsBackend) Reference(name plumbing.ReferenceName) (*plumbing.Reference, error) { ref, err := b.readReferenceFile(string(name)) if err == nil { return ref, nil } return b.packedRef(name) } func (b *fsBackend) ReferencePrefixMatch(prefix plumbing.ReferenceName) (*plumbing.Reference, error) { ref, err := b.readReferenceMatchPrefix(string(prefix)) if err == nil { return ref, nil } return b.matchPackedRefPrefix(prefix) } func (b *fsBackend) checkReference(old *plumbing.Reference) error { if old == nil { return nil } ref, err := b.Reference(old.Name()) if err != nil { return err } if ref.Hash() != old.Hash() { return ErrReferenceHasChanged } return nil } func openNotExists(name string) (*os.File, error) { _ = os.MkdirAll(filepath.Dir(name), 0755) return os.OpenFile(name, os.O_CREATE|os.O_EXCL|os.O_RDWR|os.O_TRUNC, 0644) } func (b *fsBackend) lockPackedRefs(fn func() error) error { lockName := filepath.Join(b.repoPath, packedRefsPath+".lock") fd, err := openNotExists(lockName) if err != nil { if os.IsExist(err) { return plumbing.NewErrResourceLocked("reference", "packed-refs") } return err } err = fn() _ = fd.Close() _ = os.Remove(lockName) return err } func CheckClose(c io.Closer, err *error) { if closeErr := c.Close(); closeErr != nil && *err == nil { *err = closeErr } } func (b *fsBackend) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) error { var tmpName string defer func() { if len(tmpName) != 0 { _ = os.Remove(tmpName) } }() packedRefs := filepath.Join(b.repoPath, packedRefsPath) rewriteNeed, err := func() (bool, error) { fd, err := os.Open(packedRefs) if err != nil { if os.IsNotExist(err) { return false, nil } return false, err } defer fd.Close() // nolint tmp, err := os.CreateTemp(b.repoPath, tmpPackedRefsPrefix) if err != nil { return false, err } defer tmp.Close() // nolint _ = tmp.Chmod(0644) tmpName = tmp.Name() s := bufio.NewScanner(fd) found := false for s.Scan() { line := s.Text() ref, err := b.processLine(line) if err != nil { return false, err } if ref != nil && ref.Name() == name { found = true continue } if _, err := fmt.Fprintln(tmp, line); err != nil { return false, err } } if err := s.Err(); err != nil { return false, err } return found, nil }() if err != nil { return err } if !rewriteNeed { return nil } return os.Rename(tmpName, packedRefs) } func (b *fsBackend) ReferenceRemove(r *plumbing.Reference) error { fileName := filepath.Join(b.repoPath, r.Name().String()) lockName := fileName + ".lock" fd, err := openNotExists(lockName) if err != nil { if os.IsExist(err) { return plumbing.NewErrResourceLocked("reference", r.Name()) } return err } _ = fd.Close() defer func() { _ = os.Remove(lockName) _ = b.prune() }() if err = os.Remove(fileName); err != nil && !os.IsNotExist(err) { return err } return b.lockPackedRefs(func() error { return b.rewritePackedRefsWithoutRef(r.Name()) }) } func (b *fsBackend) Update(r, old *plumbing.Reference) error { var content string switch r.Type() { case plumbing.SymbolicReference: content = fmt.Sprintf("ref: %s\n", r.Target()) case plumbing.HashReference: content = fmt.Sprintln(r.Hash().String()) } fileName := filepath.Join(b.repoPath, r.Name().String()) lockName := fileName + ".lock" fd, err := openNotExists(lockName) if err != nil { if os.IsExist(err) { return plumbing.NewErrResourceLocked("reference", r.Name()) } return err } defer func() { _ = os.Remove(lockName) }() if err := b.checkReference(old); err != nil { _ = fd.Close() return err } if _, err := fd.WriteString(content); err != nil { _ = fd.Close() return err } _ = fd.Close() if err := os.Rename(lockName, fileName); err != nil { return err } return nil } func (b *fsBackend) rewritePackedRefs() error { // Gather all refs using addRefsFromRefDir and addRefsFromPackedRefs. db := &DB{cache: make(map[plumbing.ReferenceName]*plumbing.Reference), references: make([]*plumbing.Reference, 0, 100)} if err := b.addRefsFromRefDir(db); err != nil { return err } if len(db.references) == 0 { // Nothing to do! return nil } looseRefs := slices.Clone(db.references) if err := b.addRefsFromPackedRefs(db); err != nil { return err } var tempPackedRefs string defer func() { if len(tempPackedRefs) != 0 { _ = os.Remove(tempPackedRefs) } }() db.Sort() err := func() error { tmp, err := os.CreateTemp(b.repoPath, tmpPackedRefsPrefix) if err != nil { return err } defer tmp.Close() // nolint tempPackedRefs = tmp.Name() w := bufio.NewWriter(tmp) _, err = w.WriteString("# pack-refs with: sorted\n") if err != nil { return err } for _, ref := range db.references { _, err = w.WriteString(ref.String() + "\n") if err != nil { return err } } err = w.Flush() if err != nil { return err } return nil }() if err != nil { return err } packedRefs := filepath.Join(b.repoPath, packedRefsPath) if err := os.Rename(tempPackedRefs, packedRefs); err != nil { return err } for _, ref := range looseRefs { refPath := filepath.Join(b.repoPath, ref.Name().String()) err = os.Remove(refPath) if err != nil && !os.IsNotExist(err) { return err } } return nil } func (b *fsBackend) Packed() error { if err := b.lockPackedRefs(b.rewritePackedRefs); err != nil { return err } _ = b.prune() return nil } var ( pruneKeeps = map[string]bool{ "heads": true, "tags": true, "remotes": true, } ) func (b *fsBackend) prune() error { refsPath := filepath.Join(b.repoPath, "refs") entries, err := os.ReadDir(refsPath) if err != nil { return err } for _, e := range entries { if !e.IsDir() { continue } absPath := filepath.Join(refsPath, e.Name()) if err := pruneDirsDFS(absPath, pruneKeeps[e.Name()]); err != nil { return err } } return nil } func pruneDirsDFS(dir string, keep bool) error { empty := true entries, err := os.ReadDir(dir) if err != nil { return err } for _, e := range entries { if !e.IsDir() { empty = false continue } absPath := filepath.Join(dir, e.Name()) if err := pruneDirsDFS(absPath, false); err != nil { return err } } if !empty || keep { return nil } return os.Remove(dir) } ================================================ FILE: modules/zeta/refs/filesystem_test.go ================================================ package refs import ( "fmt" "os" "testing" "github.com/antgroup/hugescm/modules/plumbing" ) func TestBackend(t *testing.T) { repoPath := "/tmp/repo/zeta.zeta" _ = os.MkdirAll("/tmp/repo/zeta.zeta", 0755) b := NewBackend(repoPath) refs := []string{ "refs/heads/mainline", "refs/heads/dev", "refs/tags/v1.0.0", "refs/remotes/origin/master", } for _, r := range refs { err := b.Update(plumbing.NewHashReference(plumbing.ReferenceName(r), plumbing.NewHash("adba50d9794b9ef3f7ec8cbc680f7f1fa3fbf9df0ac8d1f9b9ccab6d941bc11b")), nil) if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) } } if err := b.Packed(); err != nil { fmt.Fprintf(os.Stderr, "packed refs error: %v\n", err) return } _ = b.Update(plumbing.NewHashReference(plumbing.ReferenceName("refs/heads/dev"), plumbing.NewHash("d84149926219c5a85da48051f2b3ad296f3ade3c5cb91dac4848d84de28c12dd")), nil) } func TestRemove(t *testing.T) { repoPath := "/tmp/repo/zeta.zeta" b := NewBackend(repoPath) _ = b.ReferenceRemove(plumbing.NewHashReference(plumbing.ReferenceName("refs/heads/dev"), plumbing.NewHash("d84149926219c5a85da48051f2b3ad296f3ade3c5cb91dac4848d84de28c12dd"))) } ================================================ FILE: modules/zeta/refs/references.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package refs import ( "sort" "github.com/antgroup/hugescm/modules/plumbing" ) // DB: References DB type DB struct { references []*plumbing.Reference cache map[plumbing.ReferenceName]*plumbing.Reference head *plumbing.Reference } func (d *DB) References() []*plumbing.Reference { return d.references } func (d *DB) Sort() { sort.Sort(plumbing.ReferenceSlice(d.references)) } func (d *DB) HEAD() *plumbing.Reference { return d.head } func (d *DB) Lookup(name string) *plumbing.Reference { for _, r := range refRevParseRules { if r, ok := d.cache[r.ReferenceName(name)]; ok { return r } } return nil } func (d *DB) Resolve(name plumbing.ReferenceName) (*plumbing.Reference, error) { for range MaxResolveRecursion { r := d.Lookup(string(name)) if r == nil { return nil, plumbing.ErrReferenceNotFound } if r.Type() == plumbing.HashReference { return r, nil } if r.Type() != plumbing.SymbolicReference { return nil, plumbing.ErrReferenceNotFound } } return nil, plumbing.ErrReferenceNotFound } // Return shorten unambiguous refname func (d *DB) ShortName(refname plumbing.ReferenceName, strict bool) string { for i := len(refRevParseRules) - 1; i > 0; i-- { var j int rulesToFail := 1 shortName := refRevParseRules[i].ShortName(string(refname)) if len(shortName) == 0 { continue } /* * in strict mode, all (except the matched one) rules * must fail to resolve to a valid non-ambiguous ref */ if strict { rulesToFail = len(refRevParseRules) } /* * check if the short name resolves to a valid ref, * but use only rules prior to the matched one */ for j = range rulesToFail { /* skip matched rule */ if i == j { continue } /* * the short name is ambiguous, if it resolves * (with this previous rule) to a valid ref * read_ref() returns 0 on success */ if d.Exists(refRevParseRules[j].ReferenceName(shortName)) { break } } /* * short name is non-ambiguous if all previous rules * haven't resolved to a valid ref */ if j == rulesToFail { return shortName } } return string(refname) } func (d *DB) Exists(refname plumbing.ReferenceName) bool { _, ok := d.cache[refname] return ok } func (d *DB) IsCurrent(refname plumbing.ReferenceName) bool { return d.head != nil && d.head.Name() == refname } ================================================ FILE: modules/zeta/refs/rules.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package refs import ( "strings" "github.com/antgroup/hugescm/modules/plumbing" ) // ReferencePrefixMatch: follow git's priority for finding refs // // https://git-scm.com/docs/git-rev-parse#Documentation/git-rev-parse.txt-emltrefnamegtemegemmasterememheadsmasterememrefsheadsmasterem // // https://github.com/git/git/blob/master/Documentation/revisions.txt type Rule struct { prefix string suffix string } func (r Rule) ReferenceName(name string) plumbing.ReferenceName { return plumbing.ReferenceName(r.prefix + name + r.suffix) } func (r Rule) ShortName(name string) string { if strings.HasPrefix(name, r.prefix) { return strings.TrimSuffix(name[len(r.prefix):], r.suffix) } return "" } var ( refRevParseRules = []*Rule{ {}, {prefix: "refs/"}, {prefix: "refs/tags/"}, {prefix: "refs/heads/"}, {prefix: "refs/remotes/"}, {prefix: "refs/remotes/", suffix: "/HEAD"}, } ) // RefRevParseRules are a set of rules to parse references into short names. // These are the same rules as used by git in shorten_unambiguous_ref. // See: https://github.com/git/git/blob/9857273be005833c71e2d16ba48e193113e12276/refs.c#L610 func RefRevParseRules() []*Rule { return refRevParseRules } ================================================ FILE: modules/zeta/refs/rules_test.go ================================================ package refs import ( "fmt" "os" "testing" "github.com/antgroup/hugescm/modules/plumbing" ) func TestRefRevParseRules(t *testing.T) { for _, r := range refRevParseRules { fmt.Fprintf(os.Stderr, "%s\n", r.ReferenceName("mainline")) } } func BenchmarkRepeat(b *testing.B) { for b.Loop() { for _, r := range refRevParseRules { _ = r.ReferenceName("mainline") } } } func BenchmarkRepeat2(b *testing.B) { for b.Loop() { for _, r := range plumbing.RefRevParseRules { _ = fmt.Sprintf(r, "mainline") } } } ================================================ FILE: pkg/command/README.md ================================================ # Zeta commands TODO ================================================ FILE: pkg/command/command.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "errors" "fmt" "github.com/antgroup/hugescm/pkg/kong" "github.com/antgroup/hugescm/pkg/version" ) type Globals struct { Verbose bool `short:"V" name:"verbose" help:"Make the operation more talkative"` Version VersionFlag `short:"v" name:"version" help:"Show version number and quit"` Values []string `short:"X" shortonly:"" help:"Override default configuration, format: ="` CWD string `name:"cwd" help:"Set the path to the repository worktree" placeholder:""` } type VersionFlag bool func (v VersionFlag) Decode(ctx *kong.DecodeContext) error { return nil } func (v VersionFlag) IsBool() bool { return true } func (v VersionFlag) BeforeApply(app *kong.Kong, vars kong.Vars) error { fmt.Println(version.GetVersionString()) app.Exit(0) return nil } var ( ErrArgRequired = errors.New("arg required") ) ================================================ FILE: pkg/command/command_add.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "fmt" "os" "github.com/antgroup/hugescm/modules/term" "github.com/antgroup/hugescm/pkg/zeta" ) // --chmod=(+|-)x // Add file contents to the index type Add struct { ALL bool `name:"all" short:"A" help:"Add changes from all tracked and untracked files"` DryRun bool `name:"dry-run" short:"n" help:"Dry run"` Update bool `name:"update" short:"u" help:"Update tracked files"` Chmod string `name:"chmod" help:"Override the executable bit of the listed files" placeholder:"(+|-)x"` PathSpec []string `arg:"" optional:"" name:"pathspec" help:"Path specification, similar to Git path matching mode"` } func (a *Add) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() if a.ALL { if err := w.AddWithOptions(context.Background(), &zeta.AddOptions{All: true, DryRun: a.DryRun}); err != nil { diev("zeta add all error: %v\n", err) return err } return nil } switch a.Chmod { case "": // ignore case "+x": return w.Chmod(context.Background(), a.PathSpec, true, a.DryRun) case "-x": return w.Chmod(context.Background(), a.PathSpec, false, a.DryRun) default: diev("--chmod param '%s' must be either -x or +x\n", a.Chmod) return errors.New("bad chmod") } if a.Update { if err := w.AddTracked(context.Background(), slashPaths(a.PathSpec), a.DryRun); err != nil { diev("zeta add --update error: %v", err) return err } return nil } if len(a.PathSpec) == 0 { _, _ = term.Fprintf(os.Stderr, "%s\n\x1b[33m%s\x1b[0m\n", W("Nothing specified, nothing added."), W("hint: Maybe you wanted to say 'zeta add .'?")) return errors.New("nothing specified, nothing added") } if err := w.Add(context.Background(), slashPaths(a.PathSpec), a.DryRun); err != nil { fmt.Fprintf(os.Stderr, "zeta add error: %v\n", err) return err } return nil } ================================================ FILE: pkg/command/command_branch.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "os" "github.com/antgroup/hugescm/pkg/zeta" ) type Branch struct { ShowCurrent bool `name:"show-current" help:"Show current branch name"` List bool `name:"list" short:"l" help:"List branches. With optional ..."` Copy bool `name:"copy" short:"c" help:"Copy a branch and its reflog"` ForceCopy bool `short:"C" shortonly:"" help:"Copy a branch, even if target exists"` Delete bool `name:"delete" short:"d" help:"Delete fully merged branch"` ForceDelete bool `short:"D" shortonly:"" help:"Delete branch (even if not merged)"` Move bool `name:"move" short:"m" help:"Move/rename a branch and its reflog"` ForceMove bool `short:"M" shortonly:"" help:"Move/rename a branch, even if target exists"` Force bool `name:"force" short:"f" help:"Force creation, move/rename, deletion"` Args []string `arg:"" optional:"" name:"args" help:""` } const ( branchSummaryFormat = `%szeta branch [] [-f] [] %szeta branch [] [-l] [...] %szeta branch [] (-d | -D) ... %szeta branch [] (-m | -M) [] %szeta branch [] (-c | -C) [] %szeta branch --show-current` ) func (b *Branch) Summary() string { or := W(" or: ") return fmt.Sprintf(branchSummaryFormat, W("Usage: "), or, or, or, or, or) } func (b *Branch) IsMove() bool { return b.ForceMove || b.Move } func (b *Branch) IsDelete() bool { return b.ForceDelete || b.Delete } func (b *Branch) IsForceMove() bool { return b.ForceMove || b.Force } func (b *Branch) IsForceDelete() bool { return b.ForceDelete || b.Force } func (b *Branch) IsForceCopy() bool { return b.ForceCopy || b.Force } func (b *Branch) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint if b.ShowCurrent { return r.ShowCurrent(os.Stdout) } if b.List { return r.ListBranch(context.Background(), b.Args) } if b.IsMove() { if len(b.Args) < 2 { diev("branch name required, eg: zeta branch --move ") return ErrArgRequired } return r.MoveBranch(b.Args[0], b.Args[1], b.IsForceMove()) } if b.IsDelete() { if len(b.Args) < 1 { diev("branch name required, eg: zeta branch --delete ") return ErrArgRequired } return r.RemoveBranch(b.Args, b.IsForceDelete()) } if len(b.Args) == 0 { return r.ListBranch(context.Background(), nil) } from := "HEAD" if len(b.Args) >= 2 { from = b.Args[1] } return r.CreateBranch(context.Background(), b.Args[0], from, b.IsForceCopy(), false) } ================================================ FILE: pkg/command/command_cat.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "github.com/antgroup/hugescm/pkg/zeta" ) type Cat struct { Object string `arg:"" name:"object" help:"The name of the object to show"` Type bool `name:"type" short:"t" help:"Show object type"` Size bool `name:"size" short:"s" help:"Show object size"` Verify bool `name:"verify" help:"Verify object hash"` Textconv bool `name:"textconv" help:"Converting text to Unicode"` JSON bool `name:"json" short:"j" help:"Returns data as JSON; limited to commits, trees, fragments, and tags"` Direct bool `name:"direct" help:"View files directly"` Limit int64 `name:"limit" short:"L" help:"Omits blobs larger than n bytes or units. n may be zero. Supported units: KB, MB, GB, K, M, G" default:"-1" type:"size"` Output string `name:"output" help:"Output to a specific file instead of stdout" placeholder:""` } func (c *Cat) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint return r.Cat(context.Background(), &zeta.CatOptions{ Object: c.Object, Limit: c.Limit, Type: c.Type, PrintSize: c.Size, Textconv: c.Textconv, Direct: c.Direct, PrintJSON: c.JSON, Verify: c.Verify, Output: c.Output, }) } ================================================ FILE: pkg/command/command_check_ignore.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "github.com/antgroup/hugescm/pkg/zeta" ) // Debug gitignore / exclude files // https://git-scm.com/docs/git-check-ignore type CheckIgnore struct { Stdin bool `name:"stdin" help:"Read file names from stdin"` Z bool `short:"z" shortonly:"" help:"Terminate input and output records by a NUL character"` JSON bool `name:"json" short:"j" help:"Data will be returned in JSON format"` Paths []string `arg:"" name:"pathname" optional:"" help:"Pathname given via the command-line"` } const ( ciSummaryFormat = `%szeta check-ignore [] ... %szeta check-ignore [] --stdin` ) func (c *CheckIgnore) Summary() string { or := W(" or: ") return fmt.Sprintf(ciSummaryFormat, W("Usage: "), or) } func (c *CheckIgnore) Run(g *Globals) error { if c.Stdin { if len(c.Paths) > 0 { die("cannot specify pathnames with --stdin") return ErrFlagsIncompatible } } else { if c.Z { die("-z only makes sense with --stdin") return ErrFlagsIncompatible } if len(c.Paths) == 0 { die("no path specified") return ErrFlagsIncompatible } } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() return w.DoCheckIgnore(context.Background(), &zeta.CheckIgnoreOption{ Paths: slashPaths(c.Paths), Stdin: c.Stdin, Z: c.Z, JSON: c.JSON, }) } ================================================ FILE: pkg/command/command_checkout.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "fmt" "os" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/transport" "github.com/antgroup/hugescm/pkg/zeta" ) type Checkout struct { Branch string `name:"branch" short:"b" help:"Direct the new HEAD to the branch after checkout" placeholder:""` TagName string `name:"tag" short:"t" help:"Direct the new HEAD to the tag's commit after checkout" placeholder:""` Refname string `name:"refname" help:"Direct the new HEAD to the ref's commit after checkout" placeholder:""` Commit string `name:"commit" help:"Direct the new HEAD to the branch after checkout" placeholder:""` Sparse []string `name:"sparse" short:"s" help:"A subset of repository files, all files are checked out by default" placeholder:""` Limit int64 `name:"limit" short:"L" help:"Omits blobs larger than n bytes or units. n may be zero. Supported units: KB, MB, GB, K, M, G" default:"-1" type:"size"` Batch bool `name:"batch" help:"Get and checkout files for each provided on stdin"` Snapshot bool `name:"snapshot" help:"Checkout a non-editable snapshot"` Depth int `name:"depth" help:"Create a shallow clone with a history truncated to the specified number of commits" default:"1"` One bool `name:"one" help:"Checkout large files one after another"` Quiet bool `name:"quiet" help:"Operate quietly. Progress is not reported to the standard error stream"` Args []string `arg:"" optional:""` passthroughArgs []string `kong:"-"` } const ( coSummaryFormat = `%szeta checkout (co) [--branch|--tag] [--commit] [--sparse] [--limit] [] %szeta checkout (co) %szeta checkout (co) [] -- ... %szeta checkout (co) --batch [] %szeta checkout (co) []` ) func (c *Checkout) Summary() string { or := W(" or: ") return fmt.Sprintf(coSummaryFormat, W("Usage: "), or, or, or, or) } func (c *Checkout) Passthrough(paths []string) { c.passthroughArgs = append(c.passthroughArgs, paths...) } func (c *Checkout) doRemote(g *Globals, remote, destination string) error { if c.One && c.Limit != -1 { diev("--one is not compatible with --limit N") return ErrFlagsIncompatible } if len(c.TagName) != 0 && (len(c.Branch) != 0 || len(c.Commit) != 0) { diev("--tag is not compatible with --branch or --commit") return ErrFlagsIncompatible } r, err := zeta.New(context.Background(), &zeta.NewOptions{ Remote: remote, Branch: c.Branch, TagName: c.TagName, Refname: c.Refname, Commit: c.Commit, Destination: destination, SparseDirs: c.Sparse, Snapshot: c.Snapshot, SizeLimit: c.Limit, Values: g.Values, One: c.One, Depth: c.Depth, Quiet: c.Quiet, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint if err := r.Postflight(context.Background()); err != nil { fmt.Fprintf(os.Stderr, "postflight: prune objects error: %v\n", err) return err } return nil } func (c *Checkout) destination() string { if len(c.Args) >= 2 { return c.Args[1] } if len(c.passthroughArgs) > 0 { return c.passthroughArgs[0] } return "" } func (c *Checkout) revision() string { if len(c.Args) != 0 { return c.Args[0] } return "HEAD" } func (c *Checkout) runCompatibleCheckout0(r *zeta.Repository, worktreeOnly bool, branchName plumbing.ReferenceName, oid plumbing.Hash, pathSpec []string) error { w := r.Worktree() if len(pathSpec) != 0 { if err := w.DoPathCo(context.Background(), worktreeOnly, oid, pathSpec); err != nil { if oid, ok := plumbing.AsNoSuchObjectErr(err); ok { fmt.Fprintf(os.Stderr, "zeta checkout: missing object: %s\ntry download it: zeta cat -t %s\n", oid, oid) return err } fmt.Fprintf(os.Stderr, "zeta checkout: checkout files error: %v\n", err) return err } return nil } trace.DbgPrint("compatible checkout") opts := &zeta.CheckoutOptions{Branch: branchName, Merge: false, Force: false} if len(branchName) == 0 { opts.Hash = oid } if err := w.Checkout(context.Background(), opts); err != nil { if !errors.Is(err, zeta.ErrAborting) { target := string(branchName) if len(target) == 0 { target = oid.String() } diev("checkout to '%s' error: %v", target, err) } return err } return nil } func (c *Checkout) runCompatibleCheckout(r *zeta.Repository) error { pathSpec := make([]string, 0, len(c.Args)) // zeta checkout [] if len(c.Args) == 0 { pathSpec = append(pathSpec, c.passthroughArgs...) head, err := r.Current() if err != nil { diev("checkout resolve HEAD error: %v", err) return err } return c.runCompatibleCheckout0(r, true, head.Name(), head.Hash(), pathSpec) } rev, refname, err := r.RevisionEx(context.Background(), c.Args[0]) if zeta.IsErrUnknownRevision(err) { pathSpec = append(pathSpec, c.Args...) pathSpec = append(pathSpec, c.passthroughArgs...) head, err := r.Current() if err != nil { return err } trace.DbgPrint("resolve HEAD: %s", head.Name()) return c.runCompatibleCheckout0(r, true, head.Name(), head.Hash(), slashPaths(pathSpec)) } if err != nil { fmt.Fprintf(os.Stderr, "zeta checkout: resolve revision error: %v\n", err) return err } // zeta checkout [] trace.DbgPrint("resolve revision: %s", rev) pathSpec = append(pathSpec, c.Args[1:]...) pathSpec = append(pathSpec, c.passthroughArgs...) var worktreeOnly bool if len(pathSpec) != 0 { worktreeOnly = r.IsCurrent(refname) } return c.runCompatibleCheckout0(r, worktreeOnly, refname, rev, slashPaths(pathSpec)) } func (c *Checkout) Run(g *Globals) error { if len(c.Args) > 0 && transport.IsRemoteEndpoint(c.Args[0]) { return c.doRemote(g, c.Args[0], c.destination()) } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Verbose: g.Verbose, }) if err != nil { return err } defer func() { if err := r.Postflight(context.Background()); err != nil { fmt.Fprintf(os.Stderr, "postflight: prune objects error: %v\n", err) } _ = r.Close() }() if c.Batch { w := r.Worktree() if err := w.DoBatchCo(context.Background(), c.One, c.revision(), os.Stdin); err != nil { fmt.Fprintf(os.Stderr, "zeta checkout --batch error: %v\n", err) return err } return nil } if c.One { diev("--one is not compatible with checkout revision or files") return ErrFlagsIncompatible } if err := c.runCompatibleCheckout(r); err != nil { return err } return nil } ================================================ FILE: pkg/command/command_cherry_pick.go ================================================ package command import ( "context" "github.com/antgroup/hugescm/pkg/zeta" ) // Apply the changes introduced by some existing commit type CherryPick struct { Revision string `arg:"" optional:"" name:"revision" help:"Existing commit" placeholder:""` Abort bool `name:"abort" help:"Abort and checkout the original branch"` Continue bool `name:"continue" help:"Continue"` } func (c *CherryPick) Run(g *Globals) error { if c.Abort && c.Continue { diev("--abort is not compatible with --continue") return ErrFlagsIncompatible } if !c.Abort && !c.Continue && len(c.Revision) == 0 { die("missing revision arg") return ErrArgRequired } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() if err := w.CherryPick(context.Background(), &zeta.CherryPickOptions{ From: c.Revision, Abort: c.Abort, Continue: c.Continue, }); err != nil { return err } return nil } ================================================ FILE: pkg/command/command_clean.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "fmt" "os" "github.com/antgroup/hugescm/pkg/zeta" ) type Clean struct { DryRun bool `name:"dry-run" short:"n" help:"dry run"` Force bool `name:"force" short:"f" help:"force"` Dir bool `short:"d" shortonly:"" help:"Remove whole directories"` ALL bool `short:"x" shortonly:"" help:"Remove ignored files, too"` } func (c *Clean) Run(g *Globals) error { if !c.DryRun && !c.Force { die("refusing to clean, please specify at least -f or -n") return errors.New("refusing to clean") } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() if err := w.Clean(context.Background(), &zeta.CleanOptions{DryRun: c.DryRun, Dir: c.Dir, All: c.ALL}); err != nil { fmt.Fprintf(os.Stderr, "zeta clean error: %v\n", err) return err } return nil } ================================================ FILE: pkg/command/command_commit.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "fmt" "os" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/zeta" ) type Commit struct { Message []string `name:"message" short:"m" help:"Use the given message as the commit message. Concatenate multiple -m options as separate paragraphs" placeholder:""` File string `name:"file" short:"F" help:"Take the commit message from the given file. Use - to read the message from the standard input" placeholder:""` All bool `name:"all" short:"a" help:"Automatically stage modified and deleted files, but newly untracked files remain unaffected"` AllowEmpty bool `name:"allow-empty" help:"Allow creating a commit with the exact same tree structure as its parent commit"` AllowEmptyMessage bool `name:"allow-empty-message" help:"Like --allow-empty this command is primarily for use by foreign SCM interface scripts"` Amend bool `name:"amend" help:"Replace the tip of the current branch by creating a new commit"` } func (c *Commit) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() opts := &zeta.CommitOptions{ All: c.All, AllowEmptyCommits: c.AllowEmpty, AllowEmptyMessage: c.AllowEmptyMessage, Amend: c.Amend, Message: c.Message, File: c.File, } oid, err := w.Commit(context.Background(), opts) if err != nil { if errors.Is(err, zeta.ErrMissingAuthor) { fmt.Fprintf(os.Stderr, `zeta commit: %s %s %s zeta config --global user.email "you@example.com" zeta config --global user.name "Your Name" %s %s `, W("Author identity unknown"), W("*** Please tell me who you are."), W("Run"), W("to set your account's default identity."), W("Omit --global to set the identity only in this repository.")) return err } else if errors.Is(err, zeta.ErrNotAllowEmptyMessage) { fmt.Fprintln(os.Stderr, W("Aborting commit due to empty commit message.")) return err } else if errors.Is(err, zeta.ErrNoChanges) { fmt.Fprintln(os.Stderr, W("nothing to commit, working tree clean")) return err } else if errors.Is(err, zeta.ErrNothingToCommit) { return err } else { fmt.Fprintf(os.Stderr, "zeta commit error: %v\n", err) return err } } trace.DbgPrint("create commit: %s\n", oid.String()) return w.Stats(context.Background()) } ================================================ FILE: pkg/command/command_config.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "errors" "strings" "github.com/antgroup/hugescm/pkg/zeta" ) type Config struct { Args []string `arg:"" name:"args" optional:"" help:"Name and value, support: appears in pairs or , eg: zeta config K1=V1 K2=V2"` System bool `name:"system" help:"Use system config file"` Global bool `name:"global" help:"Only read or write to global ~/.zeta.toml"` Local bool `name:"local" help:"Only read or write to repository .zeta/zeta.toml, which is the default behavior when writing"` Unset bool `name:"unset" short:"u" help:"Remove the line matching the key from config file"` List bool `name:"list" short:"l" help:"List all variables set in config file, along with their values"` Get bool `name:"get" help:"Get the value for a given Key"` GetALL bool `name:"get-all" help:"Get all values for a given Key"` Add bool `name:"add" help:"Add a new variable: name value"` Z bool `short:"z" shortonly:"" help:"Terminate values with NUL byte"` Type string `name:"type" short:"T" help:"zeta config will ensure that any input or output is valid under the given type constraint(s), support: bool, int, float, date" placeholder:""` } func (c *Config) Run(g *Globals) error { if c.List { if len(c.Args) != 0 { die("wrong number of arguments, should be 0") return errors.New("wrong number of arguments, should be 0") } return zeta.ListConfig(&zeta.ListConfigOptions{ System: c.System, Global: c.Global, Local: c.Local, Z: c.Z, CWD: g.CWD, Values: g.Values, }) } if c.Get { return zeta.GetConfig(&zeta.GetConfigOptions{ System: c.System, Global: c.Global, Local: c.Local, Z: c.Z, Keys: c.Args, CWD: g.CWD, Values: g.Values, }) } if c.GetALL { return zeta.GetConfig(&zeta.GetConfigOptions{ System: c.System, Global: c.Global, Local: c.Local, ALL: true, Z: c.Z, Keys: c.Args, CWD: g.CWD, Values: g.Values, }) } if c.Unset { return zeta.UnsetConfig(&zeta.UnsetConfigOptions{ System: c.System, Global: c.Global, Keys: c.Args, CWD: g.CWD, }) } if len(c.Args) == 1 { kv := c.Args[0] if strings.IndexByte(kv, '=') == -1 { return zeta.GetConfig(&zeta.GetConfigOptions{ System: c.System, Global: c.Global, Local: c.Local, Z: c.Z, Keys: c.Args, CWD: g.CWD, Values: g.Values, }) } } return zeta.UpdateConfig(&zeta.UpdateConfigOptions{ System: c.System, Global: c.Global, Add: c.Add, NameAndValues: c.Args, Type: c.Type, CWD: g.CWD, }) } ================================================ FILE: pkg/command/command_diff.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "fmt" "os" "path/filepath" "strings" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/modules/zeta/object" "github.com/antgroup/hugescm/pkg/zeta" ) type Diff struct { NoIndex bool `name:"no-index" help:"Compares two given paths on the filesystem"` Nav bool `name:"nav" negatable:"" help:"Use built-in interactive navigation view"` NameOnly bool `name:"name-only" help:"Show only names of changed files"` NameStatus bool `name:"name-status" help:"Show names and status of changed files"` Numstat bool `name:"numstat" help:"Show numeric diffstat instead of patch"` Stat bool `name:"stat" help:"Show diffstat instead of patch"` Shortstat bool `name:"shortstat" help:"Output only the last line of --stat format"` Z bool `short:"z" shortonly:"" help:"Output diff-raw with lines terminated with NUL"` Staged bool `name:"staged" help:"Compare the differences between the staging area and "` Cached bool `name:"cached" help:"Compare the differences between the staging area and "` Textconv bool `name:"textconv" help:"Converting text to Unicode"` MergeBase string `name:"merge-base" help:"If --merge-base is given, use the common ancestor of and HEAD instead" placeholder:""` Histogram bool `name:"histogram" help:"Generate a diff using the \"Histogram diff\" algorithm"` ONP bool `name:"onp" help:"Generate a diff using the \"O(NP) diff\" algorithm"` Myers bool `name:"myers" help:"Generate a diff using the \"Myers diff\" algorithm"` Patience bool `name:"patience" help:"Generate a diff using the \"Patience diff\" algorithm"` Minimal bool `name:"minimal" help:"Spend extra time to make sure the smallest possible diff is produced"` DiffAlgorithm string `name:"diff-algorithm" help:"Choose a diff algorithm, supported: histogram|onp|myers|patience|minimal" placeholder:""` Output string `name:"output" help:"Output to a specific file instead of stdout" placeholder:""` From string `arg:"" optional:"" name:"from" help:""` To string `arg:"" optional:"" name:"to" help:""` passthroughArgs []string `kong:"-"` } const ( diffSummaryFormat = `%s zeta diff [] [] [--] [...] %s zeta diff [] --cached [] [--] [...] %s zeta diff [] [--] [...] %s zeta diff [] ... [--] [...] %s zeta diff [] %s zeta diff [] --no-index [--] ` ) func (c *Diff) Summary() string { or := W(" or: ") return fmt.Sprintf(diffSummaryFormat, W("Usage: "), or, or, or, or, or) } func (c *Diff) NewLine() byte { if c.Z { return '\x00' } return '\n' } func (c *Diff) Passthrough(paths []string) { c.passthroughArgs = append(c.passthroughArgs, paths...) } func (c *Diff) checkAlgorithm() (diferenco.Algorithm, error) { if len(c.DiffAlgorithm) != 0 { return diferenco.AlgorithmFromName(c.DiffAlgorithm) } switch { case c.Histogram: return diferenco.Histogram, nil case c.ONP: return diferenco.ONP, nil case c.Myers: return diferenco.Myers, nil case c.Patience: return diferenco.Patience, nil case c.Minimal: return diferenco.Minimal, nil default: } return diferenco.Unspecified, nil } func (c *Diff) NewOptions() (*zeta.DiffOptions, error) { a, err := c.checkAlgorithm() if err != nil { return nil, err } opts := &zeta.DiffOptions{ Nav: c.Nav && len(c.Output) == 0, NameOnly: c.NameOnly, NameStatus: c.NameStatus, Numstat: c.Numstat, Stat: c.Stat, Shortstat: c.Shortstat, NewLine: c.NewLine(), NewOutput: c.NewOutput, PathSpec: slashPaths(c.passthroughArgs), From: c.From, To: c.To, Staged: c.Staged || c.Cached, MergeBase: c.MergeBase, Textconv: c.Textconv, Algorithm: a, } if len(c.To) == 0 { if from, to, ok := strings.Cut(c.From, "..."); ok { opts.From = from opts.To = to opts.ThreeWay = true return opts, nil } if from, to, ok := strings.Cut(c.From, ".."); ok { opts.From = from opts.To = to return opts, nil } } return opts, nil } func (c *Diff) NewOutput(ctx context.Context) (zeta.Printer, error) { if len(c.Output) != 0 { if err := os.MkdirAll(filepath.Dir(c.Output), 0755); err != nil { return nil, err } fd, err := os.Create(c.Output) if err != nil { return nil, err } return &zeta.WrapPrinter{WriteCloser: fd}, nil } if c.Nav { return zeta.NewBuiltinPrinter(ctx), nil } return zeta.NewPrinter(ctx), nil } func (c *Diff) render(u *diferenco.Patch) error { opts := &zeta.DiffOptions{ NameOnly: c.NameOnly, NameStatus: c.NameStatus, Numstat: c.Numstat, Stat: c.Stat, Shortstat: c.Shortstat, NewLine: c.NewLine(), NewOutput: c.NewOutput, NoRename: true, } switch { case c.Numstat, c.Stat, c.Shortstat: s := u.Stat() name := c.From if c.From != c.To { name = object.PathRenameCombine(c.From, c.To) } return opts.ShowStats(context.Background(), object.FileStats{ object.FileStat{ Name: name, Addition: s.Addition, Deletion: s.Deletion, }, }) default: return opts.ShowPatch(context.Background(), []*diferenco.Patch{u}) } } func (c *Diff) nameStatus() error { w, err := c.NewOutput(context.Background()) if err != nil { return err } defer w.Close() // nolint if c.NameOnly { _, _ = fmt.Fprintf(w, "%s%c", c.From, c.NewLine()) return nil } _, _ = fmt.Fprintf(w, "%c %s%c", 'M', c.To, c.NewLine()) return nil } func (c *Diff) diffNoIndex() error { if len(c.From) == 0 || len(c.To) == 0 { die("missing arg, example: zeta diff --no-index from to") return ErrArgRequired } c.From = cleanPath(c.From) c.To = cleanPath(c.To) if c.NameOnly || c.NameStatus { return c.nameStatus() } a, err := c.checkAlgorithm() if err != nil { fmt.Fprintf(os.Stderr, "zeta diff --no-index: parse options error: %v\n", err) return err } trace.DbgPrint("from %s to %s", c.From, c.To) from, err := zeta.ReadContent(c.From, c.Textconv) if err != nil { diev("zeta diff --no-index hash error: %v", err) return err } to, err := zeta.ReadContent(c.To, c.Textconv) if err != nil && !errors.Is(err, diferenco.ErrBinaryData) { diev("zeta diff --no-index read text error: %v", err) return err } if from.IsBinary || to.IsBinary { return c.render(&diferenco.Patch{ From: &diferenco.File{Name: c.From, Hash: from.Hash, Mode: uint32(from.Mode)}, To: &diferenco.File{Name: c.To, Hash: to.Hash, Mode: uint32(to.Mode)}, IsBinary: true, }) } if from.Hash == to.Hash { return c.render(&diferenco.Patch{ From: &diferenco.File{Name: c.From, Hash: from.Hash, Mode: uint32(from.Mode)}, To: &diferenco.File{Name: c.To, Hash: to.Hash, Mode: uint32(to.Mode)}, IsBinary: false, }) } u, err := diferenco.Unified(context.Background(), &diferenco.Options{ From: &diferenco.File{Name: c.From, Hash: from.Hash, Mode: uint32(from.Mode)}, To: &diferenco.File{Name: c.To, Hash: to.Hash, Mode: uint32(to.Mode)}, S1: from.Text, S2: to.Text, A: a, }) if err != nil { diev("zeta diff --no-index error: %v", err) return err } return c.render(u) } func (c *Diff) Run(g *Globals) error { if c.NoIndex { return c.diffNoIndex() } if _, _, err := zeta.FindZetaDir(g.CWD); err != nil { return c.diffNoIndex() } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() opts, err := c.NewOptions() if err != nil { fmt.Fprintf(os.Stderr, "parse options error: %v\n", err) return err } if err = w.DiffContext(context.Background(), opts); err != nil { return err } return nil } ================================================ FILE: pkg/command/command_fetch.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "github.com/antgroup/hugescm/pkg/zeta" ) // In the design of HugeSCM, we have abandoned the philosophy of git where the retrieval of repository data should be minimalistic, that is, to fetch only what is needed. Therefore, // when implementing the fetch feature, it's important to adhere to the principle that zeta fetch will not support fetching all data at once, // but will only support fetching specific reference metadata and particular objects. type Fetch struct { Name string `arg:"" optional:"" name:"name" help:"Reference or commit to be downloaded"` Unshallow bool `name:"unshallow" help:"Get complete history"` Tag bool `name:"tag" short:"t" help:"Download tags instead of branches only when refname is incomplete"` // Limit int64 `name:"limit" short:"L" help:"Omits blobs larger than n bytes or units. n may be zero. Supported units: KB, MB, GB, K, M, G" default:"-1" type:"size"` Force bool `name:"force" short:"f" help:"Override reference update check"` } const ( fetchSummaryFormat = `%szeta fetch [reference] [--unshallow] [--tag] [--skip-larges]` ) func (c *Fetch) Summary() string { return fmt.Sprintf(fetchSummaryFormat, W("Usage: ")) } func (c *Fetch) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint _, err = r.DoFetch(context.Background(), &zeta.DoFetchOptions{ Name: c.Name, Unshallow: c.Unshallow, Limit: c.Limit, Tag: c.Tag, FetchAlways: true, }) return err } ================================================ FILE: pkg/command/command_for_each_ref.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "github.com/antgroup/hugescm/pkg/zeta" ) // Output information on each ref type ForEachRef struct { JSON bool `name:"json" short:"j" help:"Data will be returned in JSON format"` Sort string `name:"sort" help:"Field name to sort on" placeholder:""` Pattern []string `arg:"" optional:"" name:"pattern" help:"If given, only refs matching at least one pattern are shown"` } func (c *ForEachRef) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint return r.ForEachReference(context.Background(), &zeta.ForEachReferenceOptions{ FormatJSON: c.JSON, Order: c.Sort, Pattern: c.Pattern, }) } ================================================ FILE: pkg/command/command_gc.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "time" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/zeta" ) type GC struct { Prune time.Duration `name:"prune" help:"Pruning objects older than specified date (default is 2 weeks ago, configurable with gc.pruneExpire)" type:"expire" default:"2.weeks.ago"` Quiet bool `name:"quiet" help:"Operate quietly. Progress is not reported to the standard error stream"` } func (c *GC) Run(g *Globals) error { trace.DbgPrint("prune: %v", c.Prune) r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, Quiet: c.Quiet, }) if err != nil { return err } defer r.Close() // nolint return r.Gc(context.Background(), &zeta.GcOptions{Prune: c.Prune}) } ================================================ FILE: pkg/command/command_hash_object.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "io" "os" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/pkg/zeta" ) type HashObject struct { W bool `short:"w" shortonly:"" help:"Write the object into the object database"` Stdin bool `name:"stdin" help:"Read the object from stdin"` Path string `name:"path" help:"Process file as it were from this path" placeholder:""` } func (c *HashObject) Run(g *Globals) error { if !c.W { return c.hashObject() } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint if c.Stdin { oid, err := r.ODB().HashTo(context.Background(), os.Stdin, -1) if err != nil { diev("hash-object error: %v", err) return err } _, _ = fmt.Fprintln(os.Stdout, oid) return nil } if len(c.Path) == 0 { diev("require --stdin or --path") return ErrArgRequired } fd, err := os.Open(c.Path) if err != nil { diev("open %s error: %v", c.Path, err) return err } defer fd.Close() // nolint si, err := fd.Stat() if err != nil { diev("stat %s error: %v", c.Path, err) return err } oid, _, err := r.HashTo(context.Background(), fd, si.Size()) if err != nil { diev("hash-object error: %v", err) return err } _, _ = fmt.Fprintln(os.Stdout, oid) return nil } func (c *HashObject) hashObject() error { var r io.Reader switch { case c.Stdin: r = os.Stdin case len(c.Path) != 0: fd, err := os.Open(c.Path) if err != nil { diev("open %s error: %v", c.Path, err) return err } defer fd.Close() // nolint r = fd default: diev("require --stdin or --path") return ErrArgRequired } h := plumbing.NewHasher() if _, err := io.Copy(h, r); err != nil { diev("hash-object error: %v", err) return err } _, _ = fmt.Fprintln(os.Stdout, h.Sum()) return nil } ================================================ FILE: pkg/command/command_init.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "os" "github.com/antgroup/hugescm/modules/plumbing" "github.com/antgroup/hugescm/modules/zeta/config" "github.com/antgroup/hugescm/pkg/transport" "github.com/antgroup/hugescm/pkg/zeta" ) type Init struct { Branch string `name:"branch" short:"b" help:"Override the name of the initial branch" default:"mainline" placeholder:""` Remote string `name:"remote" help:"Initialize and start tracking a new repository" placeholder:""` Directory string `arg:"" name:"directory" help:"Repository directory"` } func (c *Init) Run(g *Globals) error { if len(c.Branch) != 0 { if !plumbing.ValidateBranchName([]byte(c.Branch)) { diev("'%s' is not a valid branch name", c.Branch) return &zeta.ErrExitCode{ExitCode: 129} } } if worktree, _, err := zeta.FindZetaDir(c.Directory); err == nil { diev("Directory '%s' is already managed by zeta", worktree) return &zeta.ErrExitCode{ExitCode: 127} } r, err := zeta.Init(context.Background(), &zeta.InitOptions{ Branch: c.Branch, Worktree: c.Directory, MustEmpty: false, Verbose: g.Verbose}) if err != nil { return err } defer r.Close() // nolint if len(c.Remote) != 0 { e, err := transport.NewEndpoint(c.Remote, nil) if err != nil { fmt.Fprintf(os.Stderr, "zeta remote set remote to '%s' error: %v\n", c.Remote, err) return err } newRemote := e.String() if err := config.UpdateLocal(r.ZetaDir(), &config.UpdateOptions{ Values: map[string]any{ "core.remote": newRemote, }, }); err != nil { fmt.Fprintf(os.Stderr, "zeta remote set remote to '%s' error: %v\n", newRemote, err) return err } } return nil } ================================================ FILE: pkg/command/command_log.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "github.com/antgroup/hugescm/pkg/zeta" ) // --since=, --after= // Show commits more recent than a specific date. // --since-as-filter= // Show all commits more recent than a specific date. This visits all commits in the range, rather than stopping at the first commit which is older than a specific date. // --until=, --before= // Show commits older than a specific date. // --author=, --committer= // Limit the commits output to ones with author/committer header lines that match the specified pattern (regular expression). With more than one --author=, commits whose author matches any of // the given patterns are chosen (similarly for multiple --committer=). type Log struct { Revision string `arg:"" optional:"" name:"revision-range" help:"Revision range"` DateOrder bool `name:"date-order" help:"Order by committer date"` AuthorDateOrder bool `name:"author-date-order" help:"Order by author date"` Reverse bool `name:"reverse" help:"Reverse order"` FirstParent bool `name:"first-parent" help:"Follow only the first parent commit upon seeing a merge commit"` JSON bool `name:"json" short:"j" help:"Data will be returned in JSON format"` paths []string `kong:"-"` } const ( logSummaryFormat = `%szeta log [] [] [[--] ...]` ) func (c *Log) Summary() string { return fmt.Sprintf(logSummaryFormat, W("Usage: ")) } func (c *Log) Passthrough(paths []string) { c.paths = append(c.paths, paths...) } func (c *Log) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint opts := &zeta.LogCommandOptions{ Revision: c.Revision, Order: zeta.LogOrderTopo, // --topo-order OrderByCommitterDate: c.DateOrder, OrderByAuthorDate: c.AuthorDateOrder, Paths: slashPaths(c.paths), Reverse: c.Reverse, FormatJSON: c.JSON, } switch { case c.DateOrder || c.AuthorDateOrder: opts.Order = zeta.LogOrderBFS // order --> DATE: switch to BFS and sort by committer time case c.FirstParent: opts.Order = zeta.LogOrderDFSPostFirstParent } if err := r.Log(context.Background(), opts); err != nil { return err } return nil } ================================================ FILE: pkg/command/command_ls_files.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "github.com/antgroup/hugescm/pkg/zeta" ) type LsFiles struct { Cached bool `name:"cached" short:"c" help:"Show cached files in the output (default)"` Deleted bool `name:"deleted" short:"d" help:"Show deleted files in the output"` Modified bool `name:"modified" short:"m" help:"Show modified files in the output"` Others bool `name:"others" short:"o" help:"Show other files in the output"` Stage bool `name:"stage" short:"s" help:"Show staged contents' object name in the output"` Z bool `short:"z" shortonly:"" help:"Terminate entries with NUL byte"` JSON bool `name:"json" short:"j" help:"Data will be returned in JSON format"` Paths []string `arg:"" name:"path" optional:"" help:"Given paths, show as match patterns; else, use root as sole argument"` } func (c *LsFiles) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() opts := &zeta.LsFilesOptions{ Z: c.Z, JSON: c.JSON, Paths: slashPaths(c.Paths), } switch { case c.Stage: opts.Mode = zeta.ListFilesStage case c.Deleted: opts.Mode = zeta.ListFilesDeleted case c.Modified: opts.Mode = zeta.ListFilesModified case c.Others: opts.Mode = zeta.ListFilesOthers } if err := w.LsFiles(context.Background(), opts); err != nil { diev("zeta ls-files error: %v", err) return err } return nil } ================================================ FILE: pkg/command/command_ls_tree.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "os" "github.com/antgroup/hugescm/pkg/zeta" ) type LsTree struct { OnlyTrees bool `short:"d" shortonly:"" help:"Only show trees"` Recurse bool `short:"r" shortonly:"" help:"Recurse into subtrees"` Tree bool `short:"t" shortonly:"" help:"Show trees when recursing"` Z bool `short:"z" shortonly:"" help:"Terminate entries with NUL byte"` Long bool `name:"long" short:"l" help:"Include object size"` NameOnly bool `name:"name-only" alias:"name-status" help:"List only filenames"` Abbrev int `name:"abbrev" help:"Use digits to display object names" placeholder:""` JSON bool `name:"json" short:"j" help:"Data will be returned in JSON format"` Revision string `arg:"" name:"tree-ish" help:"ID of a tree-ish"` Paths []string `arg:"" name:"path" optional:"" help:"Given paths, show as match patterns; else, use root as sole argument"` } func (c *LsTree) NewLine() byte { if c.Z { return '\x00' } return '\n' } // List the contents of a tree object func (c *LsTree) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint if err := r.LsTree(context.Background(), &zeta.LsTreeOptions{ OnlyTrees: c.OnlyTrees, Recurse: c.Recurse, Tree: c.Tree, NewLine: c.NewLine(), Long: c.Long, NameOnly: c.NameOnly, Abbrev: c.Abbrev, Revision: c.Revision, Paths: slashPaths(c.Paths), JSON: c.JSON, }); err != nil { fmt.Fprintf(os.Stderr, "zeta ls-tree error: %v\n", err) return err } return nil } ================================================ FILE: pkg/command/command_merge.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "github.com/antgroup/hugescm/pkg/zeta" ) // Join two or more development histories together type Merge struct { Revision string `arg:"" optional:"" name:"revision" help:"Merge specific revision into HEAD"` FF bool `name:"ff" negatable:"" help:"Allow fast-forward" default:"true"` FFOnly bool `name:"ff-only" help:"Abort if fast-forward is not possible"` Squash bool `name:"squash" help:"Create a single commit instead of doing a merge"` AllowUnrelatedHistories bool `name:"allow-unrelated-histories" help:"Allow merging unrelated histories"` Textconv bool `name:"textconv" help:"Converting text to Unicode"` Message []string `name:"message" short:"m" help:"Merge commit message (for a non-fast-forward merge)" placeholder:""` File string `name:"file" short:"F" help:"Read message from file" placeholder:""` Signoff bool `name:"signoff" negatable:"" help:"Add a Signed-off-by trailer" default:"false"` Abort bool `name:"abort" help:"Abort a conflicting merge"` Continue bool `name:"continue" help:"Continue a merge with resolved conflicts"` } const ( mergeSummaryFormat = `%szeta merge [] [] %szeta merge --abort %szeta merge --continue` ) func (c *Merge) Summary() string { or := W(" or: ") return fmt.Sprintf(mergeSummaryFormat, W("Usage: "), or, or) } func (c *Merge) Run(g *Globals) error { if c.FFOnly && c.Squash { diev("--ff-only is not compatible with --squash") return ErrFlagsIncompatible } if c.Abort && c.Continue { diev("--abort is not compatible with --continue") return ErrFlagsIncompatible } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() if err := w.Merge(context.Background(), &zeta.MergeOptions{ From: c.Revision, FF: c.FF, FFOnly: c.FFOnly, Squash: c.Squash, Signoff: c.Signoff, Message: c.Message, File: c.File, AllowUnrelatedHistories: c.AllowUnrelatedHistories, Textconv: c.Textconv, Abort: c.Abort, Continue: c.Continue, }); err != nil { return err } return nil } ================================================ FILE: pkg/command/command_merge_base.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "fmt" "github.com/antgroup/hugescm/pkg/zeta" ) type MergeBase struct { // --is-ancestor All bool `name:"all" short:"a" negatable:"" default:"false" help:"Output all common ancestors"` IsAncestor bool `name:"is-ancestor" help:"Is the first one ancestor of the other?"` Args []string `arg:"" name:"commit"` } // usage: zeta merge-base [-a | --all] ... // or: zeta merge-base [-a | --all] --octopus ... // or: zeta merge-base --is-ancestor const ( mergeBaseSummaryFormat = `%szeta merge-base [-a | --all] ... %szeta merge-base --is-ancestor ` ) func (c *MergeBase) Summary() string { or := W(" or: ") return fmt.Sprintf(mergeBaseSummaryFormat, W("Usage: "), or) } func (c *MergeBase) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint if c.IsAncestor { if len(c.Args) != 2 { diev("Need two revisions, eg: zeta merge-base --is-ancestor A B") return ErrArgRequired } return r.IsAncestor(context.Background(), c.Args[0], c.Args[1]) } if len(c.Args) < 2 { diev("At least two versions are required, eg: zeta merge-base A B") return ErrArgRequired } return r.MergeBase(context.Background(), c.Args, c.All) } ================================================ FILE: pkg/command/command_merge_file.go ================================================ package command import ( "context" "fmt" "io" "os" "github.com/antgroup/hugescm/modules/diferenco" "github.com/antgroup/hugescm/modules/trace" "github.com/antgroup/hugescm/pkg/zeta" ) type MergeFile struct { Stdout bool `name:"stdout" short:"p" negatable:"" help:"Send results to standard output"` ObjectID bool `name:"object-id" negatable:"" help:"Use object IDs instead of filenames"` Diff3 bool `name:"diff3" negatable:"" help:"Use a diff3 based merge"` ZDiff3 bool `name:"zdiff3" negatable:"" help:"Use a zealous diff3 based merge"` DiffAlgorithm string `name:"diff-algorithm" help:"Choose a diff algorithm, supported: histogram|onp|myers|patience|minimal" placeholder:""` L []string `short:"L" shortonly:"" help:"Set labels for file1/orig-file/file2"` F1 string `arg:"" name:"file1" help:""` O string `arg:"" name:"orig-file" help:""` F2 string `arg:"" name:"file2" help:""` } const ( mergeFileSummaryFormat = `%szeta merge-file [] [-L [-L [-L ]]] ` ) func (c *MergeFile) Summary() string { return fmt.Sprintf(mergeFileSummaryFormat, W("Usage: ")) } func (c *MergeFile) labelName(i int, n string) string { if i < len(c.L) { return c.L[i] } return n } func (c *MergeFile) mergeExtra() error { var a diferenco.Algorithm var err error if len(c.DiffAlgorithm) != 0 { if a, err = diferenco.AlgorithmFromName(c.DiffAlgorithm); err != nil { fmt.Fprintf(os.Stderr, "parse diff.algorithm error: %v\n", err) return err } } var style int switch { case c.Diff3: style = diferenco.STYLE_DIFF3 case c.ZDiff3: style = diferenco.STYLE_ZEALOUS_DIFF3 } trace.DbgPrint("algorithm: %s conflict style: %v", a, style) textO, err := zeta.ReadText(c.O, false) if err != nil { fmt.Fprintf(os.Stderr, "merge-file: open error: %v\n", err) return err } textA, err := zeta.ReadText(c.F1, false) if err != nil { fmt.Fprintf(os.Stderr, "merge-file: open error: %v\n", err) return err } textB, err := zeta.ReadText(c.F2, false) if err != nil { fmt.Fprintf(os.Stderr, "merge-file: open error: %v\n", err) return err } opts := &diferenco.MergeOptions{ TextO: textO, TextA: textA, TextB: textB, A: a, Style: style, LabelA: c.labelName(0, c.F1), LabelO: c.labelName(1, c.O), LabelB: c.labelName(2, c.F2), } mergedText, conflict, err := diferenco.Merge(context.Background(), opts) if err != nil { fmt.Fprintf(os.Stderr, "merge-file: merge error: %v\n", err) return err } _, _ = io.WriteString(os.Stdout, mergedText) if conflict { return &zeta.ErrExitCode{ExitCode: 1, Message: "conflict"} } return nil } func (c *MergeFile) Run(g *Globals) error { if !c.ObjectID { return c.mergeExtra() } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint var style int switch { case c.Diff3: style = diferenco.STYLE_DIFF3 case c.ZDiff3: style = diferenco.STYLE_ZEALOUS_DIFF3 } opts := &zeta.MergeFileOptions{ O: c.O, A: c.F1, B: c.F2, Style: style, DiffAlgorithm: c.DiffAlgorithm, Stdout: c.Stdout, LabelA: c.labelName(0, c.F1), LabelO: c.labelName(1, c.O), LabelB: c.labelName(2, c.F2), } if err := r.MergeFile(context.Background(), opts); err != nil { if !zeta.IsExitCode(err, 1) { diev("merge-file: error: %v", err) } return err } return nil } ================================================ FILE: pkg/command/command_merge_tree.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "errors" "context" "github.com/antgroup/hugescm/pkg/zeta" ) type MergeTree struct { Branch1 string `arg:"" name:"branch1" help:"branch1"` Branch2 string `arg:"" name:"branch2" help:"branch2"` MergeBase string `name:"merge-base" help:"Specify a merge-base for the merge" placeholder:""` AllowUnrelatedHistories bool `name:"allow-unrelated-histories" help:"If branches lack common history, merge-tree errors. Use this flag to force merge"` NameOnly bool `name:"name-only" help:"Only output conflict-related file names"` Textconv bool `name:"textconv" help:"Converting text to Unicode"` Z bool `short:"z" shortonly:"" help:"Terminate entries with NUL byte"` JSON bool `name:"json" help:"Convert conflict results to JSON"` } func (c *MergeTree) Run(g *Globals) error { r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint err = r.MergeTree(context.Background(), &zeta.MergeTreeOptions{ Branch1: c.Branch1, Branch2: c.Branch2, MergeBase: c.MergeBase, AllowUnrelatedHistories: c.AllowUnrelatedHistories, NameOnly: c.NameOnly, Textconv: c.Textconv, Z: c.Z, JSON: c.JSON, }) if errors.Is(err, zeta.ErrHasConflicts) { return &zeta.ErrExitCode{ExitCode: 1, Message: err.Error()} } if errors.Is(err, zeta.ErrUnrelatedHistories) { return &zeta.ErrExitCode{ExitCode: 2, Message: err.Error()} } if err != nil { return &zeta.ErrExitCode{ExitCode: 127, Message: err.Error()} } return nil } ================================================ FILE: pkg/command/command_pull.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "github.com/antgroup/hugescm/pkg/zeta" ) type Pull struct { FF bool `name:"ff" negatable:"" help:"Allow fast-forward" default:"true"` FFOnly bool `name:"ff-only" help:"Abort if fast-forward is not possible"` Rebase bool `name:"rebase" help:"Incorporate changes by rebasing rather than merging"` Squash bool `name:"squash" help:"Create a single commit instead of doing a merge"` Unshallow bool `name:"unshallow" help:"Get complete history"` One bool `name:"one" help:"Checkout large files one after another"` Limit int64 `name:"limit" short:"L" help:"Omits blobs larger than n bytes or units. n may be zero. Supported units: KB, MB, GB, K, M, G" default:"-1" type:"size"` } func (c *Pull) Run(g *Globals) error { if c.FFOnly && c.Rebase { diev("--ff-only is not compatible with --rebase") return ErrFlagsIncompatible } r, err := zeta.Open(context.Background(), &zeta.OpenOptions{ Worktree: g.CWD, Values: g.Values, Verbose: g.Verbose, }) if err != nil { return err } defer r.Close() // nolint w := r.Worktree() if err := w.Pull(context.Background(), &zeta.PullOptions{ FF: c.FF, FFOnly: c.FFOnly, Rebase: c.Rebase, Squash: c.Squash, Unshallow: c.Unshallow, One: c.One, Limit: c.Limit, }); err != nil { return err } return nil } ================================================ FILE: pkg/command/command_push.go ================================================ // Copyright ©️ Ant Group. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package command import ( "context" "errors" "github.com/antgroup/hugescm/pkg/zeta" ) type Push struct { Refspec string `arg:"" optional:"" name:"refspec" default:"" help:"Specify what destination ref to update with what source object"` PushOptions []string `name:"push-option" short:"o" help:"Option to transmit" placeholder:"